Skip to content

Commit 2bd8ed5

Browse files
committed
Improving the BASE64 decoder to handle lines that are not multiples of four
The BASE64 decoder didn't take into account lines that were not multiples of four and this resulted in a corrupted decode. Changed the decoder so that state can be preserved between subsequent calls. Manual merge of PR#99 Closes #96
1 parent 82c04ee commit 2bd8ed5

File tree

4 files changed

+164
-57
lines changed

4 files changed

+164
-57
lines changed

Changelog

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ HYPERMAIL VERSION 3.0.0
101101
print modules are used to, but including now markers for the beginning
102102
and end of sections, etc.
103103

104+
2023-05-16 Andy Valencia (@vandys)
105+
* src/{parse.c, base64.h, base64.c}
106+
The BASE64 decoder didn't take into account lines that were not
107+
multiples of four and this resulted in a corrupted decode. Changed the
108+
decoder so that state can be preserved between subsequent calls.
109+
Closes #96
110+
104111
2023-05-12 @shlomif
105112
* README
106113
Fixed spelling errors

src/base64.c

Lines changed: 106 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,85 +10,146 @@
1010
**
1111
** - Encoded strings that ended with more than one = caused the decode
1212
** function to generate 3 extra zero bytes at the end of the output.
13+
**
14+
** CHANGES by Andy Valencia - May 15 2023:
15+
**
16+
** - Preserve decoding state between calls to the
17+
** base64_decode_stream() function to take into account UAs that
18+
** output intermediate base64 lines that are not always a multiple of
19+
** four.
1320
*/
1421

1522
#include "hypermail.h"
1623
#include "base64.h"
1724

18-
void base64Decode(char *intext, char *out, int *length)
25+
/*
26+
* base64_decoder_state_new()
27+
* Allocate a new base64 decoder state
28+
*/
29+
struct base64_decoder_state *base64_decoder_state_new(void)
30+
{
31+
struct base64_decoder_state *st = (struct base64_decoder_state *)emalloc(sizeof(struct base64_decoder_state));
32+
if (!st) {
33+
return(0);
34+
}
35+
memset(st, 0, sizeof(struct base64_decoder_state));
36+
return(st);
37+
}
38+
39+
/*
40+
* base64_decoder_state_free()
41+
* Release storage
42+
*/
43+
void base64_decoder_state_free(struct base64_decoder_state *st)
44+
{
45+
free(st);
46+
}
47+
48+
/*
49+
* base64_decode_stream()
50+
*
51+
* Accept base64 "intext",
52+
* place resulting decoded output in a null-terminated "out".
53+
*
54+
* "st" is our state, which will be updated and can carry state between
55+
* calls.
56+
*/
57+
int base64_decode_stream(struct base64_decoder_state *st, const char *intext, char *out)
1958
{
20-
unsigned char ibuf[4];
21-
unsigned char obuf[3];
2259
char ignore;
23-
char endtext = FALSE;
2460
char ch;
25-
int lindex = 0;
26-
*length = 0;
27-
28-
memset(ibuf, 0, sizeof(ibuf));
61+
int length;
62+
63+
/* Ignore trailing garbage */
64+
if (st->endtext) {
65+
*out = '\0';
66+
return(0);
67+
}
2968

69+
length = 0;
3070
while (*intext) {
3171
ch = *intext;
32-
3372
ignore = FALSE;
34-
if ((ch >= 'A') && (ch <= 'Z'))
73+
74+
if ((ch >= 'A') && (ch <= 'Z')) {
3575
ch = ch - 'A';
36-
else if ((ch >= 'a') && (ch <= 'z'))
76+
} else if ((ch >= 'a') && (ch <= 'z')) {
3777
ch = ch - 'a' + 26;
38-
else if ((ch >= '0') && (ch <= '9'))
78+
} else if ((ch >= '0') && (ch <= '9')) {
3979
ch = ch - '0' + 52;
40-
else if (ch == '+')
80+
} else if (ch == '+') {
4181
ch = 62;
42-
else if (ch == '=') { /* end of text */
43-
if (endtext)
82+
} else if (ch == '=') { /* end of text */
83+
if (st->endtext) {
4484
break;
45-
endtext = TRUE;
46-
lindex--;
47-
if (lindex < 0)
48-
lindex = 3;
49-
}
50-
else if (ch == '/')
85+
}
86+
st->endtext = TRUE;
87+
st->lindex--;
88+
if (st->lindex < 0) {
89+
st->lindex = 3;
90+
}
91+
} else if (ch == '/') {
5192
ch = 63;
52-
else if (endtext)
93+
} else if (st->endtext) {
5394
break;
54-
else
95+
} else {
5596
ignore = TRUE;
56-
97+
}
98+
5799
if (!ignore) {
58-
if (!endtext) {
59-
ibuf[lindex] = ch;
100+
if (!st->endtext) {
101+
st->ibuf[st->lindex] = ch;
60102

61-
lindex++;
62-
lindex &= 3; /* use bit arithmetic instead of remainder */
103+
st->lindex++;
104+
st->lindex &= 3; /* use bit arithmetic instead of remainder */
63105
}
64-
if ((0 == lindex) || endtext) {
65-
66-
obuf[0] = (ibuf[0] << 2) | ((ibuf[1] & 0x30) >> 4);
67-
obuf[1] =
68-
((ibuf[1] & 0x0F) << 4) | ((ibuf[2] & 0x3C) >> 2);
69-
obuf[2] = ((ibuf[2] & 0x03) << 6) | (ibuf[3] & 0x3F);
106+
if ((0 == st->lindex) || st->endtext) {
70107

71-
switch (lindex) {
108+
st->obuf[0] = (st->ibuf[0] << 2) | ((st->ibuf[1] & 0x30) >> 4);
109+
st->obuf[1] =
110+
((st->ibuf[1] & 0x0F) << 4) | ((st->ibuf[2] & 0x3C) >> 2);
111+
st->obuf[2] = ((st->ibuf[2] & 0x03) << 6) | (st->ibuf[3] & 0x3F);
112+
113+
switch (st->lindex) {
72114
case 1:
73-
sprintf(out, "%c", obuf[0]);
74-
out++;
75-
(*length)++;
115+
*out++ = st->obuf[0];
116+
length += 1;
76117
break;
77118
case 2:
78-
sprintf(out, "%c%c", obuf[0], obuf[1]);
79-
out += 2;
80-
(*length) += 2;
119+
*out++ = st->obuf[0];
120+
*out++ = st->obuf[1];
121+
length += 2;
81122
break;
82123
default:
83-
sprintf(out, "%c%c%c", obuf[0], obuf[1], obuf[2]);
84-
out += 3;
85-
(*length) += 3;
124+
*out++ = st->obuf[0];
125+
*out++ = st->obuf[1];
126+
*out++ = st->obuf[2];
127+
length += 3;
86128
break;
87129
}
88-
memset(ibuf, 0, sizeof(ibuf));
130+
memset(st->ibuf, 0, sizeof(st->ibuf));
89131
}
90132
}
91133
intext++;
92134
}
93135
*out = 0;
136+
return (length);
137+
}
138+
139+
/*
140+
* base64_decode_string()
141+
* Convenience wrapper when decoding a single string
142+
*/
143+
int base64_decode_string(const char *intext, char *out)
144+
{
145+
struct base64_decoder_state *st = base64_decoder_state_new();
146+
int length = 0;
147+
148+
if (!st) {
149+
return 0;
150+
}
151+
length = base64_decode_stream(st, intext, out);
152+
base64_decoder_state_free(st);
153+
154+
return (length);
94155
}

src/base64.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,25 @@
22
** MIME Decode - base64.c
33
*/
44

5-
#ifndef HYPERMAIL_BASE64_H
6-
#define HYPERMAIL_BASE64_H
7-
void base64Decode(char *, char *, int *);
5+
#ifndef _HYPERMAIL_BASE64_H
6+
#define _HYPERMAIL_BASE64_H
87

9-
#endif /* HYPERMAIL_BASE64_H */
8+
/* Common state as you feed successive lines into base64_decode_stream() */
9+
struct base64_decoder_state {
10+
unsigned char ibuf[4]; /* input buffer */
11+
unsigned char obuf[3]; /* output buffer */
12+
int lindex; /* index for ibuf / obuf */
13+
char endtext; /* base64 end detected */
14+
};
15+
16+
/* allocate and free a base64_decoder_state structure */
17+
struct base64_decoder_state *base64_decoder_state_new(void);
18+
void base64_decoder_state_free(struct base64_decoder_state *);
19+
20+
/* decode a stream made of multiple base64 lines */
21+
int base64_decode_stream(struct base64_decoder_state *, const char *, char *);
22+
23+
/* decode a single string */
24+
int base64_decode_string(const char *, char *);
25+
26+
#endif /* _HYPERMAIL_BASE64_H */

src/parse.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,12 +1019,12 @@ static char *mdecodeRFC2047(char *string, int length, char *charsetsave)
10191019
}
10201020
else if (!strcasecmp("b", encoding)) {
10211021
/* base64 decoding */
1022-
int len;
10231022
size_t charsetlen;
10241023
#ifdef HAVE_ICONV
10251024
size_t tmplen;
10261025
char *output2;
1027-
base64Decode(ptr, output, &len);
1026+
1027+
base64_decode_string(ptr, output);
10281028
output2=i18n_convstring(output,charset,"UTF-8",&tmplen);
10291029
memcpy(output,output2,tmplen);
10301030
output += tmplen;
@@ -1033,7 +1033,9 @@ static char *mdecodeRFC2047(char *string, int length, char *charsetsave)
10331033
memcpy(charsetsave,charset,charsetlen);
10341034
charsetsave[charsetlen] = '\0';
10351035
#else
1036-
base64Decode(ptr, output, &len);
1036+
int len;
1037+
1038+
len = base64_decode_string(ptr, output)
10371039
output += len;
10381040
#endif
10391041
}
@@ -1754,7 +1756,8 @@ int parsemail(char *mbox, /* file name */
17541756
the content_disposition */
17551757
char *description = NULL; /* user-supplied description for an attachment */
17561758
char attach_force;
1757-
1759+
struct base64_decoder_state *b64_decoder_state = NULL; /* multi-line base64 decoding */
1760+
17581761
EncodeType decode = ENCODE_NORMAL;
17591762
ContentType content = CONTENT_TEXT;
17601763

@@ -2043,10 +2046,15 @@ int parsemail(char *mbox, /* file name */
20432046
if (!message_headers_parsed) {
20442047
getname(head->line, &namep, &emailp);
20452048
if (set_spamprotect) {
2046-
emailp = spamify(strsav(emailp));
2049+
char *tmp;
2050+
tmp = emailp;
2051+
emailp = spamify(tmp);
2052+
free(tmp);
20472053
/* we need to "fix" the name as well, as sometimes
20482054
the email ends up in the name part */
2049-
namep = spamify(strsav(namep));
2055+
tmp = strsav(namep);
2056+
namep = spamify(tmp);
2057+
free(tmp);
20502058
}
20512059
}
20522060
}
@@ -2402,6 +2410,7 @@ int parsemail(char *mbox, /* file name */
24022410
}
24032411
else if (!strncasecmp(ptr, "BASE64", 6)) {
24042412
decode = ENCODE_BASE64;
2413+
b64_decoder_state = base64_decoder_state_new();
24052414
}
24062415
else if (!strncasecmp(ptr, "8BIT", 4)) {
24072416
decode = ENCODE_NORMAL;
@@ -3237,6 +3246,10 @@ int parsemail(char *mbox, /* file name */
32373246
/* go back to default mode: */
32383247
file_created = alternative_lastfile_created = NO_FILE;
32393248
content = CONTENT_TEXT;
3249+
if (decode == ENCODE_BASE64) {
3250+
base64_decoder_state_free(b64_decoder_state);
3251+
b64_decoder_state = NULL;
3252+
}
32403253
decode = ENCODE_NORMAL;
32413254
Mime_B = FALSE;
32423255
skip_mime_epilogue = FALSE;
@@ -3532,6 +3545,10 @@ int parsemail(char *mbox, /* file name */
35323545
}
35333546
/* go back to the MIME attachment default mode */
35343547
content = CONTENT_TEXT;
3548+
if (decode == ENCODE_BASE64) {
3549+
base64_decoder_state_free(b64_decoder_state);
3550+
b64_decoder_state = NULL;
3551+
}
35353552
decode = ENCODE_NORMAL;
35363553
multilinenoend = FALSE;
35373554
*attachname = '\0';
@@ -3573,7 +3590,7 @@ int parsemail(char *mbox, /* file name */
35733590
}
35743591
break;
35753592
case ENCODE_BASE64:
3576-
base64Decode(line, newbuffer, &datalen);
3593+
datalen = base64_decode_stream(b64_decoder_state, line, newbuffer);
35773594
data = newbuffer;
35783595
break;
35793596
case ENCODE_UUENCODE:
@@ -4003,8 +4020,9 @@ int parsemail(char *mbox, /* file name */
40034020
}
40044021
}
40054022

4006-
if (ENCODE_QP == decode)
4023+
if (ENCODE_QP == decode) {
40074024
free(data); /* this was allocated by mdecodeQP() */
4025+
}
40084026
}
40094027
}
40104028
}
@@ -4221,6 +4239,10 @@ int parsemail(char *mbox, /* file name */
42214239

42224240
/* go back to default mode: */
42234241
content = CONTENT_TEXT;
4242+
if (ENCODE_BASE64 == decode) {
4243+
base64_decoder_state_free(b64_decoder_state);
4244+
b64_decoder_state = NULL;
4245+
}
42244246
decode = ENCODE_NORMAL;
42254247
Mime_B = FALSE;
42264248
skip_mime_epilogue = FALSE;

0 commit comments

Comments
 (0)