Skip to content

Commit 0ce755b

Browse files
committed
Implement mb_encode_mimeheader using fast text conversion filters
The behavior of the new mb_encode_mimeheader implementation closely follows the old implementation, except for three points:

• The old implementation was missing a call to the mbfl_convert_filter flush function, so it would sometimes truncate the input string just before its end.

• The old implementation would drop zero bytes when QPrint-encoding. For example, if you tried to QPrint-encode the UTF-32BE string "\x00\x00\x12\x34", the result would be "=12=34", which does not decode to a valid UTF-32BE string. This is now fixed.

• In some rare corner cases, the new implementation will choose to Base64-encode or QPrint-encode the input string where the old implementation would have just added newlines to it. Specifically, this can happen when there is a non-space ASCII character, followed by a large number of ASCII spaces, followed by a non-ASCII character.

The new implementation is around 2.5-8x faster than the old one, depending on the text encoding and transfer encoding used. Performance gains are greater with Base64 transfer encoding than with QPrint transfer encoding; this is not because QPrint-encoding bytes is slow, but because QPrint-encoded output is much bigger than Base64-encoded output and takes more lines, so the process of finding the right place to break a line has to be repeated many more times.
1 parent 6ebb506 commit 0ce755b

File tree

8 files changed

+649
-464
lines changed

8 files changed

+649
-464
lines changed

ext/mbstring/libmbfl/filters/mbfilter_base64.c

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,13 @@ int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
9999
filter->cache |= (c & 0xff) << 8;
100100
} else {
101101
filter->status &= ~0xff;
102-
if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
103-
n = (filter->status & 0xff00) >> 8;
104-
if (n > 72) {
105-
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
106-
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
107-
filter->status &= ~0xff00;
108-
}
109-
filter->status += 0x400;
102+
n = (filter->status & 0xff00) >> 8;
103+
if (n > 72) {
104+
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
105+
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
106+
filter->status &= ~0xff00;
110107
}
108+
filter->status += 0x400;
111109
n = filter->cache | (c & 0xff);
112110
CK((*filter->output_function)(mbfl_base64_table[(n >> 18) & 0x3f], filter->data));
113111
CK((*filter->output_function)(mbfl_base64_table[(n >> 12) & 0x3f], filter->data));
@@ -129,11 +127,9 @@ int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
129127
filter->cache = 0;
130128
/* flush fragments */
131129
if (status >= 1) {
132-
if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
133-
if (len > 72){
134-
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
135-
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
136-
}
130+
if (len > 72){
131+
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
132+
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
137133
}
138134
CK((*filter->output_function)(mbfl_base64_table[(cache >> 18) & 0x3f], filter->data));
139135
CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data));

ext/mbstring/libmbfl/filters/mbfilter_qprint.c

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
#include "mbfilter.h"
3131
#include "mbfilter_qprint.h"
32-
#include "unicode_prop.h"
3332

3433
static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
3534
static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
@@ -96,28 +95,25 @@ int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
9695
break;
9796
}
9897

99-
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
100-
if (s == 0x0a || (s == 0x0d && c != 0x0a)) { /* line feed */
101-
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
102-
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
103-
filter->status &= ~0xff00;
104-
break;
105-
} else if (s == 0x0d) {
106-
break;
107-
}
98+
if (s == '\n' || (s == '\r' && c != '\n')) { /* line feed */
99+
CK((*filter->output_function)('\r', filter->data));
100+
CK((*filter->output_function)('\n', filter->data));
101+
filter->status &= ~0xff00;
102+
break;
103+
} else if (s == 0x0d) {
104+
break;
108105
}
109106

110-
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0 && n >= 72) { /* soft line feed */
111-
CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
112-
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
113-
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
107+
if (n >= 72) { /* soft line feed */
108+
CK((*filter->output_function)('=', filter->data));
109+
CK((*filter->output_function)('\r', filter->data));
110+
CK((*filter->output_function)('\n', filter->data));
114111
filter->status &= ~0xff00;
115112
}
116113

117-
if (s <= 0 || s >= 0x80 || s == 0x3d /* not ASCII or '=' */
118-
|| ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) && mime_char_needs_qencode[s])) {
114+
if (s <= 0 || s >= 0x80 || s == '=') { /* not ASCII or '=' */
119115
/* hex-octet */
120-
CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
116+
CK((*filter->output_function)('=', filter->data));
121117
n = (s >> 4) & 0xf;
122118
if (n < 10) {
123119
n += 48; /* '0' */
@@ -132,14 +128,10 @@ int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
132128
n += 55;
133129
}
134130
CK((*filter->output_function)(n, filter->data));
135-
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
136-
filter->status += 0x300;
137-
}
131+
filter->status += 0x300;
138132
} else {
139133
CK((*filter->output_function)(s, filter->data));
140-
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
141-
filter->status += 0x100;
142-
}
134+
filter->status += 0x100;
143135
}
144136
break;
145137
}

0 commit comments

Comments
 (0)