Skip to content

Commit 8abc5e6

Browse files
committed
Catch and handle errors in UTF-7 text conversion
1 parent 689978a commit 8abc5e6

File tree

1 file changed

+47
-1
lines changed

1 file changed

+47
-1
lines changed

ext/mbstring/libmbfl/filters/mbfilter_utf7.c

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include "mbfilter.h"
3131
#include "mbfilter_utf7.h"
3232

33+
static int mbfl_filt_conv_utf7_wchar_flush(mbfl_convert_filter *filter);
34+
3335
static const unsigned char mbfl_base64_table[] = {
3436
/* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */
3537
0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,
@@ -62,7 +64,7 @@ const struct mbfl_convert_vtbl vtbl_utf7_wchar = {
6264
mbfl_filt_conv_common_ctor,
6365
NULL,
6466
mbfl_filt_conv_utf7_wchar,
65-
mbfl_filt_conv_common_flush,
67+
mbfl_filt_conv_utf7_wchar_flush,
6668
NULL,
6769
};
6870

@@ -102,6 +104,11 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
102104
if (filter->status) { /* Modified Base64 */
103105
n = decode_base64_char(c);
104106
if (n < 0) {
107+
if (filter->cache) {
108+
/* Either we were expecting the 2nd half of a surrogate pair which
109+
* never came, or else the last Base64 data was not padded with zeroes */
110+
(*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data);
111+
}
105112
if (c == '-') {
106113
if (filter->status == 1) { /* "+-" -> "+" */
107114
CK((*filter->output_function)('+', filter->data));
@@ -143,6 +150,10 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
143150
n = (n & 0x3) << 14;
144151
filter->status = 5;
145152
if (s >= 0xd800 && s < 0xdc00) {
153+
if (filter->cache & 0xfff0000) {
154+
/* We were waiting for the 2nd part of a surrogate pair */
155+
(*filter->output_function)(((filter->cache & 0xfff0000) >> 6) | MBFL_WCSGROUP_THROUGH, filter->data);
156+
}
146157
s = (((s & 0x3ff) << 16) + 0x400000) | n;
147158
filter->cache = s;
148159
} else if (s >= 0xdc00 && s < 0xe000) {
@@ -155,6 +166,10 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
155166
CK((*filter->output_function)(s | MBFL_WCSGROUP_THROUGH, filter->data));
156167
}
157168
} else {
169+
if (filter->cache & 0xfff0000) {
170+
/* We were waiting for the 2nd part of a surrogate pair */
171+
(*filter->output_function)(((filter->cache & 0xfff0000) >> 6) | MBFL_WCSGROUP_THROUGH, filter->data);
172+
}
158173
filter->cache = n;
159174
CK((*filter->output_function)(s, filter->data));
160175
}
@@ -173,6 +188,10 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
173188
n = (n & 0xf) << 12;
174189
filter->status = 8;
175190
if (s >= 0xd800 && s < 0xdc00) {
191+
if (filter->cache & 0xfff0000) {
192+
/* We were waiting for the 2nd part of a surrogate pair */
193+
(*filter->output_function)(((filter->cache & 0xfff0000) >> 6) | MBFL_WCSGROUP_THROUGH, filter->data);
194+
}
176195
s = (((s & 0x3ff) << 16) + 0x400000) | n;
177196
filter->cache = s;
178197
} else if (s >= 0xdc00 && s < 0xe000) {
@@ -185,6 +204,10 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
185204
CK((*filter->output_function)(s | MBFL_WCSGROUP_THROUGH, filter->data));
186205
}
187206
} else {
207+
if (filter->cache & 0xfff0000) {
208+
/* We were waiting for the 2nd part of a surrogate pair */
209+
(*filter->output_function)(((filter->cache & 0xfff0000) >> 6) | MBFL_WCSGROUP_THROUGH, filter->data);
210+
}
188211
filter->cache = n;
189212
CK((*filter->output_function)(s, filter->data));
190213
}
@@ -198,6 +221,10 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
198221
s = n | (filter->cache & 0xffff);
199222
filter->status = 2;
200223
if (s >= 0xd800 && s < 0xdc00) {
224+
if (filter->cache & 0xfff0000) {
225+
/* We were waiting for the 2nd part of a surrogate pair */
226+
(*filter->output_function)(((filter->cache & 0xfff0000) >> 6) | MBFL_WCSGROUP_THROUGH, filter->data);
227+
}
201228
s = (((s & 0x3ff) << 16) + 0x400000);
202229
filter->cache = s;
203230
} else if (s >= 0xdc00 && s < 0xe000) {
@@ -212,6 +239,10 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
212239
CK((*filter->output_function)(s, filter->data));
213240
}
214241
} else {
242+
if (filter->cache & 0xfff0000) {
243+
/* We were waiting for the 2nd part of a surrogate pair */
244+
(*filter->output_function)(((filter->cache & 0xfff0000) >> 6) | MBFL_WCSGROUP_THROUGH, filter->data);
245+
}
215246
filter->cache = 0;
216247
CK((*filter->output_function)(s, filter->data));
217248
}
@@ -225,6 +256,21 @@ int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
225256
return c;
226257
}
227258

259+
/*
 * Flush handler for the UTF-7 -> wchar conversion filter.
 *
 * If filter->cache is still non-zero at end of input, the cached value is
 * passed to the output function OR'ed with MBFL_WCSGROUP_THROUGH so that the
 * leftover data is reported rather than silently dropped. The downstream
 * flush function, if one is set, is then invoked.
 *
 * The decoder state (cache and status) is cleared afterwards so that a second
 * flush, or further use of the same filter, does not report the same stale
 * data again or resume in the middle of a Base64 section.
 *
 * Always returns 0.
 */
static int mbfl_filt_conv_utf7_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->cache) {
		/* Either we were expecting the 2nd half of a surrogate pair which
		 * never came, or else the last Base64 data was not padded with zeroes */
		(*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data);
		filter->cache = 0;
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	/* Leave the filter in its initial (direct, non-Base64) state */
	filter->status = 0;

	return 0;
}
273+
228274
int mbfl_filt_conv_wchar_utf7(int c, mbfl_convert_filter *filter)
229275
{
230276
int s;

0 commit comments

Comments
 (0)