Skip to content

Commit 4c39cd3

Browse files
committed
SJIS-mac encoding conversion: handle invalid (or truncated) 2nd byte for Kanji correctly
Also, don't accept 1st bytes above 0xED, since none of the possible 2-byte sequences starting with 0xEE and above are actually mapped to any character.
1 parent d40f9cf commit 4c39cd3

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737

3838
extern const unsigned char mblen_table_sjis[];
3939

40-
static int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
40+
static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter);
41+
static int mbfl_filt_conv_sjis_mac_wchar_flush(mbfl_convert_filter *filter);
4142

4243
static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL};
4344

@@ -58,7 +59,7 @@ const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = {
5859
mbfl_filt_conv_common_ctor,
5960
NULL,
6061
mbfl_filt_conv_sjis_mac_wchar,
61-
mbfl_filt_conv_common_flush,
62+
mbfl_filt_conv_sjis_mac_wchar_flush,
6263
NULL,
6364
};
6465

@@ -68,7 +69,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = {
6869
mbfl_filt_conv_common_ctor,
6970
NULL,
7071
mbfl_filt_conv_wchar_sjis_mac,
71-
mbfl_filt_conv_sjis_mac_flush,
72+
mbfl_filt_conv_wchar_sjis_mac_flush,
7273
NULL,
7374
};
7475

@@ -132,7 +133,7 @@ mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
132133
CK((*filter->output_function)(c, filter->data));
133134
} else if (c > 0xa0 && c < 0xe0) { /* kana */
134135
CK((*filter->output_function)(0xfec0 + c, filter->data));
135-
} else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
136+
} else if (c > 0x80 && c <= 0xed && c != 0xa0) { /* kanji first char */
136137
filter->status = 1;
137138
filter->cache = c;
138139
} else if (c == 0x5c) {
@@ -216,6 +217,10 @@ mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
216217
for (i=0; i<8; i++) {
217218
if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) {
218219
w = code_map[i][s - code_ofst_tbl[i][0]];
220+
if (w == 0) {
221+
CK((*filter->output_function)((c1 << 8) | c | MBFL_WCSGROUP_THROUGH, filter->data));
222+
return c;
223+
}
219224
s2 = 0;
220225
if (s >= 0x043e && s <= 0x0441) {
221226
s2 = 0xf87a;
@@ -247,8 +252,6 @@ mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
247252
w |= MBFL_WCSPLANE_WINCP932;
248253
}
249254
CK((*filter->output_function)(w, filter->data));
250-
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
251-
CK((*filter->output_function)(c, filter->data));
252255
} else {
253256
w = (c1 << 8) | c;
254257
w &= MBFL_WCSGROUP_MASK;
@@ -265,6 +268,15 @@ mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
265268
return c;
266269
}
267270

271+
/*
 * Flush handler for the SJIS-mac => wchar conversion filter.
 *
 * When the input ends right after a kanji lead byte (the converter left
 * status == 1 with that byte in filter->cache), the dangling byte is reported
 * through the output function, masked with MBFL_WCSGROUP_MASK and tagged with
 * MBFL_WCSGROUP_THROUGH.
 *
 * The flush is then forwarded to the next filter in the chain, if one is
 * registered, so that downstream filters can emit whatever they still hold.
 * NOTE(review): this mirrors what the previously used common flush handler is
 * understood to do -- confirm against mbfl_filt_conv_common_flush.
 *
 * Returns 0 on success. CK() is defined elsewhere; it is presumed to return
 * early with an error code when the output function fails.
 */
static int mbfl_filt_conv_sjis_mac_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status == 1) {
		int w = (filter->cache & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;

		/* Clear the pending state first so that a repeated flush cannot
		 * report the same truncated byte twice. */
		filter->status = 0;
		filter->cache = 0;

		CK((*filter->output_function)(w, filter->data));
	}

	/* Propagate the flush down the filter chain. */
	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
279+
268280
/*
269281
* wchar => SJIS-mac
270282
*/
@@ -660,7 +672,7 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
660672
}
661673

662674
static int
663-
mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter)
675+
mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter)
664676
{
665677
int i, c1, s1 = 0;
666678
if (filter->status == 1 && filter->cache > 0) {

0 commit comments

Comments
 (0)