Skip to content

Commit 2f98bd8

Browse files
committed
SJIS-2004 encoding conversion: handle invalid (or truncated) 2nd byte for Kanji correctly
If the 2nd byte of a 2-byte character is invalid, the setting of mb_substitute_character() should be respected. Instead, mbstring was 'swallowing' the first byte and then emitting the 2nd byte as if it were an ASCII character. Likewise, if the 2nd byte is missing (the input is truncated), report an illegal character as specified by mb_substitute_character() instead of just keeping quiet.
1 parent a5827c2 commit 2f98bd8

File tree

4 files changed

+18
-6
lines changed

4 files changed

+18
-6
lines changed

ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = {
6262
mbfl_filt_conv_common_ctor,
6363
NULL,
6464
mbfl_filt_conv_wchar_jis2004,
65-
mbfl_filt_conv_jis2004_flush,
65+
mbfl_filt_conv_wchar_jis2004_flush,
6666
NULL,
6767
};

ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = {
6262
mbfl_filt_conv_common_ctor,
6363
NULL,
6464
mbfl_filt_conv_wchar_jis2004,
65-
mbfl_filt_conv_jis2004_flush,
65+
mbfl_filt_conv_wchar_jis2004_flush,
6666
NULL,
6767
};

ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
5757
mbfl_filt_conv_common_ctor,
5858
NULL,
5959
mbfl_filt_conv_jis2004_wchar,
60-
mbfl_filt_conv_common_flush,
60+
mbfl_filt_conv_jis2004_wchar_flush,
6161
NULL,
6262
};
6363

@@ -67,7 +67,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = {
6767
mbfl_filt_conv_common_ctor,
6868
NULL,
6969
mbfl_filt_conv_wchar_jis2004,
70-
mbfl_filt_conv_jis2004_flush,
70+
mbfl_filt_conv_wchar_jis2004_flush,
7171
NULL,
7272
};
7373

@@ -202,6 +202,9 @@ mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter)
202202
} else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
203203
if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
204204
SJIS_DECODE(c1, c, s1, s2);
205+
} else {
206+
CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
207+
break;
205208
}
206209
} else {
207210
s1 = c1;
@@ -471,6 +474,14 @@ mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter)
471474
return c;
472475
}
473476

477+
/*
 * Flush handler for the JIS-2004 -> wchar decoding filter; installed as the
 * flush callback of vtbl_sjis2004_wchar in place of mbfl_filt_conv_common_flush.
 *
 * If the low four bits of filter->status are non-zero when the input ends,
 * filter->cache is passed to filter->output_function with
 * MBFL_WCSGROUP_THROUGH OR'ed in, so that the truncated character is reported
 * rather than silently dropped.
 *
 * NOTE(review): a non-zero low nibble of filter->status presumably means a
 * lead byte is still waiting for its trail byte, and filter->cache presumably
 * holds that lead byte -- confirm against mbfl_filt_conv_jis2004_wchar.
 * NOTE(review): this function does not clear filter->status / filter->cache
 * and makes no other flush call; verify that callers do not depend on either.
 *
 * Returns 0 when it runs to the end.
 */
int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
478+
{
479+
if (filter->status & 0xF) {
480+
/* CK is defined elsewhere; presumably it returns early if the output
 * callback reports failure -- TODO confirm. */
CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
481+
}
482+
return 0;
483+
}
484+
474485
int
475486
mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
476487
int k;
@@ -665,7 +676,7 @@ mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
665676
}
666677

667678
int
668-
mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
679+
mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter)
669680
{
670681
int k, c1, c2, s1, s2;
671682

ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004;
3939
int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter);
4040
int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter);
4141

42-
int mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter);
42+
int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter);
43+
int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter);
4344

4445
#endif /* MBFL_MBFILTER_SJIS_2004_H */
4546

0 commit comments

Comments
 (0)