Skip to content

Commit e6f1a72

Browse files
committed
Add test suite for mobile variants of UTF-8 (and fix bugs)
1 parent 1865576 commit e6f1a72

File tree

8 files changed

+2218
-36
lines changed

8 files changed

+2218
-36
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,10 @@ int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
832832
int c1 = filter->cache;
833833
if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
834834
CK((*filter->output_function)(c1, filter->data));
835+
} else if (filter->status == 2) {
836+
/* First of a pair of Regional Indicator codepoints came at the end of a string */
837+
filter->cache = filter->status = 0;
838+
mbfl_filt_conv_illegal_output(c1, filter);
835839
}
836840

837841
if (filter->flush_function) {

ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "mbfilter_sjis_mobile.h"
3434

3535
extern int mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter);
36+
extern int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter);
3637

3738
extern const unsigned char mblen_table_utf8[];
3839

@@ -100,7 +101,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf8_docomo = {
100101
mbfl_filt_conv_common_ctor,
101102
NULL,
102103
mbfl_filt_conv_wchar_utf8_mobile,
103-
mbfl_filt_conv_common_flush,
104+
mbfl_filt_conv_sjis_mobile_flush,
104105
NULL,
105106
};
106107

@@ -120,7 +121,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi_a = {
120121
mbfl_filt_conv_common_ctor,
121122
NULL,
122123
mbfl_filt_conv_wchar_utf8_mobile,
123-
mbfl_filt_conv_common_flush,
124+
mbfl_filt_conv_sjis_mobile_flush,
124125
NULL,
125126
};
126127

@@ -140,7 +141,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi_b = {
140141
mbfl_filt_conv_common_ctor,
141142
NULL,
142143
mbfl_filt_conv_wchar_utf8_mobile,
143-
mbfl_filt_conv_common_flush,
144+
mbfl_filt_conv_sjis_mobile_flush,
144145
NULL,
145146
};
146147

@@ -160,16 +161,13 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf8_sb = {
160161
mbfl_filt_conv_common_ctor,
161162
NULL,
162163
mbfl_filt_conv_wchar_utf8_mobile,
163-
mbfl_filt_conv_common_flush,
164+
mbfl_filt_conv_sjis_mobile_flush,
164165
NULL,
165166
};
166167

167168
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
168169
int mbfl_filt_put_invalid_char(int c, mbfl_convert_filter *filter);
169170

170-
/*
171-
* UTF-8 => wchar
172-
*/
173171
int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
174172
{
175173
int s, s1 = 0, c1 = 0, snd = 0;
@@ -192,25 +190,22 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
192190
CK(mbfl_filt_put_invalid_char(c, filter));
193191
}
194192
break;
193+
195194
case 0x10: /* 2byte code 2nd char: 0x80-0xbf */
196195
case 0x21: /* 3byte code 3rd char: 0x80-0xbf */
197196
case 0x32: /* 4byte code 4th char: 0x80-0xbf */
198197
filter->status = 0;
199198
if (c >= 0x80 && c <= 0xbf) {
200-
s = (filter->cache<<6) | (c & 0x3f);
199+
s = (filter->cache << 6) | (c & 0x3f);
201200
filter->cache = 0;
202201

203-
if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo &&
204-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4) > 0) {
202+
if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4) > 0) {
205203
s = mbfilter_sjis_emoji_docomo2unicode(s1, &snd);
206-
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a &&
207-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7) > 0) {
204+
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7) > 0) {
208205
s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd);
209-
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b &&
210-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8) > 0) {
206+
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8) > 0) {
211207
s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd);
212-
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb &&
213-
mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6) > 0) {
208+
} else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6) > 0) {
214209
s = mbfilter_sjis_emoji_sb2unicode(s1, &snd);
215210
}
216211

@@ -223,12 +218,13 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
223218
goto retry;
224219
}
225220
break;
221+
226222
case 0x20: /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-0x9f,1-C,E-F:0x80-0xbf */
227-
s = (filter->cache<<6) | (c & 0x3f);
223+
s = (filter->cache << 6) | (c & 0x3f);
228224
c1 = filter->cache & 0xf;
229225

230226
if ((c >= 0x80 && c <= 0xbf) &&
231-
((c1 == 0x0 && c >= 0xa0) ||
227+
((c1 == 0x0 && c >= 0xa0) ||
232228
(c1 == 0xd && c < 0xa0) ||
233229
(c1 > 0x0 && c1 != 0xd))) {
234230
filter->cache = s;
@@ -238,12 +234,13 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
238234
goto retry;
239235
}
240236
break;
237+
241238
case 0x30: /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
242-
s = (filter->cache<<6) | (c & 0x3f);
239+
s = (filter->cache << 6) | (c & 0x3f);
243240
c1 = filter->cache & 0x7;
244241

245242
if ((c >= 0x80 && c <= 0xbf) &&
246-
((c1 == 0x0 && c >= 0x90) ||
243+
((c1 == 0x0 && c >= 0x90) ||
247244
(c1 == 0x4 && c < 0x90) ||
248245
(c1 > 0x0 && c1 != 0x4))) {
249246
filter->cache = s;
@@ -253,9 +250,10 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
253250
goto retry;
254251
}
255252
break;
253+
256254
case 0x31: /* 4byte code 3rd char: 0x80-0xbf */
257255
if (c >= 0x80 && c <= 0xbf) {
258-
filter->cache = (filter->cache<<6) | (c & 0x3f);
256+
filter->cache = (filter->cache << 6) | (c & 0x3f);
259257
filter->status++;
260258
} else {
261259
CK(mbfl_filt_put_invalid_char(filter->cache, filter));
@@ -270,26 +268,15 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
270268
return c;
271269
}
272270

273-
/*
274-
* wchar => UTF-8
275-
*/
276271
int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter)
277272
{
278273
if (c >= 0 && c < 0x110000) {
279274
int s1, c1;
280275

281-
if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo &&
282-
mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 &&
283-
mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) ||
284-
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a &&
285-
mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 &&
286-
mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) ||
287-
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b &&
288-
mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 &&
289-
mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) ||
290-
(filter->to->no_encoding == mbfl_no_encoding_utf8_sb &&
291-
mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 &&
292-
mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) {
276+
if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) ||
277+
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) ||
278+
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) ||
279+
(filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) {
293280
c = c1;
294281
}
295282

0 commit comments

Comments
 (0)