Skip to content

Commit 1865576

Browse files
committed
Add test suite for EUC-JP-WIN (or EUC-JP-MS) text encoding (and fix bugs)
1 parent 6a693d2 commit 1865576

File tree

4 files changed

+15356
-55
lines changed

4 files changed

+15356
-55
lines changed

ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c

Lines changed: 51 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@
3434
#include "unicode_table_jis.h"
3535
#include "cp932_table.h"
3636

37+
static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter);
38+
3739
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
3840
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3941
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -72,7 +74,7 @@ const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = {
7274
mbfl_filt_conv_common_ctor,
7375
NULL,
7476
mbfl_filt_conv_eucjpwin_wchar,
75-
mbfl_filt_conv_common_flush,
77+
mbfl_filt_conv_eucjpwin_wchar_flush,
7678
NULL,
7779
};
7880

@@ -88,29 +90,23 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = {
8890

8991
/*
 * CK: evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Wrapped in do { ... } while (0) so the macro can be
 * used as a single statement followed by a semicolon.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
9092

91-
/*
92-
* eucJP-win => wchar
93-
*/
94-
int
95-
mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
93+
int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
9694
{
9795
int c1, s, w, n;
9896

9997
switch (filter->status) {
10098
case 0:
101-
if (c >= 0 && c < 0x80) { /* latin */
99+
if (c >= 0 && c < 0x80) { /* latin */
102100
CK((*filter->output_function)(c, filter->data));
103-
} else if (c > 0xa0 && c < 0xff) { /* CP932 first char */
101+
} else if (c >= 0xa1 && c <= 0xfe) { /* CP932 first char */
104102
filter->status = 1;
105103
filter->cache = c;
106-
} else if (c == 0x8e) { /* kana first char */
104+
} else if (c == 0x8e) { /* kana first char */
107105
filter->status = 2;
108-
} else if (c == 0x8f) { /* X 0212 first char */
106+
} else if (c == 0x8f) { /* X 0212 first char */
109107
filter->status = 3;
110108
} else {
111-
w = c & MBFL_WCSGROUP_MASK;
112-
w |= MBFL_WCSGROUP_THROUGH;
113-
CK((*filter->output_function)(w, filter->data));
109+
CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
114110
}
115111
break;
116112

@@ -137,6 +133,7 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
137133
w = 0xffe2; /* FULLWIDTH NOT SIGN */
138134
}
139135
}
136+
140137
if (w == 0) {
141138
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
142139
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
@@ -146,18 +143,13 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
146143
w = s - (84 * 94) + 0xe000;
147144
}
148145
}
146+
149147
if (w <= 0) {
150-
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
151-
w &= MBFL_WCSPLANE_MASK;
152-
w |= MBFL_WCSPLANE_WINCP932;
148+
w = ((c1 & 0x7f) << 8) | (c & 0x7f) | MBFL_WCSPLANE_WINCP932;
153149
}
154150
CK((*filter->output_function)(w, filter->data));
155-
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
156-
CK((*filter->output_function)(c, filter->data));
157151
} else {
158-
w = (c1 << 8) | c;
159-
w &= MBFL_WCSGROUP_MASK;
160-
w |= MBFL_WCSGROUP_THROUGH;
152+
w = (c1 << 8) | c | MBFL_WCSGROUP_THROUGH;
161153
CK((*filter->output_function)(w, filter->data));
162154
}
163155
break;
@@ -167,37 +159,30 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
167159
if (c > 0xa0 && c < 0xe0) {
168160
w = 0xfec0 + c;
169161
CK((*filter->output_function)(w, filter->data));
170-
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
171-
CK((*filter->output_function)(c, filter->data));
172162
} else {
173-
w = 0x8e00 | c;
174-
w &= MBFL_WCSGROUP_MASK;
175-
w |= MBFL_WCSGROUP_THROUGH;
163+
w = 0x8e00 | c | MBFL_WCSGROUP_THROUGH;
176164
CK((*filter->output_function)(w, filter->data));
177165
}
178166
break;
179167

180168
case 3: /* got 0x8f, X 0212 first char */
181-
if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
182-
CK((*filter->output_function)(c, filter->data));
183-
filter->status = 0;
184-
} else {
185-
filter->status++;
186-
filter->cache = c;
187-
}
169+
filter->status++;
170+
filter->cache = c;
188171
break;
189172
case 4: /* got 0x8f, X 0212 second char */
190173
filter->status = 0;
191174
c1 = filter->cache;
192175
if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) {
193176
s = (c1 - 0xa1)*94 + c - 0xa1;
177+
194178
if (s >= 0 && s < jisx0212_ucs_table_size) {
195179
w = jisx0212_ucs_table[s];
180+
196181
if (w == 0x007e) {
197182
w = 0xff5e; /* FULLWIDTH TILDE */
198183
}
199184
} else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */
200-
s = (c1<< 8) | c;
185+
s = (c1 << 8) | c;
201186
w = 0;
202187
n = 0;
203188
while (n < cp932ext3_eucjp_table_size) {
@@ -214,21 +199,17 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
214199
} else {
215200
w = 0;
216201
}
202+
217203
if (w == 0x00A6) {
218204
w = 0xFFE4; /* FULLWIDTH BROKEN BAR */
219205
}
206+
220207
if (w <= 0) {
221-
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
222-
w &= MBFL_WCSPLANE_MASK;
223-
w |= MBFL_WCSPLANE_JIS0212;
208+
w = ((c1 & 0x7f) << 8) | (c & 0x7f) | MBFL_WCSPLANE_JIS0212;
224209
}
225210
CK((*filter->output_function)(w, filter->data));
226-
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
227-
CK((*filter->output_function)(c, filter->data));
228211
} else {
229-
w = (c1 << 8) | c | 0x8f0000;
230-
w &= MBFL_WCSGROUP_MASK;
231-
w |= MBFL_WCSGROUP_THROUGH;
212+
w = (c1 << 8) | c | 0x8f0000 | MBFL_WCSGROUP_THROUGH;
232213
CK((*filter->output_function)(w, filter->data));
233214
}
234215
break;
@@ -241,42 +222,56 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
241222
return c;
242223
}
243224

244-
/*
245-
* wchar => eucJP-win
246-
*/
247-
int
248-
mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
225+
/*
 * Flush handler for the eucJP-win => wchar filter.
 *
 * If the filter is in a non-zero status (the input ended in the middle of a
 * multi-byte sequence), the value in filter->cache is passed to the output
 * function, OR'ed with MBFL_WCSGROUP_THROUGH. Afterwards the filter's
 * flush_function is called, if one is set. Always returns 0; the return
 * values of the output and flush callbacks are not checked here.
 *
 * NOTE(review): in the visible parts of the conversion function, status 2
 * (after 0x8e) and status 3 (after 0x8f) are entered without assigning
 * filter->cache, so the value emitted here for those states may be left over
 * from an earlier character -- confirm against the full file.
 * NOTE(review): filter->status is not reset here -- confirm whether callers
 * rely on the filter being reusable after a flush.
 */
static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter)
249226
{
250-
int c1, c2, s1;
227+
if (filter->status) {
228+
(*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data);
229+
}
230+
231+
if (filter->flush_function) {
232+
(*filter->flush_function)(filter->data);
233+
}
251234

252-
s1 = 0;
253-
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
235+
return 0;
236+
}
237+
238+
int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
239+
{
240+
int c1, c2, s1 = 0;
241+
242+
if (c == 0xAF) { /* U+00AF is MACRON */
243+
s1 = 0xA2B4; /* Use JIS X 0212 overline */
244+
} else if (c == 0x203E) {
245+
s1 = 0x7E;
246+
} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
254247
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
255248
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
256249
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
257250
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
258251
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
259252
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
260253
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
261-
} else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */
254+
} else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */
262255
s1 = c - 0xe000;
263256
c1 = s1/94 + 0x75;
264257
c2 = s1%94 + 0x21;
265258
s1 = (c1 << 8) | c2;
266-
} else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */
259+
} else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */
267260
s1 = c - (0xe000 + 10*94);
268261
c1 = s1/94 + 0xf5;
269262
c2 = s1%94 + 0xa1;
270263
s1 = (c1 << 8) | c2;
271264
}
265+
272266
if (s1 == 0xa2f1) {
273267
s1 = 0x2d62; /* NUMERO SIGN */
274268
}
269+
275270
if (s1 <= 0) {
276271
if (c == 0xa5) { /* YEN SIGN */
277-
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
278-
} else if (c == 0x203e) { /* OVER LINE */
279-
s1 = 0x2131; /* FULLWIDTH MACRON */
272+
s1 = 0x5C;
273+
} else if (c == 0x2014) {
274+
s1 = 0x213D;
280275
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
281276
s1 = 0x2140;
282277
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
@@ -318,6 +313,7 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
318313
}
319314
}
320315
}
316+
321317
if (c == 0) {
322318
s1 = 0;
323319
} else if (s1 <= 0) {
Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
0x5C 0x00A5
2+
0x7E 0x203E
3+
0x8FA2B7 0xFF5E
4+
0x8FA2C3 0x00A6
5+
0x8FA2F1 0x2116
6+
0x8FF3FD 0x2160
7+
0x8FF3FE 0x2161
8+
0x8FF4A1 0x2162
9+
0x8FF4A2 0x2163
10+
0x8FF4A3 0x2164
11+
0x8FF4A4 0x2165
12+
0x8FF4A5 0x2166
13+
0x8FF4A6 0x2167
14+
0x8FF4A7 0x2168
15+
0x8FF4A8 0x2169
16+
0x8FF4AB 0x3231
17+
0x8FF4AC 0x2116
18+
0x8FF4AD 0x2121
19+
0xA1BD 0x2014
20+
0xA1C1 0x301C
21+
0xA1C2 0x2016
22+
0xA1DD 0x2212
23+
0xA1F1 0x00A2
24+
0xA1F2 0x00A3
25+
0xA2CC 0x00AC
26+
0xADF0 0x2252
27+
0xADF1 0x2261
28+
0xADF2 0x222B
29+
0xADF5 0x221A
30+
0xADF6 0x22A5
31+
0xADF7 0x2220
32+
0xADFA 0x2235
33+
0xADFB 0x2229
34+
0xADFC 0x222A

0 commit comments

Comments
 (0)