32
32
33
33
#include "unicode_table_big5.h"
34
34
35
+ static int mbfl_filt_conv_big5_wchar_flush (mbfl_convert_filter * filter );
36
+
35
37
static const unsigned char mblen_table_big5 [] = { /* 0x81-0xFE */
36
38
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
37
39
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -81,7 +83,7 @@ const struct mbfl_convert_vtbl vtbl_big5_wchar = {
81
83
mbfl_filt_conv_common_ctor ,
82
84
NULL ,
83
85
mbfl_filt_conv_big5_wchar ,
84
- mbfl_filt_conv_common_flush ,
86
+ mbfl_filt_conv_big5_wchar_flush ,
85
87
NULL ,
86
88
};
87
89
@@ -101,7 +103,7 @@ const struct mbfl_convert_vtbl vtbl_cp950_wchar = {
101
103
mbfl_filt_conv_common_ctor ,
102
104
NULL ,
103
105
mbfl_filt_conv_big5_wchar ,
104
- mbfl_filt_conv_common_flush ,
106
+ mbfl_filt_conv_big5_wchar_flush ,
105
107
NULL ,
106
108
};
107
109
@@ -119,60 +121,48 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp950 = {
119
121
120
122
/* 63 + 94 = 157 or 94 */
121
123
/*
 * Mapping between CP950 double-byte ranges and the Unicode private use area.
 *
 * Each row is { first wchar, last wchar, first CP950 code, last CP950 code }.
 * Rows are ordered by ascending wchar, and each row starts right after the
 * previous one ends, so together they cover 0xe000..0xf848 without gaps.
 *
 * When the low byte of the first CP950 code is 0x40, the range is laid out
 * with 157 cells per lead byte (trail bytes 0x40..0x7e and 0xa1..0xfe);
 * otherwise the CP950 code maps to the wchar by a plain linear offset.
 *
 * The table is lookup-only, hence const.
 */
static const unsigned short cp950_pua_tbl[][4] = {
	{0xe000, 0xe310, 0xfa40, 0xfefe},
	{0xe311, 0xeeb7, 0x8e40, 0xa0fe},
	{0xeeb8, 0xf6b0, 0x8140, 0x8dfe},
	{0xf6b1, 0xf70e, 0xc6a1, 0xc6fe},
	{0xf70f, 0xf848, 0xc740, 0xc8fe},
};
128
130
129
/*
 * Report whether the double-byte sequence (c1 = lead byte, c = trail byte)
 * falls inside one of the CP950 ranges listed in cp950_pua_tbl.
 *
 * Returns 1 if it does, 0 otherwise.
 */
static inline int is_in_cp950_pua(int c1, int c)
{
	int low_trail = c >= 0x40 && c <= 0x7e;
	int high_trail = c >= 0xa1 && c <= 0xfe;

	/* Lead byte 0xc6 only qualifies with a trail byte in the upper range */
	if (c1 == 0xc6) {
		return high_trail;
	}

	/* 0x81..0x8d and 0x8e..0xa0 are adjacent, so they are tested as one span */
	if ((c1 >= 0x81 && c1 <= 0xa0) || c1 == 0xc7 || c1 == 0xc8 || (c1 >= 0xfa && c1 <= 0xfe)) {
		return low_trail || high_trail;
	}

	return 0;
}
139
141
140
- /*
141
- * Big5 => wchar
142
- */
143
- int
144
- mbfl_filt_conv_big5_wchar (int c , mbfl_convert_filter * filter )
142
+ int mbfl_filt_conv_big5_wchar (int c , mbfl_convert_filter * filter )
145
143
{
146
- int k ;
147
- int c1 , w , c2 ;
144
+ int k , c1 , w ;
148
145
149
146
switch (filter -> status ) {
150
147
case 0 :
151
- if (filter -> from -> no_encoding == mbfl_no_encoding_cp950 ) {
152
- c1 = 0x80 ;
153
- } else {
154
- c1 = 0xa0 ;
155
- }
156
-
157
- if (c >= 0 && c <= 0x80 ) { /* latin */
148
+ if (c >= 0 && c < 0x80 ) { /* latin */
158
149
CK ((* filter -> output_function )(c , filter -> data ));
159
- } else if (c == 0xff ) {
160
- CK ((* filter -> output_function )(0xf8f8 , filter -> data ));
161
- } else if (c > c1 && c < 0xff ) { /* dbcs lead byte */
150
+ } else if (filter -> from -> no_encoding != mbfl_no_encoding_cp950 && c > 0xA0 && c <= 0xF9 && c != 0xC8 ) {
151
+ filter -> status = 1 ;
152
+ filter -> cache = c ;
153
+ } else if (filter -> from -> no_encoding == mbfl_no_encoding_cp950 && c > 0x80 && c <= 0xFE ) {
162
154
filter -> status = 1 ;
163
155
filter -> cache = c ;
164
156
} else {
165
- w = c & MBFL_WCSGROUP_MASK ;
166
- w |= MBFL_WCSGROUP_THROUGH ;
167
- CK ((* filter -> output_function )(w , filter -> data ));
157
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
168
158
}
169
159
break ;
170
160
171
- case 1 : /* dbcs second byte */
161
+ case 1 : /* dbcs second byte */
172
162
filter -> status = 0 ;
173
163
c1 = filter -> cache ;
174
- if ((c > 0x39 && c < 0x7f ) | (c > 0xa0 && c < 0xff )) {
175
- if (c < 0x7f ){
164
+ if ((c > 0x3f && c < 0x7f ) | | (c > 0xa0 && c < 0xff )) {
165
+ if (c < 0x7f ) {
176
166
w = (c1 - 0xa1 )* 157 + (c - 0x40 );
177
167
} else {
178
168
w = (c1 - 0xa1 )* 157 + (c - 0xa1 ) + 0x3f ;
@@ -185,35 +175,67 @@ mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
185
175
186
176
if (filter -> from -> no_encoding == mbfl_no_encoding_cp950 ) {
187
177
/* PUA for CP950 */
188
- if (w <= 0 && is_in_cp950_pua (c1 , c )) {
189
- c2 = c1 << 8 | c ;
190
- for (k = 0 ; k < sizeof (cp950_pua_tbl )/(sizeof (unsigned short )* 4 ); k ++ ) {
178
+ if (is_in_cp950_pua (c1 , c )) {
179
+ int c2 = (c1 << 8 ) | c ;
180
+
181
+ for (k = 0 ; k < sizeof (cp950_pua_tbl ) / (sizeof (unsigned short )* 4 ); k ++ ) {
191
182
if (c2 >= cp950_pua_tbl [k ][2 ] && c2 <= cp950_pua_tbl [k ][3 ]) {
192
183
break ;
193
184
}
194
185
}
195
186
196
187
if ((cp950_pua_tbl [k ][2 ] & 0xff ) == 0x40 ) {
197
- w = 157 * (c1 - (cp950_pua_tbl [k ][2 ]>>8 )) + c - (c >= 0xa1 ? 0x62 : 0x40 )
198
- + cp950_pua_tbl [k ][0 ];
188
+ w = 157 * (c1 - (cp950_pua_tbl [k ][2 ]>>8 )) + c - (c >= 0xa1 ? 0x62 : 0x40 ) + cp950_pua_tbl [k ][0 ];
199
189
} else {
200
190
w = c2 - cp950_pua_tbl [k ][2 ] + cp950_pua_tbl [k ][0 ];
201
191
}
192
+ } else if (c1 == 0xA1 ) {
193
+ if (c == 0x45 ) {
194
+ w = 0x2027 ;
195
+ } else if (c == 0x4E ) {
196
+ w = 0xFE51 ;
197
+ } else if (c == 0x5A ) {
198
+ w = 0x2574 ;
199
+ } else if (c == 0xC2 ) {
200
+ w = 0x00AF ;
201
+ } else if (c == 0xC3 ) {
202
+ w = 0xFFE3 ;
203
+ } else if (c == 0xC5 ) {
204
+ w = 0x02CD ;
205
+ } else if (c == 0xE3 ) {
206
+ w = 0xFF5E ;
207
+ } else if (c == 0xF2 ) {
208
+ w = 0x2295 ;
209
+ } else if (c == 0xF3 ) {
210
+ w = 0x2299 ;
211
+ } else if (c == 0xFE ) {
212
+ w = 0xFF0F ;
213
+ }
214
+ } else if (c1 == 0xA2 ) {
215
+ if (c == 0x40 ) {
216
+ w = 0xFF3C ;
217
+ } else if (c == 0x41 ) {
218
+ w = 0x2215 ;
219
+ } else if (c == 0x42 ) {
220
+ w = 0xFE68 ;
221
+ } else if (c == 0x46 ) {
222
+ w = 0xFFE0 ;
223
+ } else if (c == 0x47 ) {
224
+ w = 0xFFE1 ;
225
+ } else if (c == 0xCC ) {
226
+ w = 0x5341 ;
227
+ } else if (c == 0xCE ) {
228
+ w = 0x5345 ;
229
+ }
202
230
}
203
231
}
204
232
205
233
if (w <= 0 ) {
206
- w = (c1 << 8 ) | c ;
207
- w &= MBFL_WCSPLANE_MASK ;
208
- w |= MBFL_WCSPLANE_BIG5 ;
234
+ w = (c1 << 8 ) | c | MBFL_WCSPLANE_BIG5 ;
209
235
}
210
236
CK ((* filter -> output_function )(w , filter -> data ));
211
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
212
- CK ((* filter -> output_function )(c , filter -> data ));
213
237
} else {
214
- w = (c1 << 8 ) | c ;
215
- w &= MBFL_WCSGROUP_MASK ;
216
- w |= MBFL_WCSGROUP_THROUGH ;
238
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
217
239
CK ((* filter -> output_function )(w , filter -> data ));
218
240
}
219
241
break ;
@@ -226,16 +248,24 @@ mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
226
248
return c ;
227
249
}
228
250
229
- /*
230
- * wchar => Big5
231
- */
232
- int
233
- mbfl_filt_conv_wchar_big5 (int c , mbfl_convert_filter * filter )
251
+ static int mbfl_filt_conv_big5_wchar_flush (mbfl_convert_filter * filter )
234
252
{
235
- int k ;
236
- int c1 , s , c2 ;
253
+ if (filter -> status == 1 ) {
254
+ /* 2-byte character was truncated */
255
+ CK ((* filter -> output_function )(filter -> cache | MBFL_WCSGROUP_THROUGH , filter -> data ));
256
+ }
257
+
258
+ if (filter -> flush_function ) {
259
+ (* filter -> flush_function )(filter -> data );
260
+ }
261
+
262
+ return 0 ;
263
+ }
264
+
265
+ int mbfl_filt_conv_wchar_big5 (int c , mbfl_convert_filter * filter )
266
+ {
267
+ int k , s = 0 ;
237
268
238
- s = 0 ;
239
269
if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max ) {
240
270
s = ucs_a1_big5_table [c - ucs_a1_big5_table_min ];
241
271
} else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max ) {
@@ -244,8 +274,6 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
244
274
s = ucs_a3_big5_table [c - ucs_a3_big5_table_min ];
245
275
} else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max ) {
246
276
s = ucs_i_big5_table [c - ucs_i_big5_table_min ];
247
- } else if (c >= ucs_pua_big5_table_min && c < ucs_pua_big5_table_max ) {
248
- s = ucs_pua_big5_table [c - ucs_pua_big5_table_min ];
249
277
} else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max ) {
250
278
s = ucs_r1_big5_table [c - ucs_r1_big5_table_min ];
251
279
} else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max ) {
@@ -254,49 +282,92 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
254
282
255
283
if (filter -> to -> no_encoding == mbfl_no_encoding_cp950 ) {
256
284
if (c >= 0xe000 && c <= 0xf848 ) { /* PUA for CP950 */
257
- for (k = 0 ; k < sizeof (cp950_pua_tbl )/ (sizeof (unsigned short )* 4 ); k ++ ) {
285
+ for (k = 0 ; k < sizeof (cp950_pua_tbl ) / (sizeof (unsigned short )* 4 ); k ++ ) {
258
286
if (c <= cp950_pua_tbl [k ][1 ]) {
259
287
break ;
260
288
}
261
289
}
262
- c1 = c - cp950_pua_tbl [k ][0 ];
290
+
291
+ int c1 = c - cp950_pua_tbl [k ][0 ];
263
292
if ((cp950_pua_tbl [k ][2 ] & 0xff ) == 0x40 ) {
264
- c2 = cp950_pua_tbl [k ][2 ] >> 8 ;
265
- s = ((c1 / 157 ) + c2 ) << 8 ; c1 %= 157 ;
293
+ int c2 = cp950_pua_tbl [k ][2 ] >> 8 ;
294
+ s = ((c1 / 157 ) + c2 ) << 8 ;
295
+ c1 %= 157 ;
266
296
s |= c1 + (c1 >= 0x3f ? 0x62 : 0x40 );
267
297
} else {
268
298
s = c1 + cp950_pua_tbl [k ][2 ];
269
299
}
270
- }
271
-
272
- if (c == 0x80 ) {
273
- s = 0x80 ;
274
- } else if (c == 0xf8f8 ) {
275
- s = 0xff ;
276
- } else if (c == 0x256d ) {
277
- s = 0xa27e ;
278
- } else if (c == 0x256e ) {
279
- s = 0xa2a1 ;
280
- } else if (c == 0x256f ) {
281
- s = 0xa2a3 ;
282
- } else if (c == 0x2570 ) {
283
- s = 0xa2a2 ;
300
+ } else if (c == 0x00A2 ) {
301
+ s = 0 ;
302
+ } else if (c == 0x00A3 ) {
303
+ s = 0 ;
304
+ } else if (c == 0x00AF ) {
305
+ s = 0xA1C2 ;
306
+ } else if (c == 0x02CD ) {
307
+ s = 0xA1C5 ;
308
+ } else if (c == 0x0401 ) {
309
+ s = 0 ;
310
+ } else if (c >= 0x0414 && c <= 0x041C ) {
311
+ s = 0 ;
312
+ } else if (c >= 0x0423 && c <= 0x044F ) {
313
+ s = 0 ;
314
+ } else if (c == 0x0451 ) {
315
+ s = 0 ;
316
+ } else if (c == 0x2022 ) {
317
+ s = 0 ;
318
+ } else if (c == 0x2027 ) {
319
+ s = 0xA145 ;
320
+ } else if (c == 0x203E ) {
321
+ s = 0 ;
322
+ } else if (c == 0x2215 ) {
323
+ s = 0xA241 ;
324
+ } else if (c == 0x223C ) {
325
+ s = 0 ;
326
+ } else if (c == 0x2295 ) {
327
+ s = 0xA1F2 ;
328
+ } else if (c == 0x2299 ) {
329
+ s = 0xA1F3 ;
330
+ } else if (c >= 0x2460 && c <= 0x247D ) {
331
+ s = 0 ;
332
+ } else if (c == 0x2574 ) {
333
+ s = 0xA15A ;
334
+ } else if (c == 0x2609 ) {
335
+ s = 0 ;
336
+ } else if (c == 0x2641 ) {
337
+ s = 0 ;
338
+ } else if (c == 0x3005 || (c >= 0x302A && c <= 0x30FF )) {
339
+ s = 0 ;
340
+ } else if (c == 0xFE51 ) {
341
+ s = 0xA14E ;
342
+ } else if (c == 0xFE68 ) {
343
+ s = 0xA242 ;
344
+ } else if (c == 0xFF3C ) {
345
+ s = 0xA240 ;
346
+ } else if (c == 0xFF5E ) {
347
+ s = 0xA1E3 ;
348
+ } else if (c == 0xFF64 ) {
349
+ s = 0 ;
350
+ } else if (c == 0xFFE0 ) {
351
+ s = 0xA246 ;
352
+ } else if (c == 0xFFE1 ) {
353
+ s = 0xA247 ;
354
+ } else if (c == 0xFFE3 ) {
355
+ s = 0xA1C3 ;
356
+ } else if (c == 0xFF0F ) {
357
+ s = 0xA1FE ;
284
358
}
285
359
}
286
360
287
361
if (s <= 0 ) {
288
- c1 = c & ~MBFL_WCSPLANE_MASK ;
289
- if (c1 == MBFL_WCSPLANE_BIG5 ) {
290
- s = c & MBFL_WCSPLANE_MASK ;
291
- }
292
362
if (c == 0 ) {
293
363
s = 0 ;
294
- } else if ( s <= 0 ) {
364
+ } else {
295
365
s = -1 ;
296
366
}
297
367
}
368
+
298
369
if (s >= 0 ) {
299
- if (s <= 0x80 || s == 0xff ) { /* latin */
370
+ if (s <= 0x80 ) { /* latin */
300
371
CK ((* filter -> output_function )(s , filter -> data ));
301
372
} else {
302
373
CK ((* filter -> output_function )((s >> 8 ) & 0xff , filter -> data ));
0 commit comments