Skip to content

Commit 17d82b6

Browse files
committed
Enhance mbstring support for UCS-2 text
- For consistency with UTF-16, UTF-32, and UCS-4, strip leading byte order marks.
- Treat it as an error if the string is truncated (i.e. has an odd number of bytes).
1 parent 6dd7547 commit 17d82b6

File tree

1 file changed

+39
-67
lines changed

1 file changed

+39
-67
lines changed

ext/mbstring/libmbfl/filters/mbfilter_ucs2.c

Lines changed: 39 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
#include "mbfilter.h"
3131
#include "mbfilter_ucs2.h"
3232

33+
static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter);
34+
3335
static const char *mbfl_encoding_ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL};
3436

3537
/* This library historically had encodings called 'byte2be' and 'byte2le'
@@ -78,7 +80,7 @@ const struct mbfl_convert_vtbl vtbl_ucs2_wchar = {
7880
mbfl_filt_conv_common_ctor,
7981
NULL,
8082
mbfl_filt_conv_ucs2_wchar,
81-
mbfl_filt_conv_common_flush,
83+
mbfl_filt_conv_ucs2_wchar_flush,
8284
NULL,
8385
};
8486

@@ -98,7 +100,7 @@ const struct mbfl_convert_vtbl vtbl_ucs2be_wchar = {
98100
mbfl_filt_conv_common_ctor,
99101
NULL,
100102
mbfl_filt_conv_ucs2be_wchar,
101-
mbfl_filt_conv_common_flush,
103+
mbfl_filt_conv_ucs2_wchar_flush,
102104
NULL,
103105
};
104106

@@ -118,7 +120,7 @@ const struct mbfl_convert_vtbl vtbl_ucs2le_wchar = {
118120
mbfl_filt_conv_common_ctor,
119121
NULL,
120122
mbfl_filt_conv_ucs2le_wchar,
121-
mbfl_filt_conv_common_flush,
123+
mbfl_filt_conv_ucs2_wchar_flush,
122124
NULL,
123125
};
124126

@@ -134,113 +136,83 @@ const struct mbfl_convert_vtbl vtbl_wchar_ucs2le = {
134136

135137
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
136138

137-
/*
138-
* UCS-2 => wchar
139-
*/
140139
int mbfl_filt_conv_ucs2_wchar(int c, mbfl_convert_filter *filter)
141140
{
142-
int n, endian;
143-
144-
endian = filter->status & 0xff00;
145-
switch (filter->status & 0xff) {
146-
case 0:
147-
if (endian) {
148-
n = c & 0xff;
149-
} else {
150-
n = (c & 0xff) << 8;
151-
}
152-
filter->cache = n;
153-
filter->status++;
154-
break;
155-
default:
156-
if (endian) {
157-
n = (c & 0xff) << 8;
141+
if (filter->status == 0) {
142+
filter->status = 1;
143+
filter->cache = c & 0xFF;
144+
} else {
145+
filter->status = 0;
146+
int n = (filter->cache << 8) | (c & 0xFF);
147+
if (n == 0xFFFE) {
148+
/* Found little-endian byte order mark */
149+
filter->filter_function = mbfl_filt_conv_ucs2le_wchar;
158150
} else {
159-
n = c & 0xff;
160-
}
161-
n |= filter->cache;
162-
if (n == 0xfffe) {
163-
if (endian) {
164-
filter->status = 0; /* big-endian */
165-
} else {
166-
filter->status = 0x100; /* little-endian */
151+
filter->filter_function = mbfl_filt_conv_ucs2be_wchar;
152+
if (n != 0xFEFF) {
153+
CK((*filter->output_function)(n, filter->data));
167154
}
168-
CK((*filter->output_function)(0xfeff, filter->data));
169-
} else {
170-
filter->status &= ~0xff;
171-
CK((*filter->output_function)(n, filter->data));
172155
}
173-
break;
174156
}
175-
176157
return c;
177158
}
178159

179-
/*
180-
* UCS-2BE => wchar
181-
*/
182160
int mbfl_filt_conv_ucs2be_wchar(int c, mbfl_convert_filter *filter)
183161
{
184-
int n;
185-
186162
if (filter->status == 0) {
187163
filter->status = 1;
188-
n = (c & 0xff) << 8;
189-
filter->cache = n;
164+
filter->cache = (c & 0xFF) << 8;
190165
} else {
191166
filter->status = 0;
192-
n = (c & 0xff) | filter->cache;
193-
CK((*filter->output_function)(n, filter->data));
167+
CK((*filter->output_function)((c & 0xFF) | filter->cache, filter->data));
194168
}
195169
return c;
196170
}
197171

198-
/*
199-
* wchar => UCS-2BE
200-
*/
201172
int mbfl_filt_conv_wchar_ucs2be(int c, mbfl_convert_filter *filter)
202173
{
203174
if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
204-
CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
205-
CK((*filter->output_function)(c & 0xff, filter->data));
175+
CK((*filter->output_function)((c >> 8) & 0xFF, filter->data));
176+
CK((*filter->output_function)(c & 0xFF, filter->data));
206177
} else {
207178
CK(mbfl_filt_conv_illegal_output(c, filter));
208179
}
209-
210180
return c;
211181
}
212182

213-
/*
214-
* UCS-2LE => wchar
215-
*/
216183
int mbfl_filt_conv_ucs2le_wchar(int c, mbfl_convert_filter *filter)
217184
{
218-
int n;
219-
220185
if (filter->status == 0) {
221186
filter->status = 1;
222-
n = c & 0xff;
223-
filter->cache = n;
187+
filter->cache = c & 0xFF;
224188
} else {
225189
filter->status = 0;
226-
n = ((c & 0xff) << 8) | filter->cache;
227-
CK((*filter->output_function)(n, filter->data));
190+
CK((*filter->output_function)(((c & 0xFF) << 8) | filter->cache, filter->data));
228191
}
229192
return c;
230193
}
231194

232-
233-
/*
234-
* wchar => UCS-2LE
235-
*/
236195
int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter)
237196
{
238197
if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
239-
CK((*filter->output_function)(c & 0xff, filter->data));
240-
CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
198+
CK((*filter->output_function)(c & 0xFF, filter->data));
199+
CK((*filter->output_function)((c >> 8) & 0xFF, filter->data));
241200
} else {
242201
CK(mbfl_filt_conv_illegal_output(c, filter));
243202
}
244-
245203
return c;
246204
}
205+
206+
static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter)
207+
{
208+
if (filter->status) {
209+
/* Input string was truncated */
210+
CK((*filter->output_function)(filter->cache | MBFL_WCSGROUP_THROUGH, filter->data));
211+
}
212+
213+
if (filter->flush_function) {
214+
(*filter->flush_function)(filter->data);
215+
}
216+
217+
return 0;
218+
}

0 commit comments

Comments
 (0)