Skip to content

Commit 290efe8

Browse files
committed
Adjust code which checks if encoding is ISO-8859-9 when converting case
Instead of checking the 'encoding number' to see if we are converting case for ISO-8859-9 text, compare the encoding pointer against the address of mbfl_encoding_8859_9. This should free up one register in php_unicode_convert_case.
1 parent 39b46a5 commit 290efe8

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed

ext/mbstring/php_unicode.c

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include "php_unicode.h"
3636
#include "unicode_data.h"
3737

38+
extern const mbfl_encoding mbfl_encoding_8859_9;
39+
3840
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
3941

4042
static bool prop_lookup(unsigned long code, unsigned long n)
@@ -118,14 +120,14 @@ static inline unsigned mph_lookup(
118120
mph_lookup(code, _uccase_##type##_g, _uccase_##type##_g_size, \
119121
_uccase_##type##_table, _uccase_##type##_table_size)
120122

121-
static unsigned php_unicode_toupper_raw(unsigned code, enum mbfl_no_encoding enc)
123+
static unsigned php_unicode_toupper_raw(unsigned code, const mbfl_encoding *enc)
122124
{
123125
/* After the ASCII characters, the first codepoint with an uppercase version
124126
* is 0xB5 (MICRO SIGN) */
125127
if (code < 0xB5) {
126128
/* Fast path for ASCII */
127129
if (code >= 0x61 && code <= 0x7A) {
128-
if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x69)) {
130+
if (UNEXPECTED(enc == &mbfl_encoding_8859_9 && code == 0x69)) {
129131
return 0x130;
130132
}
131133
return code - 0x20;
@@ -140,14 +142,14 @@ static unsigned php_unicode_toupper_raw(unsigned code, enum mbfl_no_encoding enc
140142
}
141143
}
142144

143-
static unsigned php_unicode_tolower_raw(unsigned code, enum mbfl_no_encoding enc)
145+
static unsigned php_unicode_tolower_raw(unsigned code, const mbfl_encoding *enc)
144146
{
145147
/* After the ASCII characters, the first codepoint with a lowercase version
146148
* is 0xC0 (LATIN CAPITAL LETTER A WITH GRAVE) */
147149
if (code < 0xC0) {
148150
/* Fast path for ASCII */
149151
if (code >= 0x41 && code <= 0x5A) {
150-
if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x0049L)) {
152+
if (UNEXPECTED(enc == &mbfl_encoding_8859_9 && code == 0x0049L)) {
151153
return 0x0131L;
152154
}
153155
return code + 0x20;
@@ -156,7 +158,7 @@ static unsigned php_unicode_tolower_raw(unsigned code, enum mbfl_no_encoding enc
156158
} else {
157159
unsigned new_code = CASE_LOOKUP(code, lower);
158160
if (new_code != CODE_NOT_FOUND) {
159-
if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x130)) {
161+
if (UNEXPECTED(enc == &mbfl_encoding_8859_9 && code == 0x130)) {
160162
return 0x69;
161163
}
162164
return new_code;
@@ -165,7 +167,7 @@ static unsigned php_unicode_tolower_raw(unsigned code, enum mbfl_no_encoding enc
165167
}
166168
}
167169

168-
static unsigned php_unicode_totitle_raw(unsigned code, enum mbfl_no_encoding enc)
170+
static unsigned php_unicode_totitle_raw(unsigned code, const mbfl_encoding *enc)
169171
{
170172
unsigned new_code = CASE_LOOKUP(code, title);
171173
if (new_code != CODE_NOT_FOUND) {
@@ -176,12 +178,12 @@ static unsigned php_unicode_totitle_raw(unsigned code, enum mbfl_no_encoding enc
176178
return php_unicode_toupper_raw(code, enc);
177179
}
178180

179-
static unsigned php_unicode_tofold_raw(unsigned code, enum mbfl_no_encoding enc)
181+
static unsigned php_unicode_tofold_raw(unsigned code, const mbfl_encoding *enc)
180182
{
181183
if (code < 0x80) {
182184
/* Fast path for ASCII */
183185
if (code >= 0x41 && code <= 0x5A) {
184-
if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x49)) {
186+
if (UNEXPECTED(enc == &mbfl_encoding_8859_9 && code == 0x49)) {
185187
return 0x131;
186188
}
187189
return code + 0x20;
@@ -190,7 +192,7 @@ static unsigned php_unicode_tofold_raw(unsigned code, enum mbfl_no_encoding enc)
190192
} else {
191193
unsigned new_code = CASE_LOOKUP(code, fold);
192194
if (new_code != CODE_NOT_FOUND) {
193-
if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x130)) {
195+
if (UNEXPECTED(enc == &mbfl_encoding_8859_9 && code == 0x130)) {
194196
return 0x69;
195197
}
196198
return new_code;
@@ -199,28 +201,28 @@ static unsigned php_unicode_tofold_raw(unsigned code, enum mbfl_no_encoding enc)
199201
}
200202
}
201203

202-
static inline unsigned php_unicode_tolower_simple(unsigned code, enum mbfl_no_encoding enc) {
204+
static inline unsigned php_unicode_tolower_simple(unsigned code, const mbfl_encoding *enc) {
203205
code = php_unicode_tolower_raw(code, enc);
204206
if (UNEXPECTED(code > 0xffffff)) {
205207
return _uccase_extra_table[code & 0xffffff];
206208
}
207209
return code;
208210
}
209-
static inline unsigned php_unicode_toupper_simple(unsigned code, enum mbfl_no_encoding enc) {
211+
static inline unsigned php_unicode_toupper_simple(unsigned code, const mbfl_encoding *enc) {
210212
code = php_unicode_toupper_raw(code, enc);
211213
if (UNEXPECTED(code > 0xffffff)) {
212214
return _uccase_extra_table[code & 0xffffff];
213215
}
214216
return code;
215217
}
216-
static inline unsigned php_unicode_totitle_simple(unsigned code, enum mbfl_no_encoding enc) {
218+
static inline unsigned php_unicode_totitle_simple(unsigned code, const mbfl_encoding *enc) {
217219
code = php_unicode_totitle_raw(code, enc);
218220
if (UNEXPECTED(code > 0xffffff)) {
219221
return _uccase_extra_table[code & 0xffffff];
220222
}
221223
return code;
222224
}
223-
static inline unsigned php_unicode_tofold_simple(unsigned code, enum mbfl_no_encoding enc) {
225+
static inline unsigned php_unicode_tofold_simple(unsigned code, const mbfl_encoding *enc) {
224226
code = php_unicode_tofold_raw(code, enc);
225227
if (UNEXPECTED(code > 0xffffff)) {
226228
return _uccase_extra_table[code & 0xffffff];
@@ -284,7 +286,6 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
284286
uint32_t wchar_buf[64], converted_buf[192];
285287
unsigned int state = 0, title_mode = 0;
286288
unsigned char *in = (unsigned char*)srcstr;
287-
enum mbfl_no_encoding enc = src_encoding->no_encoding;
288289
/* In rare cases, we need to scan backwards through the previously converted codepoints to see
289290
* if special conversion rules should be used for the Greek letter sigma */
290291
uint32_t *converted_end = NULL;
@@ -302,21 +303,21 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
302303
case PHP_UNICODE_CASE_UPPER_SIMPLE:
303304
for (int i = 0; i < out_len; i++) {
304305
uint32_t w = wchar_buf[i];
305-
*p++ = (UNEXPECTED(w > 0xFFFFFF)) ? w : php_unicode_toupper_simple(w, enc);
306+
*p++ = (UNEXPECTED(w > 0xFFFFFF)) ? w : php_unicode_toupper_simple(w, src_encoding);
306307
}
307308
break;
308309

309310
case PHP_UNICODE_CASE_LOWER_SIMPLE:
310311
for (int i = 0; i < out_len; i++) {
311312
uint32_t w = wchar_buf[i];
312-
*p++ = (UNEXPECTED(w > 0xFFFFFF)) ? w : php_unicode_tolower_simple(w, enc);
313+
*p++ = (UNEXPECTED(w > 0xFFFFFF)) ? w : php_unicode_tolower_simple(w, src_encoding);
313314
}
314315
break;
315316

316317
case PHP_UNICODE_CASE_FOLD_SIMPLE:
317318
for (int i = 0; i < out_len; i++) {
318319
uint32_t w = wchar_buf[i];
319-
*p++ = (UNEXPECTED(w > 0xFFFFFF)) ? w : php_unicode_tofold_simple(w, enc);
320+
*p++ = (UNEXPECTED(w > 0xFFFFFF)) ? w : php_unicode_tofold_simple(w, src_encoding);
320321
}
321322
break;
322323

@@ -327,7 +328,7 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
327328
*p++ = w;
328329
continue;
329330
}
330-
*p++ = title_mode ? php_unicode_tolower_simple(w, enc) : php_unicode_totitle_simple(w, enc);
331+
*p++ = title_mode ? php_unicode_tolower_simple(w, src_encoding) : php_unicode_totitle_simple(w, src_encoding);
331332
if (!php_unicode_is_case_ignorable(w)) {
332333
title_mode = php_unicode_is_cased(w);
333334
}
@@ -341,7 +342,7 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
341342
*p++ = w;
342343
continue;
343344
}
344-
w = php_unicode_toupper_raw(w, enc);
345+
w = php_unicode_toupper_raw(w, src_encoding);
345346
if (UNEXPECTED(w > 0xFFFFFF)) {
346347
p = emit_special_casing_sequence(w, p);
347348
} else {
@@ -394,7 +395,7 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
394395
}
395396
}
396397
}
397-
w = php_unicode_tolower_raw(w, enc);
398+
w = php_unicode_tolower_raw(w, src_encoding);
398399
if (UNEXPECTED(w > 0xFFFFFF)) {
399400
p = emit_special_casing_sequence(w, p);
400401
} else {
@@ -410,7 +411,7 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
410411
*p++ = w;
411412
continue;
412413
}
413-
w = php_unicode_tofold_raw(w, enc);
414+
w = php_unicode_tofold_raw(w, src_encoding);
414415
if (UNEXPECTED(w > 0xFFFFFF)) {
415416
p = emit_special_casing_sequence(w, p);
416417
} else {
@@ -426,7 +427,7 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons
426427
*p++ = w;
427428
continue;
428429
}
429-
uint32_t w2 = title_mode ? php_unicode_tolower_raw(w, enc) : php_unicode_totitle_raw(w, enc);
430+
uint32_t w2 = title_mode ? php_unicode_tolower_raw(w, src_encoding) : php_unicode_totitle_raw(w, src_encoding);
430431
if (UNEXPECTED(w2 > 0xFFFFFF)) {
431432
p = emit_special_casing_sequence(w2, p);
432433
} else {

0 commit comments

Comments
 (0)