Skip to content

Commit 6e4395b

Browse files
committed
ext/intl further fixes
1 parent 9c81da4 commit 6e4395b

File tree

7 files changed

+76
-73
lines changed

7 files changed

+76
-73
lines changed

ext/intl/grapheme/grapheme_string.c

Lines changed: 30 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -55,24 +55,23 @@ void grapheme_register_constants( INIT_FUNC_ARGS )
5555
PHP_FUNCTION(grapheme_strlen)
5656
{
5757
unsigned char* string;
58-
int string_len;
58+
zend_str_size_int string_len;
5959
UChar* ustring = NULL;
60-
int ustring_len = 0;
60+
zend_str_size_int ustring_len = 0;
6161
int ret_len;
6262
UErrorCode status;
6363

64-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string, &string_len) == FAILURE) {
64+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S", (char **)&string, &string_len) == FAILURE) {
6565

6666
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
6767
"grapheme_strlen: unable to parse input param", 0 TSRMLS_CC );
6868

6969
RETURN_FALSE;
7070
}
7171

72-
ret_len = grapheme_ascii_check(string, string_len);
73-
74-
if ( ret_len >= 0 )
75-
RETURN_LONG(ret_len);
72+
if (grapheme_ascii_check(string, string_len)) {
73+
RETURN_LONG(string_len);
74+
}
7675

7776
/* convert the string to UTF-16. */
7877
status = U_ZERO_ERROR;
@@ -109,13 +108,13 @@ PHP_FUNCTION(grapheme_strlen)
109108
PHP_FUNCTION(grapheme_strpos)
110109
{
111110
unsigned char *haystack, *needle;
112-
int haystack_len, needle_len;
111+
zend_str_size_int haystack_len, needle_len;
113112
unsigned char *found;
114113
long loffset = 0;
115114
int32_t offset = 0;
116115
int ret_pos;
117116

118-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
117+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
119118

120119
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
121120
"grapheme_strpos: unable to parse input param", 0 TSRMLS_CC );
@@ -154,7 +153,7 @@ PHP_FUNCTION(grapheme_strpos)
154153
}
155154

156155
/* if it is there, and if the haystack is ascii, we are all done */
157-
if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
156+
if (grapheme_ascii_check(haystack, haystack_len)) {
158157

159158
RETURN_LONG(found - haystack);
160159
}
@@ -176,14 +175,14 @@ PHP_FUNCTION(grapheme_strpos)
176175
PHP_FUNCTION(grapheme_stripos)
177176
{
178177
unsigned char *haystack, *needle, *haystack_dup, *needle_dup;
179-
int haystack_len, needle_len;
178+
zend_str_size_int haystack_len, needle_len;
180179
unsigned char *found;
181180
long loffset = 0;
182181
int32_t offset = 0;
183182
int ret_pos;
184183
int is_ascii;
185184

186-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
185+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
187186

188187
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
189188
"grapheme_stripos: unable to parse input param", 0 TSRMLS_CC );
@@ -211,7 +210,7 @@ PHP_FUNCTION(grapheme_stripos)
211210
}
212211

213212

214-
is_ascii = ( grapheme_ascii_check(haystack, haystack_len) >= 0 );
213+
is_ascii = grapheme_ascii_check(haystack, haystack_len);
215214

216215
if ( is_ascii ) {
217216
needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
@@ -229,7 +228,7 @@ PHP_FUNCTION(grapheme_stripos)
229228
}
230229

231230
/* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
232-
if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
231+
if ( grapheme_ascii_check(needle, needle_len) ) {
233232
RETURN_FALSE;
234233
}
235234
}
@@ -251,13 +250,13 @@ PHP_FUNCTION(grapheme_stripos)
251250
PHP_FUNCTION(grapheme_strrpos)
252251
{
253252
unsigned char *haystack, *needle;
254-
int haystack_len, needle_len;
253+
zend_str_size_int haystack_len, needle_len;
255254
long loffset = 0;
256255
int32_t offset = 0;
257256
int32_t ret_pos;
258257
int is_ascii;
259258

260-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
259+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
261260

262261
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
263262
"grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
@@ -284,7 +283,7 @@ PHP_FUNCTION(grapheme_strrpos)
284283
RETURN_FALSE;
285284
}
286285

287-
is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
286+
is_ascii = grapheme_ascii_check(haystack, haystack_len);
288287

289288
if ( is_ascii ) {
290289

@@ -297,7 +296,7 @@ PHP_FUNCTION(grapheme_strrpos)
297296

298297
/* if the needle was ascii too, we are done */
299298

300-
if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
299+
if ( grapheme_ascii_check(needle, needle_len) ) {
301300
RETURN_FALSE;
302301
}
303302

@@ -321,13 +320,13 @@ PHP_FUNCTION(grapheme_strrpos)
321320
PHP_FUNCTION(grapheme_strripos)
322321
{
323322
unsigned char *haystack, *needle;
324-
int haystack_len, needle_len;
323+
zend_str_size_int haystack_len, needle_len;
325324
long loffset = 0;
326325
int32_t offset = 0;
327326
int32_t ret_pos;
328327
int is_ascii;
329328

330-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
329+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
331330

332331
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
333332
"grapheme_strrpos: unable to parse input param", 0 TSRMLS_CC );
@@ -354,7 +353,7 @@ PHP_FUNCTION(grapheme_strripos)
354353
RETURN_FALSE;
355354
}
356355

357-
is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
356+
is_ascii = grapheme_ascii_check(haystack, haystack_len);
358357

359358
if ( is_ascii ) {
360359
unsigned char *needle_dup, *haystack_dup;
@@ -375,7 +374,7 @@ PHP_FUNCTION(grapheme_strripos)
375374

376375
/* if the needle was ascii too, we are done */
377376

378-
if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
377+
if ( grapheme_ascii_check(needle, needle_len) ) {
379378
RETURN_FALSE;
380379
}
381380

@@ -400,7 +399,7 @@ PHP_FUNCTION(grapheme_substr)
400399
{
401400
unsigned char *str, *sub_str;
402401
UChar *ustr;
403-
int str_len, sub_str_len, ustr_len;
402+
zend_str_size_int str_len, sub_str_len, ustr_len;
404403
long lstart = 0, length = 0;
405404
int32_t start = 0;
406405
int iter_val;
@@ -410,7 +409,7 @@ PHP_FUNCTION(grapheme_substr)
410409
int sub_str_start_pos, sub_str_end_pos;
411410
int32_t (*iter_func)(UBreakIterator *);
412411

413-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|l", (char **)&str, &str_len, &lstart, &length) == FAILURE) {
412+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Sl|l", (char **)&str, &str_len, &lstart, &length) == FAILURE) {
414413

415414
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
416415
"grapheme_substr: unable to parse input param", 0 TSRMLS_CC );
@@ -430,7 +429,7 @@ PHP_FUNCTION(grapheme_substr)
430429

431430
/* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
432431

433-
if ( grapheme_ascii_check(str, str_len) >= 0 ) {
432+
if ( grapheme_ascii_check(str, str_len) ) {
434433
grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len);
435434

436435
if ( NULL == sub_str ) {
@@ -613,11 +612,11 @@ PHP_FUNCTION(grapheme_substr)
613612
static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
614613
{
615614
unsigned char *haystack, *needle, *found;
616-
int haystack_len, needle_len;
615+
zend_str_size_int haystack_len, needle_len;
617616
int ret_pos, uchar_pos;
618617
zend_bool part = 0;
619618

620-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|b", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &part) == FAILURE) {
619+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|b", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &part) == FAILURE) {
621620

622621
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
623622
"grapheme_strstr: unable to parse input param", 0 TSRMLS_CC );
@@ -646,7 +645,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
646645
}
647646

648647
/* if it is there, and if the haystack is ascii, we are all done */
649-
if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
648+
if ( grapheme_ascii_check(haystack, haystack_len) ) {
650649
size_t found_offset = found - haystack;
651650

652651
if (part) {
@@ -810,7 +809,7 @@ PHP_FUNCTION(grapheme_extract)
810809
{
811810
unsigned char *str, *pstr;
812811
UChar *ustr;
813-
int str_len, ustr_len;
812+
zend_str_size_int str_len, ustr_len;
814813
long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
815814
long lstart = 0; /* starting position in str in bytes */
816815
int32_t start = 0;
@@ -821,7 +820,7 @@ PHP_FUNCTION(grapheme_extract)
821820
int ret_pos;
822821
zval *next = NULL; /* return offset of next part of the string */
823822

824-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|llz", (char **)&str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
823+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Sl|llz", (char **)&str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
825824

826825
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
827826
"grapheme_extract: unable to parse input param", 0 TSRMLS_CC );
@@ -890,7 +889,7 @@ PHP_FUNCTION(grapheme_extract)
890889
(size + 1 because the size-th character might be the beginning of a grapheme cluster)
891890
*/
892891

893-
if ( -1 != grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
892+
if ( grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
894893
long nsize = ( size < str_len ? size : str_len );
895894
if ( NULL != next ) {
896895
ZVAL_LONG(next, start+nsize);

ext/intl/grapheme/grapheme_util.c

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -48,34 +48,35 @@ grapheme_close_global_iterator( TSRMLS_D )
4848
}
4949
/* }}} */
5050

51+
/* XXX that's the same mess we have in substr(); revise it with care when int64 is integrated and get rid of these ugly casts */
5152
/* {{{ grapheme_substr_ascii f='from' - starting point, l='length' */
52-
void grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub_str, int *sub_str_len)
53+
void grapheme_substr_ascii(char *str, zend_str_size_int str_len, long f, long l, int argc, char **sub_str, zend_str_size_int *sub_str_len)
5354
{
5455
*sub_str = NULL;
5556

5657
if (argc > 2) {
5758
if ((l < 0 && -l > str_len)) {
5859
return;
59-
} else if (l > str_len) {
60+
} else if (l > (long)str_len) {
6061
l = str_len;
6162
}
6263
} else {
6364
l = str_len;
6465
}
6566

66-
if (f > str_len || (f < 0 && -f > str_len)) {
67+
if (f > (long)str_len || (f < 0 && -f > str_len)) {
6768
return;
6869
}
6970

70-
if (l < 0 && (l + str_len - f) < 0) {
71+
if (l < 0 && (l + (long)str_len - f) < 0) {
7172
return;
7273
}
7374

7475
/* if "from" position is negative, count start position from the end
7576
* of the string
7677
*/
7778
if (f < 0) {
78-
f = str_len + f;
79+
f = (long)str_len + f;
7980
if (f < 0) {
8081
f = 0;
8182
}
@@ -86,17 +87,17 @@ void grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char
8687
* needed to stop that many chars from the end of the string
8788
*/
8889
if (l < 0) {
89-
l = (str_len - f) + l;
90+
l = ((long)str_len - f) + l;
9091
if (l < 0) {
9192
l = 0;
9293
}
9394
}
9495

95-
if (f >= str_len) {
96+
if (f >= (long)str_len) {
9697
return;
9798
}
9899

99-
if ((f + l) > str_len) {
100+
if ((f + l) > (long)str_len) {
100101
l = str_len - f;
101102
}
102103

@@ -131,7 +132,8 @@ void grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char
131132
int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last TSRMLS_DC)
132133
{
133134
UChar *uhaystack = NULL, *uneedle = NULL;
134-
int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
135+
zend_str_size_int uhaystack_len = 0, uneedle_len = 0;
136+
int32_t char_pos, ret_pos, offset_pos = 0;
135137
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
136138
UBreakIterator* bi = NULL;
137139
UErrorCode status;
@@ -217,15 +219,17 @@ int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigne
217219
/* }}} */
218220

219221
/* {{{ grapheme_ascii_check: ASCII check */
220-
int grapheme_ascii_check(const unsigned char *day, int32_t len)
222+
int grapheme_ascii_check(const unsigned char *day, zend_str_size_int len)
221223
{
222-
int ret_len = len;
223-
while ( len-- ) {
224-
if ( *day++ > 0x7f )
225-
return -1;
224+
zend_str_size_int i = 0;
225+
226+
while ( i++ < len ) {
227+
if ( *day++ > 0x7f ) {
228+
return 0;
229+
}
226230
}
227231

228-
return ret_len;
232+
return 1;
229233
}
230234

231235
/* }}} */

ext/intl/grapheme/grapheme_util.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,13 @@
2323
/* grapheme_get_break_iterator: get a break iterator from the global structure */
2424
UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
2525

26-
void grapheme_substr_ascii(char *str, int32_t str_len, int32_t f, int32_t l, int argc, char **sub_str, int *sub_str_len);
26+
void grapheme_substr_ascii(char *str, zend_str_size_int str_len, long f, long l, int argc, char **sub_str, zend_str_size_int *sub_str_len);
2727

2828
int grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC);
2929

3030
int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last TSRMLS_DC);
3131

32-
int grapheme_ascii_check(const unsigned char *day, int32_t len);
32+
int grapheme_ascii_check(const unsigned char *day, zend_str_size_int len);
3333

3434
int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC );
3535

ext/intl/idn/idn.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -202,15 +202,15 @@ static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
202202
#endif
203203

204204
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
205-
const char *domain, int domain_len, uint32_t option, int mode)
205+
const char *domain, zend_str_size_int domain_len, uint32_t option, int mode)
206206
{
207207
UChar* ustring = NULL;
208-
int ustring_len = 0;
208+
zend_str_size_int ustring_len = 0;
209209
UErrorCode status;
210210
char *converted_utf8;
211-
int32_t converted_utf8_len;
211+
zend_str_size_int converted_utf8_len;
212212
UChar converted[MAXPATHLEN];
213-
int32_t converted_ret_len;
213+
zend_str_size_int converted_ret_len;
214214

215215
/* convert the string to UTF-16. */
216216
status = U_ZERO_ERROR;
@@ -262,14 +262,14 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
262262
static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
263263
{
264264
char *domain;
265-
int domain_len;
265+
zend_str_size_int domain_len;
266266
long option = 0,
267267
variant = INTL_IDN_VARIANT_2003;
268268
zval *idna_info = NULL;
269269

270270
intl_error_reset(NULL TSRMLS_CC);
271271

272-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
272+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S|llz",
273273
&domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
274274
php_intl_bad_args("bad arguments", mode TSRMLS_CC);
275275
RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */

0 commit comments

Comments
 (0)