Skip to content

Commit b8f0a5a

Browse files
committed
Align grapheme_substr() behavior with substr()
1 parent c435fff commit b8f0a5a

File tree

6 files changed

+113
-112
lines changed

6 files changed

+113
-112
lines changed

ext/intl/grapheme/grapheme_string.c

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -371,22 +371,20 @@ PHP_FUNCTION(grapheme_substr)
371371
RETURN_THROWS();
372372
}
373373

374-
if ( OUTSIDE_STRING(lstart, str_len)) {
375-
zend_argument_value_error(2, "must be contained in argument #1 ($string)");
374+
if (lstart < INT32_MIN || lstart > INT32_MAX) {
375+
zend_argument_value_error(2, "is too large");
376376
RETURN_THROWS();
377377
}
378378

379-
/* we checked that it will fit: */
380379
start = (int32_t) lstart;
381380

382-
if(no_length) {
381+
if (no_length) {
383382
length = str_len;
384383
}
385384

386-
if(length < INT32_MIN) {
387-
length = INT32_MIN;
388-
} else if(length > INT32_MAX) {
389-
length = INT32_MAX;
385+
if (length < INT32_MIN || length > INT32_MAX) {
386+
zend_argument_value_error(3, "is too large");
387+
RETURN_THROWS();
390388
}
391389

392390
/* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
@@ -451,15 +449,17 @@ PHP_FUNCTION(grapheme_substr)
451449
start += iter_val;
452450
}
453451

454-
if ( 0 != start || sub_str_start_pos >= ustr_len ) {
455-
456-
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
457-
458-
if (ustr) {
459-
efree(ustr);
452+
if (0 != start) {
453+
if (start > 0) {
454+
if (ustr) {
455+
efree(ustr);
456+
}
457+
ubrk_close(bi);
458+
RETURN_EMPTY_STRING();
460459
}
461-
ubrk_close(bi);
462-
RETURN_FALSE;
460+
461+
sub_str_start_pos = 0;
462+
ubrk_first(bi);
463463
}
464464

465465
/* OK to convert here since if str_len were big, convert above would fail */
@@ -526,20 +526,17 @@ PHP_FUNCTION(grapheme_substr)
526526
ubrk_close(bi);
527527

528528
if ( UBRK_DONE == sub_str_end_pos) {
529-
if(length < 0) {
530-
zend_argument_value_error(3, "must be contained in argument #1 ($string)");
529+
if (length < 0) {
531530
efree(ustr);
532-
RETURN_THROWS();
531+
RETURN_EMPTY_STRING();
533532
} else {
534533
sub_str_end_pos = ustr_len;
535534
}
536535
}
537536

538-
if(sub_str_start_pos > sub_str_end_pos) {
539-
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
540-
537+
if (sub_str_start_pos > sub_str_end_pos) {
541538
efree(ustr);
542-
RETURN_FALSE;
539+
RETURN_EMPTY_STRING();
543540
}
544541

545542
status = U_ZERO_ERROR;

ext/intl/grapheme/grapheme_util.c

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,6 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
5757
return;
5858
}
5959

60-
if ((l < 0 && -l > str_len2)) {
61-
return;
62-
} else if (l > 0 && l > str_len2) {
63-
l = str_len2;
64-
}
65-
66-
if (f > str_len2 || (f < 0 && -f > str_len2)) {
67-
return;
68-
}
69-
70-
if (l < 0 && str_len2 < f - l) {
71-
return;
72-
}
73-
7460
/* if "from" position is negative, count start position from the end
7561
* of the string
7662
*/
@@ -79,8 +65,9 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
7965
if (f < 0) {
8066
f = 0;
8167
}
82-
}
83-
68+
} else if (f > str_len2) {
69+
f = str_len2;
70+
}
8471

8572
/* if "length" position is negative, set it to the length
8673
* needed to stop that many chars from the end of the string
@@ -90,20 +77,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
9077
if (l < 0) {
9178
l = 0;
9279
}
93-
}
94-
95-
if (f >= str_len2) {
96-
return;
97-
}
98-
99-
if ((f + l) > str_len2) {
100-
l = str_len - f;
101-
}
80+
} else if (l > str_len2 - f) {
81+
l = str_len2 - f;
82+
}
10283

10384
*sub_str = str + f;
10485
*sub_str_len = l;
105-
106-
return;
10786
}
10887
/* }}} */
10988

ext/intl/tests/bug62759.phpt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ var_dump(grapheme_substr('déjà', -1, 0));
1616
--EXPECT--
1717
string(0) ""
1818
string(0) ""
19-
bool(false)
20-
string(61) "grapheme_substr: invalid parameters: U_ILLEGAL_ARGUMENT_ERROR"
2119
string(0) ""
22-
bool(false)
23-
string(65) "grapheme_substr: length is beyond start: U_ILLEGAL_ARGUMENT_ERROR"
20+
string(12) "U_ZERO_ERROR"
21+
string(0) ""
22+
string(0) ""
23+
string(12) "U_ZERO_ERROR"
2424
string(0) ""

ext/intl/tests/grapheme.phpt

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -325,23 +325,23 @@ function ut_main()
325325

326326
$tests = array(
327327

328-
array( "abc", 3, "false" ),
329-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false" ),
328+
array( "abc", 3, "" ),
329+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "" ),
330330
array( "ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 2, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O" ),
331331
array( $char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bc", 2, "a" . $char_A_ring_nfd . "bc" ),
332332
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 5, "O" ),
333-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false" ),
333+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "" ),
334334
array( "a" . $char_a_ring_nfd . "bc" . $char_O_diaeresis_nfd, 4, $char_O_diaeresis_nfd ),
335335
array( $char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", 2, $char_a_ring_nfd . "bc" ),
336336
array( "a" . $char_A_ring_nfd . "bc", 1, $char_A_ring_nfd . "bc" ),
337-
array( "Abc", -5, "false" ),
338-
array( $char_a_ring_nfd . "bc", 3, "false" ),
339-
array( "abc", 4, "false" ),
337+
array( "Abc", -5, "Abc" ),
338+
array( $char_a_ring_nfd . "bc", 3, "" ),
339+
array( "abc", 4, "" ),
340340
array( "abC", 2, "C" ),
341341
array( "abc", 1, "bc" ),
342342
array( "Abc", 1, 1, "b" ),
343343
array( "abc", 0, 2, "ab" ),
344-
array( "Abc", -4, 1, "false" ),
344+
array( "Abc", -4, 1, "A" ),
345345
array( "ababc", 1, 2, "ba" ),
346346
array( "ababc", 0, 10, "ababc" ),
347347

@@ -350,7 +350,7 @@ function ut_main()
350350
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -1, "Op" ),
351351
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -2, "O" ),
352352
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -3, "" ),
353-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "false" ),
353+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "" ),
354354

355355
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
356356
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
@@ -361,7 +361,7 @@ function ut_main()
361361
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -6, "a" . $char_a_ring_nfd ),
362362
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -7, "a" ),
363363
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -8, "" ),
364-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "false" ),
364+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "" ),
365365

366366
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
367367
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -7, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
@@ -371,7 +371,7 @@ function ut_main()
371371
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -3, "Opq" ),
372372
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -2, "pq" ),
373373
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -1, "q" ),
374-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "false" ),
374+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
375375

376376
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
377377
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 7, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
@@ -382,7 +382,7 @@ function ut_main()
382382
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 2, "a" . $char_a_ring_nfd ),
383383
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 1, "a" ),
384384
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 0, "" ),
385-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "false" ),
385+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "" ),
386386

387387
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
388388
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -2, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O" ),
@@ -392,7 +392,7 @@ function ut_main()
392392
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -6, "a" . $char_a_ring_nfd ),
393393
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -7, "a" ),
394394
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -8, "" ),
395-
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "false" ),
395+
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "" ),
396396

397397
);
398398

@@ -973,34 +973,31 @@ find "a%CC%8ABca%CC%8A" in "o%CC%88a%CC%8AaA%CC%8AbCa%CC%8Adef" - grapheme_strri
973973

974974
function grapheme_substr($string, $start, $length = -1) {}
975975

976-
substring of "abc" from "3" - grapheme_substr = false == false
977-
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = false == false
976+
substring of "abc" from "3" - grapheme_substr = ==
977+
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = ==
978978
substring of "aoa%CC%8Abco%CC%88O" from "2" - grapheme_substr = a%CC%8Abco%CC%88O == a%CC%8Abco%CC%88O
979979
substring of "o%CC%88a%CC%8AaA%CC%8Abc" from "2" - grapheme_substr = aA%CC%8Abc == aA%CC%8Abc
980980
substring of "aa%CC%8Abco%CC%88O" from "5" - grapheme_substr = O == O
981-
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = false == false
981+
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = ==
982982
substring of "aa%CC%8AbcO%CC%88" from "4" - grapheme_substr = O%CC%88 == O%CC%88
983983
substring of "o%CC%88aa%CC%8Abc" from "2" - grapheme_substr = a%CC%8Abc == a%CC%8Abc
984984
substring of "aA%CC%8Abc" from "1" - grapheme_substr = A%CC%8Abc == A%CC%8Abc
985-
substring of "Abc" from "-5" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
986-
= A%CC%8Abc == false **FAILED**
987-
substring of "a%CC%8Abc" from "3" - grapheme_substr = false == false
988-
substring of "abc" from "4" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
989-
= false == false
985+
substring of "Abc" from "-5" - grapheme_substr = Abc == Abc
986+
substring of "a%CC%8Abc" from "3" - grapheme_substr = ==
987+
substring of "abc" from "4" - grapheme_substr = ==
990988
substring of "abC" from "2" - grapheme_substr = C == C
991989
substring of "abc" from "1" - grapheme_substr = bc == bc
992990
substring of "Abc" from "1" - grapheme_substr with length 1 = b == b
993991
substring of "abc" from "0" - grapheme_substr with length 2 = ab == ab
994-
substring of "Abc" from "-4" - grapheme_substr with length 1: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
995-
= ab == false **FAILED**
992+
substring of "Abc" from "-4" - grapheme_substr with length 1 = A == A
996993
substring of "ababc" from "1" - grapheme_substr with length 2 = ba == ba
997994
substring of "ababc" from "0" - grapheme_substr with length 10 = ababc == ababc
998995
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length 10 = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
999996
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr = Opq == Opq
1000997
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -1 = Op == Op
1001998
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -2 = O == O
1002999
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -3 = ==
1003-
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -4 = false == false
1000+
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -4 = ==
10041001
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
10051002
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -1 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
10061003
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -2 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
@@ -1010,8 +1007,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -5 =
10101007
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -6 = aa%CC%8A == aa%CC%8A
10111008
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -7 = a == a
10121009
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -8 = ==
1013-
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -9: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
1014-
= == false **FAILED**
1010+
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -9 = ==
10151011
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
10161012
substring of "aa%CC%8Abco%CC%88Opq" from "-7" - grapheme_substr = a%CC%8Abco%CC%88Opq == a%CC%8Abco%CC%88Opq
10171013
substring of "aa%CC%8Abco%CC%88Opq" from "-6" - grapheme_substr = bco%CC%88Opq == bco%CC%88Opq
@@ -1020,8 +1016,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-4" - grapheme_substr = o%CC%88Opq ==
10201016
substring of "aa%CC%8Abco%CC%88Opq" from "-3" - grapheme_substr = Opq == Opq
10211017
substring of "aa%CC%8Abco%CC%88Opq" from "-2" - grapheme_substr = pq == pq
10221018
substring of "aa%CC%8Abco%CC%88Opq" from "-1" - grapheme_substr = q == q
1023-
substring of "aa%CC%8Abco%CC%88Opq" from "-999" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
1024-
= q == false **FAILED**
1019+
substring of "aa%CC%8Abco%CC%88Opq" from "-999" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
10251020
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 8 = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
10261021
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 7 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
10271022
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 6 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
@@ -1031,8 +1026,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 3 =
10311026
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 2 = aa%CC%8A == aa%CC%8A
10321027
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 1 = a == a
10331028
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 0 = ==
1034-
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -999: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
1035-
= == false **FAILED**
1029+
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -999 = ==
10361030
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -1 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
10371031
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -2 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
10381032
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -3 = aa%CC%8Abco%CC%88 == aa%CC%8Abco%CC%88
@@ -1041,8 +1035,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -5 =
10411035
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -6 = aa%CC%8A == aa%CC%8A
10421036
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -7 = a == a
10431037
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -8 = ==
1044-
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -9: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
1045-
= == false **FAILED**
1038+
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -9 = ==
10461039

10471040
function grapheme_strstr($haystack, $needle, $before_needle = FALSE) {}
10481041

0 commit comments

Comments
 (0)