diff --git a/UPGRADING b/UPGRADING index ff6d4bc048f94..f9e784357effd 100644 --- a/UPGRADING +++ b/UPGRADING @@ -109,6 +109,10 @@ PHP 8.5 UPGRADE NOTES . IntlDateFormatter::setTimeZone()/datefmt_set_timezone() throws an IntlException on uninitialised classes/clone failures. + . grapheme_extract() properly assigns $next value when skipping over + invalid starting bytes. Previously there were cases where it would + point to the start of the grapheme boundary instead of the end. + - PCNTL: . pcntl_exec() now has a formal return type of false. diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index f33dab8eafa22..77bf4319928a8 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -781,6 +781,7 @@ PHP_FUNCTION(grapheme_extract) while ( !U8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) { pstr++; + start++; if ( pstr >= str_end ) { intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: invalid input string", 0 ); diff --git a/ext/intl/tests/grapheme2.phpt b/ext/intl/tests/grapheme2.phpt index 1fb32a7368217..76aead806b43b 100644 --- a/ext/intl/tests/grapheme2.phpt +++ b/ext/intl/tests/grapheme2.phpt @@ -590,6 +590,8 @@ function ut_main() array( $char_a_ring_nfd . "bcde" . $char_a_ring_nfd . "f", 4, 5, 11, "de" . $char_a_ring_nfd . "f" ), array( $char_a_ring_nfd . "bcde" . $char_a_ring_nfd . "f", 4, -6, 11, "de" . $char_a_ring_nfd . "f" ), + array( "\x95\x00a\x85b", 1, 0, 2, "\x00" ), + array( $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 3, $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd ), array( $char_a_ring_nfd . $char_o_diaeresis_nfd . $char_o_diaeresis_nfd, 2, $char_a_ring_nfd . $char_o_diaeresis_nfd ), array( $char_a_ring_nfd . $char_o_diaeresis_nfd . "c", 1, $char_a_ring_nfd . "" ), @@ -1134,6 +1136,7 @@ extract from "a%CC%8Abcde" "2" graphemes - grapheme_extract starting at byte pos extract from "a%CC%8Abcde" "2" graphemes - grapheme_extract starting at byte position -7 with $next = a%CC%8Ab == a%CC%8Ab $next=4 == 4 extract from "a%CC%8Abcdea%CC%8Af" "4" graphemes - grapheme_extract starting at byte position 5 with $next = dea%CC%8Af == dea%CC%8Af $next=11 == 11 extract from "a%CC%8Abcdea%CC%8Af" "4" graphemes - grapheme_extract starting at byte position -6 with $next = dea%CC%8Af == dea%CC%8Af $next=11 == 11 +extract from "%95%00a%85b" "1" graphemes - grapheme_extract starting at byte position 0 with $next = %00 == %00 $next=2 == 2 extract from "a%CC%8Ao%CC%88o%CC%88" "3" graphemes - grapheme_extract = a%CC%8Ao%CC%88o%CC%88 == a%CC%8Ao%CC%88o%CC%88 extract from "a%CC%8Ao%CC%88o%CC%88" "2" graphemes - grapheme_extract = a%CC%8Ao%CC%88 == a%CC%8Ao%CC%88 extract from "a%CC%8Ao%CC%88c" "1" graphemes - grapheme_extract = a%CC%8A == a%CC%8A