From c108d690de4b7a46327697f4c7c0458f4bc64d9e Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 24 May 2025 12:33:45 +0200 Subject: [PATCH 1/2] Fix GH-18566: [intl] Weird numeric sort in Collator This aligns the behaviour with normal (non-intl) asort() by making the following changes: - Use the same trailing whitespace logic as Zend's is_numeric_ex() - Don't allow errors on trailing data Targeting master because of the BC break. --- UPGRADING | 3 ++ ext/intl/collator/collator_convert.c | 2 +- ext/intl/collator/collator_is_numeric.c | 10 +++++ ext/intl/tests/gh18566.phpt | 51 +++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 ext/intl/tests/gh18566.phpt diff --git a/UPGRADING b/UPGRADING index 400187a40f2c6..ecefb0eca867a 100644 --- a/UPGRADING +++ b/UPGRADING @@ -53,6 +53,9 @@ PHP 8.5 UPGRADE NOTES - Intl: . The extension now requires at least ICU 57.1. + . The behaviour of Collator::SORT_REGULAR with respect to handling numeric + strings is now aligned with the behaviour of SORT_REGULAR in ext/standard. + This is a consequence of fixing bug GH-18566. - LDAP: . 
ldap_get_option() and ldap_set_option() now throw a ValueError when diff --git a/ext/intl/collator/collator_convert.c b/ext/intl/collator/collator_convert.c index 463348f2a196b..84eccb7904564 100644 --- a/ext/intl/collator/collator_convert.c +++ b/ext/intl/collator/collator_convert.c @@ -317,7 +317,7 @@ zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv ) COLLATOR_CONVERT_RETURN_FAILED( str ); } - if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ 1 ) ) ) + if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ false ) ) ) { if( is_numeric == IS_LONG ) { ZVAL_LONG(rv, lval); diff --git a/ext/intl/collator/collator_is_numeric.c b/ext/intl/collator/collator_is_numeric.c index cf18c8a672223..4c473d9477116 100644 --- a/ext/intl/collator/collator_is_numeric.c +++ b/ext/intl/collator/collator_is_numeric.c @@ -200,6 +200,14 @@ static zend_long collator_u_strtol(const UChar *nptr, UChar **endptr, int base) } /* }}} */ +/* Consume (trailing) whitespace just like collator_u_strtol() consumes leading whitespace */ +static zend_always_inline UChar *collator_skip_ws(UChar *end_ptr) +{ + while (u_isspace(*end_ptr)) { + end_ptr++; + } + return end_ptr; +} /* {{{ collator_is_numeric] * Taken from PHP6:is_numeric_unicode() @@ -217,6 +225,7 @@ uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double errno=0; local_lval = collator_u_strtol(str, &end_ptr_long, 10); if (errno != ERANGE) { + end_ptr_long = collator_skip_ws(end_ptr_long); if (end_ptr_long == str+length) { /* integer string */ if (lval) { *lval = local_lval; @@ -233,6 +242,7 @@ uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double if (local_dval == 0 && end_ptr_double == str) { end_ptr_double = NULL; } else { + end_ptr_double = collator_skip_ws(end_ptr_double); if (end_ptr_double == str+length) { /* 
floating point string */ if (!zend_finite(local_dval)) { /* "inf","nan" and maybe other weird ones */ diff --git a/ext/intl/tests/gh18566.phpt b/ext/intl/tests/gh18566.phpt new file mode 100644 index 0000000000000..2319c78f1e779 --- /dev/null +++ b/ext/intl/tests/gh18566.phpt @@ -0,0 +1,51 @@ +--TEST-- +GH-18566 ([intl] Weird numeric sort in Collator) +--FILE-- +<?php + +$arr = [ + '2023-02-04 14:00:00', + '2023-01-08 12:00:00', + '2023-01-03 12:00:00', + '2023-01-03 12:00:00', + '2021-01-03 12:00:00', + '2023-01-05 14:00:00', + '2024-01-03 12:00:00', + '2023-01-03 12:00:00', + '', +]; + +$coll = Collator::create('en'); +$coll->asort($arr, Collator::SORT_REGULAR); +print_r($arr); + +$arr = [ + ' 100000', + ' 10', + ' -100 ', +]; + +$coll = Collator::create('en'); +$coll->asort($arr, Collator::SORT_REGULAR); +print_r($arr); + +?> +--EXPECT-- +Array +( + [8] => + [4] => 2021-01-03 12:00:00 + [2] => 2023-01-03 12:00:00 + [3] => 2023-01-03 12:00:00 + [7] => 2023-01-03 12:00:00 + [5] => 2023-01-05 14:00:00 + [1] => 2023-01-08 12:00:00 + [0] => 2023-02-04 14:00:00 + [6] => 2024-01-03 12:00:00 +) +Array +( + [2] => -100 + [1] => 10 + [0] => 100000 +) From bd9ca44468ecaa93c07ca4c1404dfce708fbef4b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 24 May 2025 13:14:19 +0200 Subject: [PATCH 2/2] Forgot extensions section --- ext/intl/tests/gh18566.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/intl/tests/gh18566.phpt b/ext/intl/tests/gh18566.phpt index 2319c78f1e779..2c689de7ab186 100644 --- a/ext/intl/tests/gh18566.phpt +++ b/ext/intl/tests/gh18566.phpt @@ -1,5 +1,7 @@ --TEST-- GH-18566 ([intl] Weird numeric sort in Collator) +--EXTENSIONS-- +intl --FILE-- <?php