Skip to content

Commit 5187ff2

Browse files
committed
Fix GH-18566: [intl] Weird numeric sort in Collator
This aligns the behaviour with normal (non-intl) asort() by making the following changes: (1) use the same trailing whitespace logic as Zend's is_numeric_ex(); (2) don't allow errors on trailing data. Targeting master because of the BC break. Closes GH-18632.
1 parent 5e21ffe commit 5187ff2

File tree

5 files changed

+68
-1
lines changed

5 files changed

+68
-1
lines changed

NEWS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ PHP NEWS
9898
adding/removing likely subtags to a locale. (David Carlier)
9999
. Added IntlListFormatter class to format a list of items with a locale
100100
, operands types and units. (BogdanUngureanu)
101+
. Fixed bug GH-18566 ([intl] Weird numeric sort in Collator). (nielsdos)
101102

102103
- LDAP:
103104
. Allow ldap_get_option to retrieve global option by allowing NULL for

UPGRADING

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ PHP 8.5 UPGRADE NOTES
5353

5454
- Intl:
5555
. The extension now requires at least ICU 57.1.
56+
. The behaviour of Collator::SORT_REGULAR with respect to handling numeric
57+
strings is now aligned with the behaviour of SORT_REGULAR in ext/standard.
58+
This is a consequence of fixing bug GH-18566.
5659

5760
- LDAP:
5861
. ldap_get_option() and ldap_set_option() now throw a ValueError when

ext/intl/collator/collator_convert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
317317
COLLATOR_CONVERT_RETURN_FAILED( str );
318318
}
319319

320-
if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ 1 ) ) )
320+
if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ false ) ) )
321321
{
322322
if( is_numeric == IS_LONG ) {
323323
ZVAL_LONG(rv, lval);

ext/intl/collator/collator_is_numeric.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,14 @@ static zend_long collator_u_strtol(const UChar *nptr, UChar **endptr, int base)
200200
}
201201
/* }}} */
202202

203+
/* Consume (trailing) whitespace just like collator_u_strtol() consumes leading whitespace */
204+
static zend_always_inline UChar *collator_skip_ws(UChar *end_ptr)
205+
{
206+
while (u_isspace(*end_ptr)) {
207+
end_ptr++;
208+
}
209+
return end_ptr;
210+
}
203211

204212
/* {{{ collator_is_numeric]
205213
* Taken from PHP6:is_numeric_unicode()
@@ -217,6 +225,7 @@ uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double
217225
errno=0;
218226
local_lval = collator_u_strtol(str, &end_ptr_long, 10);
219227
if (errno != ERANGE) {
228+
end_ptr_long = collator_skip_ws(end_ptr_long);
220229
if (end_ptr_long == str+length) { /* integer string */
221230
if (lval) {
222231
*lval = local_lval;
@@ -233,6 +242,7 @@ uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double
233242
if (local_dval == 0 && end_ptr_double == str) {
234243
end_ptr_double = NULL;
235244
} else {
245+
end_ptr_double = collator_skip_ws(end_ptr_double);
236246
if (end_ptr_double == str+length) { /* floating point string */
237247
if (!zend_finite(local_dval)) {
238248
/* "inf","nan" and maybe other weird ones */

ext/intl/tests/gh18566.phpt

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
--TEST--
2+
GH-18566 ([intl] Weird numeric sort in Collator)
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
8+
$arr = [
9+
'2023-02-04 14:00:00',
10+
'2023-01-08 12:00:00',
11+
'2023-01-03 12:00:00',
12+
'2023-01-03 12:00:00',
13+
'2021-01-03 12:00:00',
14+
'2023-01-05 14:00:00',
15+
'2024-01-03 12:00:00',
16+
'2023-01-03 12:00:00',
17+
' ',
18+
];
19+
20+
$coll = Collator::create('en');
21+
$coll->asort($arr, Collator::SORT_REGULAR);
22+
print_r($arr);
23+
24+
$arr = [
25+
' 100000',
26+
' 10',
27+
' -100 ',
28+
];
29+
30+
$coll = Collator::create('en');
31+
$coll->asort($arr, Collator::SORT_REGULAR);
32+
print_r($arr);
33+
34+
?>
35+
--EXPECT--
36+
Array
37+
(
38+
[8] =>
39+
[4] => 2021-01-03 12:00:00
40+
[2] => 2023-01-03 12:00:00
41+
[3] => 2023-01-03 12:00:00
42+
[7] => 2023-01-03 12:00:00
43+
[5] => 2023-01-05 14:00:00
44+
[1] => 2023-01-08 12:00:00
45+
[0] => 2023-02-04 14:00:00
46+
[6] => 2024-01-03 12:00:00
47+
)
48+
Array
49+
(
50+
[2] => -100
51+
[1] => 10
52+
[0] => 100000
53+
)

0 commit comments

Comments
 (0)