From 85225a886d00c6cb9d69e3a547098e74febfac1a Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Sun, 26 Nov 2023 15:40:10 +0100 Subject: [PATCH 1/2] Use zval_try_get_long() in NumberFormatter::format() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To improve precision handling in cases where the number doesn’t fit into a 32-bit or 64-bit integer. A float that doesn’t fit into a long will produce a deprecation warning; for NumberFormatter::TYPE_INT32, a number that doesn’t fit into 32 bits will additionally produce a ValueError. (This is inconsistent, but I’m not sure how to do it better.) --- ext/intl/formatter/formatter_format.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/ext/intl/formatter/formatter_format.c b/ext/intl/formatter/formatter_format.c index 1c9d5b5bd9103..be5c57af57c1e 100644 --- a/ext/intl/formatter/formatter_format.c +++ b/ext/intl/formatter/formatter_format.c @@ -59,24 +59,37 @@ PHP_FUNCTION( numfmt_format ) switch(type) { case FORMAT_TYPE_INT32: + { + bool failed = true; + int64_t value_64 = zval_try_get_long(number, &failed); + if (failed || value_64 < -2147483648 || value_64 > 2147483647) { + zend_argument_value_error(object ? 1 : 2, "must be numeric and fit in 32 bits"); + RETURN_THROWS(); + } convert_to_long(number); - formatted_len = unum_format(FORMATTER_OBJECT(nfo), (int32_t)Z_LVAL_P(number), + formatted_len = unum_format(FORMATTER_OBJECT(nfo), (int32_t)value_64, formatted, formatted_len, NULL, &INTL_DATA_ERROR_CODE(nfo)); if (INTL_DATA_ERROR_CODE(nfo) == U_BUFFER_OVERFLOW_ERROR) { intl_error_reset(INTL_DATA_ERROR_P(nfo)); formatted = eumalloc(formatted_len); - formatted_len = unum_format(FORMATTER_OBJECT(nfo), (int32_t)Z_LVAL_P(number), + formatted_len = unum_format(FORMATTER_OBJECT(nfo), (int32_t)value_64, formatted, formatted_len, NULL, &INTL_DATA_ERROR_CODE(nfo)); if (U_FAILURE( INTL_DATA_ERROR_CODE(nfo) ) ) { efree(formatted); } } INTL_METHOD_CHECK_STATUS( nfo, "Number formatting failed" ); + } break; case FORMAT_TYPE_INT64: { - int64_t value = (Z_TYPE_P(number) == IS_DOUBLE)?(int64_t)Z_DVAL_P(number):Z_LVAL_P(number); + bool failed = true; + int64_t value = zval_try_get_long(number, &failed); + if (failed) { + zend_argument_value_error(object ? 1 : 2, "must be numeric"); + RETURN_THROWS(); + } formatted_len = unum_formatInt64(FORMATTER_OBJECT(nfo), value, formatted, formatted_len, NULL, &INTL_DATA_ERROR_CODE(nfo)); if (INTL_DATA_ERROR_CODE(nfo) == U_BUFFER_OVERFLOW_ERROR) { intl_error_reset(INTL_DATA_ERROR_P(nfo)); From 18b01361a58dd4f596949ca58aa929bede2811e7 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Fri, 22 Jan 2021 17:24:18 -0500 Subject: [PATCH 2/2] Fix #76093: Format strings w/o loss of precision w/ FORMAT_TYPE_DECIMAL Passing the argument to NumberFormat::format() as a number loses precision if the value can not be represented precisely as a double or long integer. The icu library provides a "decimal number" type that avoids the loss of prevision when the value is passed as a string. Add a new FORMAT_TYPE_DECIMAL to explicitly request the argument be converted to a string and then passed to icu that way. Co-authored-by: Gina Peter Banyard --- ext/intl/formatter/formatter.stub.php | 4 +- ext/intl/formatter/formatter_arginfo.h | 10 ++- ext/intl/formatter/formatter_format.c | 50 ++++++++++++-- ext/intl/formatter/formatter_format.h | 1 + ext/intl/php_intl.stub.php | 2 +- ext/intl/php_intl_arginfo.h | 4 +- ext/intl/tests/bug48227.phpt | 38 +++++++++-- ext/intl/tests/bug76093.phpt | 91 ++++++++++++++++++++++++++ 8 files changed, 185 insertions(+), 15 deletions(-) create mode 100644 ext/intl/tests/bug76093.phpt diff --git a/ext/intl/formatter/formatter.stub.php b/ext/intl/formatter/formatter.stub.php index 430c02bd60a67..58782400d082b 100644 --- a/ext/intl/formatter/formatter.stub.php +++ b/ext/intl/formatter/formatter.stub.php @@ -171,6 +171,8 @@ class NumberFormatter public const int TYPE_INT64 = UNKNOWN; /** @cvalue FORMAT_TYPE_DOUBLE */ public const int TYPE_DOUBLE = UNKNOWN; + /** @cvalue FORMAT_TYPE_DECIMAL */ + public const int TYPE_DECIMAL = UNKNOWN; /** * @deprecated * @cvalue FORMAT_TYPE_CURRENCY @@ -189,7 +191,7 @@ public static function create(string $locale, int $style, ?string $pattern = nul * @tentative-return-type * @alias numfmt_format */ - public function format(int|float $num, int $type = NumberFormatter::TYPE_DEFAULT): string|false {} + public function format(int|float|string $num, int $type = NumberFormatter::TYPE_DEFAULT): string|false {} /** * @param int $offset diff --git a/ext/intl/formatter/formatter_arginfo.h b/ext/intl/formatter/formatter_arginfo.h index 8d406350aef5f..c05b2decc7215 100644 --- a/ext/intl/formatter/formatter_arginfo.h +++ b/ext/intl/formatter/formatter_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 6178ab530b85aca5c90fb7369c5019bb5cbfe8a7 */ + * Stub hash: de07167400ecde33e20835a186f716e21a75cc78 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_NumberFormatter___construct, 0, 0, 2) ZEND_ARG_TYPE_INFO(0, locale, IS_STRING, 0) @@ -14,7 +14,7 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_NumberFormatter_c ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_NumberFormatter_format, 0, 1, MAY_BE_STRING|MAY_BE_FALSE) - ZEND_ARG_TYPE_MASK(0, num, MAY_BE_LONG|MAY_BE_DOUBLE, NULL) + ZEND_ARG_TYPE_MASK(0, num, MAY_BE_LONG|MAY_BE_DOUBLE|MAY_BE_STRING, NULL) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, type, IS_LONG, 0, "NumberFormatter::TYPE_DEFAULT") ZEND_END_ARG_INFO() @@ -568,6 +568,12 @@ static zend_class_entry *register_class_NumberFormatter(void) zend_declare_typed_class_constant(class_entry, const_TYPE_DOUBLE_name, &const_TYPE_DOUBLE_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); zend_string_release(const_TYPE_DOUBLE_name); + zval const_TYPE_DECIMAL_value; + ZVAL_LONG(&const_TYPE_DECIMAL_value, FORMAT_TYPE_DECIMAL); + zend_string *const_TYPE_DECIMAL_name = zend_string_init_interned("TYPE_DECIMAL", sizeof("TYPE_DECIMAL") - 1, 1); + zend_declare_typed_class_constant(class_entry, const_TYPE_DECIMAL_name, &const_TYPE_DECIMAL_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); + zend_string_release(const_TYPE_DECIMAL_name); + zval const_TYPE_CURRENCY_value; ZVAL_LONG(&const_TYPE_CURRENCY_value, FORMAT_TYPE_CURRENCY); zend_string *const_TYPE_CURRENCY_name = zend_string_init_interned("TYPE_CURRENCY", sizeof("TYPE_CURRENCY") - 1, 1); diff --git a/ext/intl/formatter/formatter_format.c b/ext/intl/formatter/formatter_format.c index be5c57af57c1e..95cda56652dd5 100644 --- a/ext/intl/formatter/formatter_format.c +++ b/ext/intl/formatter/formatter_format.c @@ -35,7 +35,7 @@ PHP_FUNCTION( numfmt_format ) FORMATTER_METHOD_INIT_VARS; /* Parse parameters. */ - if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "On|l", + if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "Oz|l", &object, NumberFormatter_ce_ptr, &number, &type ) == FAILURE ) { RETURN_THROWS(); @@ -53,17 +53,38 @@ PHP_FUNCTION( numfmt_format ) case IS_DOUBLE: type = FORMAT_TYPE_DOUBLE; break; - EMPTY_SWITCH_DEFAULT_CASE(); + case IS_STRING: + type = FORMAT_TYPE_DECIMAL; + break; + case IS_OBJECT: + type = FORMAT_TYPE_DECIMAL; + break; + default: + zend_argument_type_error(1, "must be of type int|float|string, %s given", zend_zval_type_name(number)); } } + // Avoid losing precision on 32-bit platforms where PHP's "long" isn't + // as long as the FORMAT_TYPE_INT64 which is requested. +#if SIZEOF_ZEND_LONG < 8 + if (Z_TYPE_P(number) == IS_STRING && type == FORMAT_TYPE_INT64) { + type = FORMAT_TYPE_DECIMAL; + } +#endif + switch(type) { case FORMAT_TYPE_INT32: { bool failed = true; int64_t value_64 = zval_try_get_long(number, &failed); - if (failed || value_64 < -2147483648 || value_64 > 2147483647) { - zend_argument_value_error(object ? 1 : 2, "must be numeric and fit in 32 bits"); + if (failed) { + zend_argument_type_error(getThis() ? 1 : 2, + "must be of type int when argument #%d ($format) is NumberFormatter::TYPE_INT32", getThis() ? 2 : 3); + RETURN_THROWS(); + } + if (value_64 < -2147483648 || value_64 > 2147483647) { + zend_argument_value_error(getThis() ? 1 : 2, + "must fit in 32 bits when argument #%d ($format) is NumberFormatter::TYPE_INT32", getThis() ? 2 : 3); RETURN_THROWS(); } convert_to_long(number); @@ -87,7 +108,8 @@ PHP_FUNCTION( numfmt_format ) bool failed = true; int64_t value = zval_try_get_long(number, &failed); if (failed) { - zend_argument_value_error(object ? 1 : 2, "must be numeric"); + zend_argument_type_error(getThis() ? 1 : 2, + "must be of type int when argument #%d ($format) is NumberFormatter::TYPE_INT64", getThis() ? 2 : 3); RETURN_THROWS(); } formatted_len = unum_formatInt64(FORMATTER_OBJECT(nfo), value, formatted, formatted_len, NULL, &INTL_DATA_ERROR_CODE(nfo)); @@ -116,6 +138,24 @@ PHP_FUNCTION( numfmt_format ) } INTL_METHOD_CHECK_STATUS( nfo, "Number formatting failed" ); break; + + case FORMAT_TYPE_DECIMAL: + if (!try_convert_to_string(number)) { + RETURN_THROWS(); + } + // Convert string as a DecimalNumber, so we don't lose precision + formatted_len = unum_formatDecimal(FORMATTER_OBJECT(nfo), Z_STRVAL_P(number), Z_STRLEN_P(number), formatted, formatted_len, NULL, &INTL_DATA_ERROR_CODE(nfo)); + if (INTL_DATA_ERROR_CODE(nfo) == U_BUFFER_OVERFLOW_ERROR) { + intl_error_reset(INTL_DATA_ERROR_P(nfo)); + formatted = eumalloc(formatted_len); + unum_formatDecimal(FORMATTER_OBJECT(nfo), Z_STRVAL_P(number), Z_STRLEN_P(number), formatted, formatted_len, NULL, &INTL_DATA_ERROR_CODE(nfo)); + if (U_FAILURE( INTL_DATA_ERROR_CODE(nfo) ) ) { + efree(formatted); + } + } + INTL_METHOD_CHECK_STATUS( nfo, "Number formatting failed" ); + break; + case FORMAT_TYPE_CURRENCY: if (getThis()) { const char *space; diff --git a/ext/intl/formatter/formatter_format.h b/ext/intl/formatter/formatter_format.h index 0238d5d4b8b08..58a2cc63a40e1 100644 --- a/ext/intl/formatter/formatter_format.h +++ b/ext/intl/formatter/formatter_format.h @@ -22,5 +22,6 @@ #define FORMAT_TYPE_INT64 2 #define FORMAT_TYPE_DOUBLE 3 #define FORMAT_TYPE_CURRENCY 4 +#define FORMAT_TYPE_DECIMAL 5 #endif // FORMATTER_FORMAT_H diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index eab42fcc0ff57..172874650d83c 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -390,7 +390,7 @@ function datefmt_get_error_message(IntlDateFormatter $formatter): string {} function numfmt_create(string $locale, int $style, ?string $pattern = null): ?NumberFormatter {} -function numfmt_format(NumberFormatter $formatter, int|float $num, int $type = NumberFormatter::TYPE_DEFAULT): string|false {} +function numfmt_format(NumberFormatter $formatter, int|float|string $num, int $type = NumberFormatter::TYPE_DEFAULT): string|false {} /** @param int $offset */ function numfmt_parse(NumberFormatter $formatter, string $string, int $type = NumberFormatter::TYPE_DOUBLE, &$offset = null): int|float|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index c05ecb7b24973..1567dd9a5dd6b 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: c32e74bddb55455f69083a302bcaf52f654b1293 */ + * Stub hash: 0debcd6d3e433715cb5c6615a15dec48bdc4e2dc */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -375,7 +375,7 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_numfmt_format, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_OBJ_INFO(0, formatter, NumberFormatter, 0) - ZEND_ARG_TYPE_MASK(0, num, MAY_BE_LONG|MAY_BE_DOUBLE, NULL) + ZEND_ARG_TYPE_MASK(0, num, MAY_BE_LONG|MAY_BE_DOUBLE|MAY_BE_STRING, NULL) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, type, IS_LONG, 0, "NumberFormatter::TYPE_DEFAULT") ZEND_END_ARG_INFO() diff --git a/ext/intl/tests/bug48227.phpt b/ext/intl/tests/bug48227.phpt index 8371a872eaee8..31aeb1c101b0c 100644 --- a/ext/intl/tests/bug48227.phpt +++ b/ext/intl/tests/bug48227.phpt @@ -5,8 +5,11 @@ intl --FILE-- format($value)); } catch (TypeError $ex) { @@ -14,11 +17,38 @@ foreach (['', 1, NULL, $x] as $value) { } } +echo "\nProcedural\n"; +$x = numfmt_create('en_US', NumberFormatter::DECIMAL); +foreach (array_merge($testcases, [$x]) as $value) { + try { + var_dump(numfmt_format($x, $value)); + } catch (TypeError $ex) { + echo $ex->getMessage(), PHP_EOL; + } +} + ?> --EXPECTF-- -NumberFormatter::format(): Argument #1 ($num) must be of type int|float, string given +OOP +bool(false) string(1) "1" -Deprecated: NumberFormatter::format(): Passing null to parameter #1 ($num) of type int|float is deprecated in %s on line %d +Deprecated: NumberFormatter::format(): Passing null to parameter #1 ($num) of type string|int|float is deprecated in %s on line %d +string(1) "0" +string(1) "1" +string(1) "0" +NumberFormatter::format(): Argument #1 ($num) must be of type string|int|float, array given +NumberFormatter::format(): Argument #1 ($num) must be of type string|int|float, stdClass given +NumberFormatter::format(): Argument #1 ($num) must be of type string|int|float, NumberFormatter given + +Non-OOP +bool(false) +string(1) "1" + +Deprecated: numfmt_format(): Passing null to parameter #2 ($num) of type string|int|float is deprecated in %s on line %d +string(1) "0" +string(1) "1" string(1) "0" -NumberFormatter::format(): Argument #1 ($num) must be of type int|float, NumberFormatter given +numfmt_format(): Argument #2 ($num) must be of type string|int|float, array given +numfmt_format(): Argument #2 ($num) must be of type string|int|float, stdClass given +numfmt_format(): Argument #2 ($num) must be of type string|int|float, NumberFormatter given diff --git a/ext/intl/tests/bug76093.phpt b/ext/intl/tests/bug76093.phpt new file mode 100644 index 0000000000000..74bc798a43ad7 --- /dev/null +++ b/ext/intl/tests/bug76093.phpt @@ -0,0 +1,91 @@ +--TEST-- +Bug #76093 (NumberFormatter::format loses precision) +--EXTENSIONS-- +intl +--FILE-- +string; + } +} + +# See also https://phabricator.wikimedia.org/T268456 +$x = new NumberFormatter('en_US', NumberFormatter::DECIMAL); +foreach ([ + '999999999999999999', # Fits in signed 64-bit integer + '9999999999999999999', # Does not fit in signed 64-bit integer + 9999999999999999999, # Precision loss seen when passing as number + ] as $value) { + try { + var_dump([ + 'input' => $value, + 'default' => $x->format($value), + # Note that TYPE_INT64 isn't actually guaranteed to have an + # 64-bit integer as input, because PHP on 32-bit platforms only + # has 32-bit integers. If you pass the value as a string, PHP + # will use the TYPE_DECIMAL type in order to extend the range. + # Also, casting from double to int64 when the int64 range + # is exceeded results in an implementation-defined value. + 'int64' => $x->format($value, NumberFormatter::TYPE_INT64), + 'double' => $x->format($value, NumberFormatter::TYPE_DOUBLE), + 'decimal' => $x->format($value, NumberFormatter::TYPE_DECIMAL), + ]); + } catch (TypeError $ex) { + echo $ex->getMessage(), PHP_EOL; + } +} + +# Stringable object also supported +try { + echo $x->format(new Bug76093Stringable('9999999999999999999')), PHP_EOL; +} catch (TypeError $ex) { + echo $ex->getMessage(), PHP_EOL; +} + +?> +--EXPECTF-- +array(5) { + ["input"]=> + string(18) "999999999999999999" + ["default"]=> + string(23) "999,999,999,999,999,999" + ["int64"]=> + string(23) "999,999,999,999,999,999" + ["double"]=> + string(25) "1,000,000,000,000,000,000" + ["decimal"]=> + string(23) "999,999,999,999,999,999" +} + +Deprecated: Implicit conversion from float-string "9999999999999999999" to int loses precision in %s on line %d +array(5) { + ["input"]=> + string(19) "9999999999999999999" + ["default"]=> + string(25) "9,999,999,999,999,999,999" + ["int64"]=> + string(%d) "%r9,223,372,036,854,775,807|9,999,999,999,999,999,999%r" + ["double"]=> + string(26) "10,000,000,000,000,000,000" + ["decimal"]=> + string(25) "9,999,999,999,999,999,999" +} + +Deprecated: Implicit conversion from float 1.0E+19 to int loses precision in %s on line %d +array(5) { + ["input"]=> + float(1.0E+19) + ["default"]=> + string(26) "10,000,000,000,000,000,000" + ["int64"]=> + string(%d) "%r[+-]?[0-9,]+%r" + ["double"]=> + string(26) "10,000,000,000,000,000,000" + ["decimal"]=> + string(26) "10,000,000,000,000,000,000" +} +9,999,999,999,999,999,999