From d278701cc659084041af91568b4e4175d39fdca8 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 27 Mar 2024 22:23:12 +0100 Subject: [PATCH] Fix GH-13815: mb_trim() inaccurate $characters default value Because the default characters are defined in the stub file, and the stub file is (typically) UTF-8, the default string is encoded as UTF-8. When a different character encoding is used, there is a mismatch between the encoding mb_trim() expects and the UTF-8 encoded string it gets. One way of solving this is to make the $characters argument nullable, so that the default always takes the internal code path in which the default Unicode code points are stored as code point numbers rather than in an encoded string. Co-authored-by: @ranvis --- ext/mbstring/mbstring.c | 2 +- ext/mbstring/mbstring.stub.php | 6 +++--- ext/mbstring/mbstring_arginfo.h | 4 ++-- ext/mbstring/tests/gh13815.phpt | 20 ++++++++++++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 ext/mbstring/tests/gh13815.phpt diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 12ec4411768c1..8f65681c3cee1 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -3129,7 +3129,7 @@ static void php_do_mb_trim(INTERNAL_FUNCTION_PARAMETERS, mb_trim_mode mode) ZEND_PARSE_PARAMETERS_START(1, 3) Z_PARAM_STR(str) Z_PARAM_OPTIONAL - Z_PARAM_STR(what) + Z_PARAM_STR_OR_NULL(what) Z_PARAM_STR_OR_NULL(encoding) ZEND_PARSE_PARAMETERS_END(); diff --git a/ext/mbstring/mbstring.stub.php b/ext/mbstring/mbstring.stub.php index 6eb041bf1eba2..af9c5cbb93ea2 100644 --- a/ext/mbstring/mbstring.stub.php +++ b/ext/mbstring/mbstring.stub.php @@ -139,11 +139,11 @@ function mb_ucfirst(string $string, ?string $encoding = null): string {} function mb_lcfirst(string $string, ?string $encoding = null): string {} -function mb_trim(string $string, string $characters = " 
\f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {} +function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string {} -function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {} +function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string {} -function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {} +function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string {} /** @refcount 1 */ function mb_detect_encoding(string $string, array|string|null $encodings = null, bool $strict = false): string|false {} diff --git a/ext/mbstring/mbstring_arginfo.h b/ext/mbstring/mbstring_arginfo.h index f40715b13ab7c..a4c63aa2824e1 100644 --- a/ext/mbstring/mbstring_arginfo.h +++ b/ext/mbstring/mbstring_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: ea642b9010bc38a3b13710662fef48663d4385e1 */ + * Stub hash: 03c07f68bea7d7b96e6dc11f180f45663b859ed3 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_language, 0, 0, MAY_BE_STRING|MAY_BE_BOOL) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, language, IS_STRING, 1, "null") @@ -124,7 +124,7 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_mb_trim, 0, 1, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, characters, IS_STRING, 0, "\" \\f\\n\\r\\t\\v\\x00             

   …᠎\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, characters, IS_STRING, 1, "null") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") ZEND_END_ARG_INFO() diff --git a/ext/mbstring/tests/gh13815.phpt b/ext/mbstring/tests/gh13815.phpt new file mode 100644 index 0000000000000..f5d49c0aa1d28 --- /dev/null +++ b/ext/mbstring/tests/gh13815.phpt @@ -0,0 +1,20 @@ +--TEST-- +GH-13815 (mb_trim() inaccurate $characters default value) +--EXTENSIONS-- +mbstring +--FILE-- + +--EXPECT-- +int(1) +int(1) +int(1) +int(1)