From f60ffed317ffe1d23cda2c13f1f21daccc75ae75 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 19 Dec 2024 09:11:10 -0500 Subject: [PATCH 1/5] ext/gettext/gettext.c: handle NULLs from bindtextdomain() According to POSIX, bindtextdomain() returns "the implementation- defined default directory pathname used by the gettext family of functions" when its second parameter is NULL (i.e. when you are querying the directory corresponding to some text domain and that directory has not yet been set). Its PHP counterpart is feeding that result direclty to RETURN_STRING, but this can go wrong in two ways: 1. If an error occurs, even POSIX-compliant implementations may return NULL. 2. At least one non-compliant implementation (musl) lacks a default directory and returns NULL whenever the domain has not yet been bound. In either of those cases, PHP segfaults on the NULL string. In this commit we check for the NULL, and RETURN_FALSE when it happens rather than crashing. This partially addresses GH #13696 --- ext/gettext/gettext.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ext/gettext/gettext.c b/ext/gettext/gettext.c index 15af3cb9b57ff..eba2a591e9f1b 100644 --- a/ext/gettext/gettext.c +++ b/ext/gettext/gettext.c @@ -167,7 +167,7 @@ PHP_FUNCTION(bindtextdomain) char *domain; size_t domain_len; zend_string *dir = NULL; - char *retval, dir_name[MAXPATHLEN]; + char *retval, dir_name[MAXPATHLEN], *btd_result; if (zend_parse_parameters(ZEND_NUM_ARGS(), "sS!", &domain, &domain_len, &dir) == FAILURE) { RETURN_THROWS(); @@ -181,7 +181,16 @@ PHP_FUNCTION(bindtextdomain) } if (dir == NULL) { - RETURN_STRING(bindtextdomain(domain, NULL)); + btd_result = bindtextdomain(domain, NULL); + if (btd_result == NULL) { + /* POSIX-compliant implementations can return + * NULL if an error occured. On musl you will + * also get NULL if the domain is not yet + * bound, because musl has no default directory + * to return in that case. */ + RETURN_FALSE; + } + RETURN_STRING(btd_result); } if (ZSTR_LEN(dir) != 0 && !zend_string_equals_literal(dir, "0")) { From ce0dbef68778880ae39902fc6383f9c8925f7fe9 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 19 Dec 2024 09:17:16 -0500 Subject: [PATCH 2/5] ext/gettext/tests: fix libintl return values under musl Musl has two quirks that are leading to failed internationalization tests. First is that the return value of bindtextdomain(..., NULL) will always be false, rather than an "implementation-defined default directory," because musl does not have an implementation-defined default directory. One test needs a special case for this. Second is that the musl implementation of bind_textdomain_codeset() always returns NULL. The POSIX-correctness of this is debatable, but it is roughly equivalent to correct, because musl only support UTF-8, so the NULL value indicating that the codeset is unchanged from the locale's codeset (UTF-8) is accurate. PHP's bind_textdomain_codeset() function however treats NULL as failure, unconditionally: * https://github.com/php/doc-en/issues/4311 * https://github.com/php/php-src/issues/17163 This unfortunately causes false to be returned consistently on musl -- even when nothing unexpected has happened -- and naturally this is affecting several tests. For now we change two tests to accept "false" in addition to "UTF-8" so that they may pass on musl. If PHP's bind_textdomain_codeset() is updated to differentiate between NULL and NULL-with-errno-set, these tests can also be updated once again to reject the NULL-with-errno result. This partially addresses GH #13696 --- ext/gettext/tests/bug53251.phpt | 28 +++++++++++++------ ...ettext_bind_textdomain_codeset-retval.phpt | 12 ++++++-- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/ext/gettext/tests/bug53251.phpt b/ext/gettext/tests/bug53251.phpt index 6f37642925d37..75f8f08b74ccb 100644 --- a/ext/gettext/tests/bug53251.phpt +++ b/ext/gettext/tests/bug53251.phpt @@ -8,18 +8,28 @@ if (getenv('SKIP_REPEAT')) die('skip gettext leaks global state across requests' ?> --FILE-- --EXPECT-- bool(true) -bool(true) -bool(false) -string(5) "UTF-8" -string(5) "UTF-8" diff --git a/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt b/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt index 47c821648fccb..90404178d15f7 100644 --- a/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt +++ b/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt @@ -5,13 +5,21 @@ gettext --FILE-- --EXPECT-- bool(false) -string(5) "UTF-8" +bool(true) Done --CREDITS-- Florian Holzhauer fh-pt@fholzhauer.de From a705006392fbb42e6e0010a17733c7864c391651 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 19 Dec 2024 09:20:06 -0500 Subject: [PATCH 3/5] ext/gettext/config.m4: symlink en_US.UTF-8 test bits to en_US for musl The gettext() family of functions under musl does not support codeset suffixes like ".UTF-8", because the only codeset it understands is UTF-8. (Yes, it is annoying that it doesn't support the suffix for the codeset that it does understand; no, I am not in charge.) Thanks to this, we have six failing tests on musl, * FAIL Gettext basic test with en_US locale that should be on nearly every system [ext/gettext/tests/gettext_basic-enus.phpt] * FAIL Test if bindtextdomain() returns string id if no directory path is set( if directory path is 'null') [ext/gettext/tests/gettext_bindtextdomain-cwd.phpt] * FAIL Test dcgettext() functionality [ext/gettext/tests/gettext_dcgettext.phpt] * FAIL Test dgettext() functionality [ext/gettext/tests/gettext_dgettext.phpt] * FAIL Test if dngettext() returns the correct translations (optionally plural). [ext/gettext/tests/gettext_dngettext-plural.phpt] * FAIL Test ngettext() functionality [ext/gettext/tests/gettext_ngettext.phpt] These are all fixed by symlinking the en_US.UTF-8 message data to en_US, where musl is able to find it. This does not make the situation any better for developers (who don't know what libc their users will be running), but that problem is inhereted from C and is not the fault of the gettext extension. This partially addresses GH #13696 --- ext/gettext/config.m4 | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ext/gettext/config.m4 b/ext/gettext/config.m4 index 9e304d82b8d29..b3a6c35d6c006 100644 --- a/ext/gettext/config.m4 +++ b/ext/gettext/config.m4 @@ -25,6 +25,17 @@ if test "$PHP_GETTEXT" != "no"; then AC_CHECK_LIB(c, bindtextdomain, [ GETTEXT_LIBS= GETTEXT_CHECK_IN_LIB=c + + dnl If libintl.h is provided by libc, it's possible that libc is musl. + dnl The gettext family of functions under musl ignores the codeset + dnl suffix on directories like "en_US.UTF-8"; instead they look only + dnl in "en_US". To accomodate that, we symlink some test data from one + dnl to the other. + AC_MSG_NOTICE([symlinking en_US.UTF-8 messages to en_US in case you are on musl]) + _linkdest="${srcdir%/}"/ext/gettext/tests/locale/en_US + AS_IF([test ! -e "${_linkdest}"],[ + ln -s en_US.UTF-8 "${_linkdest}" + ]) ],[ AC_MSG_ERROR(Unable to find required gettext library) ]) From 385a91ffe24f18946ad2f3c2ed384adb7c6b714a Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 15 Dec 2024 11:26:42 -0500 Subject: [PATCH 4/5] .gitignore: ignore gettext test data symlink used on musl If gettext() et al. come from musl libc, ./configure will symlink ext/gettext/tests/locale/en_US.UTF-8 to en_US (without the ".UTF-8" suffix). We want to ignore the symlink. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 49acc9f2e1788..449963153f36b 100644 --- a/.gitignore +++ b/.gitignore @@ -177,6 +177,9 @@ php /ext/*/configure.ac /ext/*/run-tests.php +# Generated by ./configure if libc might be musl +/ext/gettext/tests/locale/en_US + # ------------------------------------------------------------------------------ # Generated by Windows build system # ------------------------------------------------------------------------------ From 4daa762ac8ca29e234555c9e6c96314a70907380 Mon Sep 17 00:00:00 2001 From: Arnaud Le Blanc Date: Thu, 19 Dec 2024 17:38:21 +0100 Subject: [PATCH 5/5] Trigger build (and CS) --- ext/gettext/tests/bug53251.phpt | 2 +- ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/gettext/tests/bug53251.phpt b/ext/gettext/tests/bug53251.phpt index 75f8f08b74ccb..d568be6bc079a 100644 --- a/ext/gettext/tests/bug53251.phpt +++ b/ext/gettext/tests/bug53251.phpt @@ -29,7 +29,7 @@ $expected = [true, true, false, "UTF-8", "UTF-8"]; // $expected_musl = [false, true, false, false, false]; -var_dump($results === $expected or $results === $expected_musl); +var_dump($results === $expected || $results === $expected_musl); ?> --EXPECT-- bool(true) diff --git a/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt b/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt index 90404178d15f7..941bab79bffa2 100644 --- a/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt +++ b/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt @@ -13,7 +13,7 @@ gettext // * https://github.com/php/php-src/issues/17163 // $result = bind_textdomain_codeset('messages', "UTF-8"); - var_dump($result === false or $result === "UTF-8"); + var_dump($result === false || $result === "UTF-8"); echo "Done\n"; ?>