diff --git a/NEWS b/NEWS index 0a53434c36a9f..09da40c962e28 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,8 @@ PHP NEWS . net_get_interfaces() also reports wireless network interfaces on Windows. (Yurun) . Finished AVIF support in getimagesize(). (Yannis Guyon) + . Fixed bug GH-7847 (stripos with large haystack has bad performance). + (ilutov) - Zip: . add ZipArchive::clearError() method diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 7175f0b54b4db..cec9356406556 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -17,6 +17,9 @@ PHP 8.2 INTERNALS UPGRADE NOTES zend_binary_str(n)casecmp() as one would expect. Call the appropriate wrapped function directly instead. * Removed the (ZEND_)WRONG_PARAM_COUNT_WITH_RETVAL() macros. +* php_stristr() no longer lowercases the haystack and needle as a side effect. + Call zend_str_tolower() yourself if necessary. You no longer need to copy + the haystack and needle before passing them to php_stristr(). ======================== 2. Build system changes diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h index eb88eedb79da1..48418ea0b44aa 100644 --- a/Zend/zend_operators.h +++ b/Zend/zend_operators.h @@ -919,6 +919,67 @@ static zend_always_inline void zend_unwrap_reference(zval *op) /* {{{ */ } /* }}} */ +static zend_always_inline bool zend_strnieq(const char *ptr1, const char *ptr2, size_t num) +{ + const char *end = ptr1 + num; + while (ptr1 < end) { + if (zend_tolower_ascii(*ptr1++) != zend_tolower_ascii(*ptr2++)) { + return 0; + } + } + return 1; +} + +static zend_always_inline const char * +zend_memnistr(const char *haystack, const char *needle, size_t needle_len, const char *end) +{ + ZEND_ASSERT(end >= haystack); + + if (UNEXPECTED(needle_len > (end - haystack))) { + return NULL; + } + + if (UNEXPECTED(needle_len == 0)) { + return haystack; + } + + const char first_lower = zend_tolower_ascii(*needle); + const char first_upper = zend_toupper_ascii(*needle); + const char *p_lower = (const char *)memchr(haystack, first_lower, end - haystack); + const char *p_upper = NULL; + if (first_lower != first_upper) { + // If the needle length is 1 we don't need to look beyond p_lower as it is a guaranteed match + size_t upper_search_length = end - (needle_len == 1 && p_lower != NULL ? p_lower : haystack); + p_upper = (const char *)memchr(haystack, first_upper, upper_search_length); + } + const char *p = !p_upper || (p_lower && p_lower < p_upper) ? p_lower : p_upper; + + if (needle_len == 1) { + return p; + } + + const char needle_end_lower = zend_tolower_ascii(needle[needle_len - 1]); + const char needle_end_upper = zend_toupper_ascii(needle[needle_len - 1]); + end -= needle_len; + + while (p && p <= end) { + if (needle_end_lower == p[needle_len - 1] || needle_end_upper == p[needle_len - 1]) { + if (zend_strnieq(needle + 1, p + 1, needle_len - 2)) { + return p; + } + } + if (p_lower == p) { + p_lower = (const char *)memchr(p_lower + 1, first_lower, end - p_lower); + } + if (p_upper == p) { + p_upper = (const char *)memchr(p_upper + 1, first_upper, end - p_upper); + } + p = !p_upper || (p_lower && p_lower < p_upper) ? p_lower : p_upper; + } + + return NULL; +} + END_EXTERN_C() diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 449bf3248313e..99d5696ebcf31 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -378,9 +378,9 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc) const char buf[] = "Content-Type:"; if (Z_TYPE_P(header) == IS_STRING && !zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) { - char *needle = estrdup("charset="); + char needle[] = "charset="; char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header)); - char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1); + char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), strlen(needle)); if (encoding) { char *end; @@ -408,7 +408,6 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc) } } efree(haystack); - efree(needle); break; /* found content-type */ } } ZEND_HASH_FOREACH_END(); diff --git a/ext/phar/phar.c b/ext/phar/phar.c index 91310c21e4967..1ffa9a9d4fdd4 100644 --- a/ext/phar/phar.c +++ b/ext/phar/phar.c @@ -2531,7 +2531,6 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv { char halt_stub[] = "__HALT_COMPILER();"; zend_string *newstub; - char *tmp; phar_entry_info *entry, *newentry; size_t halt_offset; int restore_alias_len, global_flags = 0, closeoldfile; @@ -2635,9 +2634,7 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv } else { free_user_stub = 0; } - tmp = estrndup(user_stub, len); - if ((pos = php_stristr(tmp, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) { - efree(tmp); + if ((pos = php_stristr(user_stub, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) { if (closeoldfile) { php_stream_close(oldfile); } @@ -2650,8 +2647,7 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv } return EOF; } - pos = user_stub + (pos - tmp); - efree(tmp); + pos = user_stub + (pos - user_stub); len = pos - user_stub + 18; if ((size_t)len != php_stream_write(newfile, user_stub, len) || 5 != php_stream_write(newfile, " ?>\r\n", 5)) { diff --git a/ext/phar/tar.c b/ext/phar/tar.c index 07b784a0b9df2..189ee16aa1def 100644 --- a/ext/phar/tar.c +++ b/ext/phar/tar.c @@ -967,7 +967,7 @@ int phar_tar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int int closeoldfile, free_user_stub; size_t signature_length; struct _phar_pass_tar_info pass; - char *buf, *signature, *tmp, sigbuf[8]; + char *buf, *signature, sigbuf[8]; char halt_stub[] = "__HALT_COMPILER();"; entry.flags = PHAR_ENT_PERM_DEF_FILE; @@ -1063,9 +1063,7 @@ int phar_tar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int free_user_stub = 0; } - tmp = estrndup(user_stub, len); - if ((pos = php_stristr(tmp, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) { - efree(tmp); + if ((pos = php_stristr(user_stub, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) { if (error) { spprintf(error, 0, "illegal stub for tar-based phar \"%s\"", phar->fname); } @@ -1074,8 +1072,7 @@ int phar_tar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int } return EOF; } - pos = user_stub + (pos - tmp); - efree(tmp); + pos = user_stub + (pos - user_stub); len = pos - user_stub + 18; entry.fp = php_stream_fopen_tmpfile(); diff --git a/ext/phar/zip.c b/ext/phar/zip.c index c5e38cabf7b87..e3beed33c3e63 100644 --- a/ext/phar/zip.c +++ b/ext/phar/zip.c @@ -1202,7 +1202,6 @@ int phar_zip_flush(phar_archive_data *phar, char *user_stub, zend_long len, int char *pos; static const char newstub[] = "fname); } @@ -1316,8 +1313,7 @@ int phar_zip_flush(phar_archive_data *phar, char *user_stub, zend_long len, int } return EOF; } - pos = user_stub + (pos - tmp); - efree(tmp); + pos = user_stub + (pos - user_stub); len = pos - user_stub + 18; entry.fp = php_stream_fopen_tmpfile(); diff --git a/ext/standard/string.c b/ext/standard/string.c index 7c2dce4595b0f..36e8c1a737e77 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1687,9 +1687,7 @@ PHP_FUNCTION(pathinfo) case insensitive strstr */ PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len) { - zend_str_tolower(s, s_len); - zend_str_tolower(t, t_len); - return (char*)php_memnstr(s, t, t_len, s + s_len); + return (char*)php_memnistr(s, t, t_len, s + s_len); } /* }}} */ @@ -1735,8 +1733,6 @@ PHP_FUNCTION(stristr) zend_string *haystack, *needle; const char *found = NULL; size_t found_offset; - char *haystack_dup; - char *orig_needle; bool part = 0; ZEND_PARSE_PARAMETERS_START(2, 3) @@ -1746,13 +1742,10 @@ PHP_FUNCTION(stristr) Z_PARAM_BOOL(part) ZEND_PARSE_PARAMETERS_END(); - haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack)); - orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle)); - found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle)); - efree(orig_needle); + found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle)); if (found) { - found_offset = found - haystack_dup; + found_offset = found - ZSTR_VAL(haystack); if (part) { RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset); } else { @@ -1761,8 +1754,6 @@ PHP_FUNCTION(stristr) } else { RETVAL_FALSE; } - - efree(haystack_dup); } /* }}} */ @@ -1890,7 +1881,6 @@ PHP_FUNCTION(stripos) const char *found = NULL; zend_string *haystack, *needle; zend_long offset = 0; - zend_string *needle_dup = NULL, *haystack_dup; ZEND_PARSE_PARAMETERS_START(2, 3) Z_PARAM_STR(haystack) @@ -1907,23 +1897,14 @@ PHP_FUNCTION(stripos) RETURN_THROWS(); } - if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) { - RETURN_FALSE; - } - - haystack_dup = zend_string_tolower(haystack); - needle_dup = zend_string_tolower(needle); - found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset, - ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack)); + found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset, + ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); if (found) { - RETVAL_LONG(found - ZSTR_VAL(haystack_dup)); + RETVAL_LONG(found - ZSTR_VAL(haystack)); } else { RETVAL_FALSE; } - - zend_string_release_ex(haystack_dup, 0); - zend_string_release_ex(needle_dup, 0); } /* }}} */ diff --git a/main/php.h b/main/php.h index 91ecbb21dc726..e8fcea5b6f607 100644 --- a/main/php.h +++ b/main/php.h @@ -344,6 +344,7 @@ END_EXTERN_C() #define phpin zendin #define php_memnstr zend_memnstr +#define php_memnistr zend_memnistr /* functions */ BEGIN_EXTERN_C()