From 2b82b67564b0eaf9dcd63b446f019f69a3159654 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 18 Nov 2023 00:41:51 +0100
Subject: [PATCH 1/2] Avoid temporary string allocations in php_mb_parse_encoding_list()

This brings execution time down from 0.91s to 0.86s on the reference benchmark [1].

[1] https://github.com/php/php-src/issues/12684#issuecomment-1813799924
---
Review notes (this section is not part of the commit message):

 - The list elements are no longer NUL-terminated copies, so every
   length-bounded comparison must also check the length. A bare
   strncmp()/strncasecmp() with the element length accepts any prefix
   ("a", "au", "pa", ...) and, because n == 0 compares nothing, also an
   empty list element. The "auto" and "pass" checks therefore require a
   length of exactly 4, and the perfect-hash lookup requires the candidate
   encoding name to have exactly name_len characters.
 - NOTE(review): the remaining lookups in mbfl_name2encoding_ex() (after the
   perfect-hash path) are outside the hunks below; confirm they use name_len
   rather than relying on a NUL terminator.
 - The "index" blob ids below were not regenerated after the three one-line
   adjustments; regenerate the series before using "git am -3".

 ext/mbstring/libmbfl/mbfl/mbfl_encoding.c |  8 ++++--
 ext/mbstring/libmbfl/mbfl/mbfl_encoding.h |  1 +
 ext/mbstring/mbstring.c                   | 31 +++++++++++++----------
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
index 7fee442b3a37e..78f36a0227319 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
@@ -318,6 +318,11 @@ static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
 #define NAME_HASH_MAX_NAME_LENGTH 23
 
 const mbfl_encoding *mbfl_name2encoding(const char *name)
+{
+	return mbfl_name2encoding_ex(name, strlen(name));
+}
+
+const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
 {
 	const mbfl_encoding *const *encoding;
 
@@ -339,14 +344,13 @@ const mbfl_encoding *mbfl_name2encoding(const char *name)
 #endif
 
 	/* Use perfect hash lookup for name */
-	size_t name_len = strlen(name);
 	if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
 		unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
 		if (key <= 186) {
 			int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
 			if (offset >= 0) {
 				encoding = mbfl_encoding_ptr_list + offset;
-				if (strcasecmp((*encoding)->name, name) == 0) {
+				if (strncasecmp((*encoding)->name, name, name_len) == 0 && strlen((*encoding)->name) == name_len) {
 					return *encoding;
 				}
 			}
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
index eee913c600cdd..2948266c7b14f 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
@@ -285,6 +285,7 @@ static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len)
 }
 
 MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
+MBFLAPI extern const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len);
 MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
 MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
 MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 5b8918ff6f9cf..4fb6e48d905c8 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -276,6 +276,14 @@ static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_nam
 	return mbfl_name2encoding(encoding_name);
 }
 
+static const mbfl_encoding *php_mb_get_encoding_or_pass_ex(const char *encoding_name, size_t encoding_name_len) {
+	if (encoding_name_len == 4 && strncmp(encoding_name, "pass", encoding_name_len) == 0) {
+		return &mbfl_encoding_pass;
+	}
+
+	return mbfl_name2encoding_ex(encoding_name, encoding_name_len);
+}
+
 static size_t count_commas(const char *p, const char *end) {
 	size_t count = 0;
 	while ((p = memchr(p, ',', end - p))) {
@@ -300,15 +308,15 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
 	} else {
 		bool included_auto;
 		size_t n, size;
-		char *p1, *endp, *tmpstr;
+		const char *p1, *endp, *tmpstr;
 		const mbfl_encoding **entry, **list;
 
 		/* copy the value string for work */
 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
-			tmpstr = (char *)estrndup(value+1, value_length-2);
+			tmpstr = value + 1;
 			value_length -= 2;
 		} else {
-			tmpstr = (char *)estrndup(value, value_length);
+			tmpstr = value;
 		}
 
 		endp = tmpstr + value_length;
@@ -319,20 +327,19 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
 		included_auto = 0;
 		p1 = tmpstr;
 		while (1) {
-			char *comma = memchr(p1, ',', endp - p1);
-			char *p = comma ? comma : endp;
-			*p = '\0';
+			const char *comma = memchr(p1, ',', endp - p1);
+			const char *p = comma ? comma : endp;
 			/* trim spaces */
 			while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
 				p1++;
 			}
 			p--;
 			while (p > p1 && (*p == ' ' || *p == '\t')) {
-				*p = '\0';
 				p--;
 			}
+			size_t p1_length = p - p1 + 1;
 			/* convert to the encoding number and check encoding */
-			if (strcasecmp(p1, "auto") == 0) {
+			if (p1_length == 4 && strncasecmp(p1, "auto", p1_length) == 0) {
 				if (!included_auto) {
 					const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
 					const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
@@ -345,15 +352,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
 				}
 			} else {
 				const mbfl_encoding *encoding =
-					allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
+					allow_pass_encoding ? php_mb_get_encoding_or_pass_ex(p1, p1_length) : mbfl_name2encoding_ex(p1, p1_length);
 				if (!encoding) {
 					/* Called from an INI setting modification */
 					if (arg_num == 0) {
-						php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
+						php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%.*s\"", (int) p1_length, p1);
 					} else {
-						zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
+						zend_argument_value_error(arg_num, "contains invalid encoding \"%.*s\"", (int) p1_length, p1);
 					}
-					efree(tmpstr);
 					pefree(ZEND_VOIDP(list), persistent);
 					return FAILURE;
 				}
@@ -368,7 +374,6 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
 		}
 		*return_list = list;
 		*return_size = n;
-		efree(tmpstr);
 	}
 
 	return SUCCESS;

From c97b1f348a6294a73a311c8ab0a8b7549d9d5f48 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 18 Nov 2023 12:07:23 +0100
Subject: [PATCH 2/2] [ci skip] remove redundant comment

---
 ext/mbstring/mbstring.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 4fb6e48d905c8..3ff441e9b19e0 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -311,7 +311,6 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
 		const char *p1, *endp, *tmpstr;
 		const mbfl_encoding **entry, **list;
 
-		/* copy the value string for work */
 		if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
 			tmpstr = value + 1;
 			value_length -= 2;