Skip to content

Avoid temporary string allocations in php_mb_parse_encoding_list() #12714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,11 @@ static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
#define NAME_HASH_MAX_NAME_LENGTH 23

const mbfl_encoding *mbfl_name2encoding(const char *name)
{
return mbfl_name2encoding_ex(name, strlen(name));
}

const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
{
const mbfl_encoding *const *encoding;

Expand All @@ -339,14 +344,13 @@ const mbfl_encoding *mbfl_name2encoding(const char *name)
#endif

/* Use perfect hash lookup for name */
size_t name_len = strlen(name);
if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
if (key <= 186) {
int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
if (offset >= 0) {
encoding = mbfl_encoding_ptr_list + offset;
if (strcasecmp((*encoding)->name, name) == 0) {
if (strncasecmp((*encoding)->name, name, name_len) == 0) {
return *encoding;
}
}
Expand Down
1 change: 1 addition & 0 deletions ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len)
}

MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new function can be used in more places

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, there are places where we can pass ZSTR_LEN(something). Although that would be a behaviour change because those places seem to allow \0 in strings (the function was never able to handle \0 in strings and silently cut off the string there, but passing ZSTR_LEN would thus change the \0 behaviour). I see you're making a PR to refactor some stuff anyway, so feel free to take a look at those places :)

MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
Expand Down
32 changes: 18 additions & 14 deletions ext/mbstring/mbstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,14 @@ static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_nam
return mbfl_name2encoding(encoding_name);
}

static const mbfl_encoding *php_mb_get_encoding_or_pass_ex(const char *encoding_name, size_t encoding_name_len) {
if (strncmp(encoding_name, "pass", encoding_name_len) == 0) {
return &mbfl_encoding_pass;
}

return mbfl_name2encoding_ex(encoding_name, encoding_name_len);
}

static size_t count_commas(const char *p, const char *end) {
size_t count = 0;
while ((p = memchr(p, ',', end - p))) {
Expand All @@ -300,15 +308,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
} else {
bool included_auto;
size_t n, size;
char *p1, *endp, *tmpstr;
const char *p1, *endp, *tmpstr;
const mbfl_encoding **entry, **list;

/* copy the value string for work */
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
tmpstr = (char *)estrndup(value+1, value_length-2);
tmpstr = value + 1;
value_length -= 2;
} else {
tmpstr = (char *)estrndup(value, value_length);
tmpstr = value;
}

endp = tmpstr + value_length;
Expand All @@ -319,20 +326,19 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
included_auto = 0;
p1 = tmpstr;
while (1) {
char *comma = memchr(p1, ',', endp - p1);
char *p = comma ? comma : endp;
*p = '\0';
const char *comma = memchr(p1, ',', endp - p1);
const char *p = comma ? comma : endp;
/* trim spaces */
while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
p1++;
}
p--;
while (p > p1 && (*p == ' ' || *p == '\t')) {
*p = '\0';
p--;
}
size_t p1_length = p - p1 + 1;
/* convert to the encoding number and check encoding */
if (strcasecmp(p1, "auto") == 0) {
if (strncasecmp(p1, "auto", p1_length) == 0) {
if (!included_auto) {
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
Expand All @@ -345,15 +351,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
}
} else {
const mbfl_encoding *encoding =
allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
allow_pass_encoding ? php_mb_get_encoding_or_pass_ex(p1, p1_length) : mbfl_name2encoding_ex(p1, p1_length);
if (!encoding) {
/* Called from an INI setting modification */
if (arg_num == 0) {
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%.*s\"", (int) p1_length, p1);
} else {
zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
zend_argument_value_error(arg_num, "contains invalid encoding \"%.*s\"", (int) p1_length, p1);
}
efree(tmpstr);
pefree(ZEND_VOIDP(list), persistent);
return FAILURE;
}
Expand All @@ -368,7 +373,6 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
}
*return_list = list;
*return_size = n;
efree(tmpstr);
}

return SUCCESS;
Expand Down