Skip to content

Commit 4920211

Browse files
committed
php_mb_convert_encoding{,_ex} returns zend_string
That's what all existing callers want anyway. This avoids 2 unnecessary copies of the converted string.
1 parent e2c4fc5 commit 4920211

File tree

3 files changed

+19
-66
lines changed

3 files changed

+19
-66
lines changed

ext/mbstring/mbstring.c

Lines changed: 13 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -2351,30 +2351,18 @@ static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
23512351
return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
23522352
}
23532353

2354-
MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
2354+
MBSTRING_API zend_string* php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding)
23552355
{
23562356
unsigned int num_errors = 0;
23572357
zend_string *result = mb_fast_convert((unsigned char*)input, length, from_encoding, to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
2358-
23592358
MBSTRG(illegalchars) += num_errors;
2360-
*output_len = ZSTR_LEN(result);
2361-
2362-
char *output = emalloc(ZSTR_LEN(result) + 1);
2363-
memcpy(output, ZSTR_VAL(result), ZSTR_LEN(result) + 1);
2364-
efree(result);
2365-
return output;
2359+
return result;
23662360
}
2367-
/* }}} */
23682361

2369-
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2370-
MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
2362+
MBSTRING_API zend_string* php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
23712363
{
23722364
const mbfl_encoding *from_encoding;
23732365

2374-
if (output_len) {
2375-
*output_len = 0;
2376-
}
2377-
23782366
/* pre-conversion encoding */
23792367
ZEND_ASSERT(num_from_encodings >= 1);
23802368
if (num_from_encodings == 1) {
@@ -2393,18 +2381,15 @@ MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, con
23932381
}
23942382
}
23952383

2396-
return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
2384+
return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding);
23972385
}
2398-
/* }}} */
23992386

24002387
MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
24012388
{
24022389
HashTable *output, *chash;
24032390
zend_long idx;
24042391
zend_string *key;
24052392
zval *entry, entry_tmp;
2406-
size_t ckey_len, cval_len;
2407-
char *ckey, *cval;
24082393

24092394
if (!input) {
24102395
return NULL;
@@ -2420,22 +2405,14 @@ MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, cons
24202405
ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
24212406
/* convert key */
24222407
if (key) {
2423-
ckey = php_mb_convert_encoding(
2424-
ZSTR_VAL(key), ZSTR_LEN(key),
2425-
to_encoding, from_encodings, num_from_encodings, &ckey_len);
2426-
key = zend_string_init(ckey, ckey_len, 0);
2427-
efree(ckey);
2408+
key = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), to_encoding, from_encodings, num_from_encodings);
24282409
}
24292410
/* convert value */
24302411
ZEND_ASSERT(entry);
24312412
try_again:
24322413
switch(Z_TYPE_P(entry)) {
24332414
case IS_STRING:
2434-
cval = php_mb_convert_encoding(
2435-
Z_STRVAL_P(entry), Z_STRLEN_P(entry),
2436-
to_encoding, from_encodings, num_from_encodings, &cval_len);
2437-
ZVAL_STRINGL(&entry_tmp, cval, cval_len);
2438-
efree(cval);
2415+
ZVAL_STR(&entry_tmp, php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), to_encoding, from_encodings, num_from_encodings));
24392416
break;
24402417
case IS_NULL:
24412418
case IS_TRUE:
@@ -2545,23 +2522,9 @@ PHP_FUNCTION(mb_convert_encoding)
25452522
}
25462523

25472524
if (input_str) {
2548-
if (num_from_encodings == 1) {
2549-
const mbfl_encoding *from_encoding = from_encodings[0];
2550-
if (from_encoding->to_wchar && to_encoding->from_wchar) {
2551-
unsigned int num_errors = 0;
2552-
RETVAL_STR(mb_fast_convert((unsigned char*)ZSTR_VAL(input_str), ZSTR_LEN(input_str), from_encoding, to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors));
2553-
MBSTRG(illegalchars) += num_errors;
2554-
goto out;
2555-
}
2556-
}
2557-
2558-
size_t size;
2559-
char *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str),
2560-
to_encoding, from_encodings, num_from_encodings, &size);
2525+
zend_string *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), to_encoding, from_encodings, num_from_encodings);
25612526
if (ret != NULL) {
2562-
// TODO: avoid reallocation ???
2563-
RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
2564-
efree(ret);
2527+
RETVAL_STR(ret);
25652528
} else {
25662529
RETVAL_FALSE;
25672530
}
@@ -2572,7 +2535,6 @@ PHP_FUNCTION(mb_convert_encoding)
25722535
RETVAL_ARR(tmp);
25732536
}
25742537

2575-
out:
25762538
if (free_from_encodings) {
25772539
efree(ZEND_VOIDP(from_encodings));
25782540
}
@@ -4135,20 +4097,16 @@ static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint3
41354097
buf[2] = (cp >> 8) & 0xff;
41364098
buf[3] = cp & 0xff;
41374099

4138-
size_t ret_len;
41394100
long orig_illegalchars = MBSTRG(illegalchars);
41404101
MBSTRG(illegalchars) = 0;
4141-
char *ret_str = php_mb_convert_encoding_ex(buf, 4, enc, &mbfl_encoding_ucs4be, &ret_len);
4102+
ret = php_mb_convert_encoding_ex(buf, 4, enc, &mbfl_encoding_ucs4be);
4103+
41424104
if (MBSTRG(illegalchars) != 0) {
4143-
efree(ret_str);
4144-
MBSTRG(illegalchars) = orig_illegalchars;
4145-
return NULL;
4105+
zend_string_release(ret);
4106+
ret = NULL;
41464107
}
41474108

4148-
ret = zend_string_init(ret_str, ret_len, 0);
4149-
efree(ret_str);
41504109
MBSTRG(illegalchars) = orig_illegalchars;
4151-
41524110
return ret;
41534111
}
41544112

@@ -4192,11 +4150,7 @@ PHP_FUNCTION(mb_scrub)
41924150
RETURN_THROWS();
41934151
}
41944152

4195-
size_t ret_len;
4196-
char *ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
4197-
4198-
RETVAL_STRINGL(ret, ret_len);
4199-
efree(ret);
4153+
RETURN_STR(php_mb_convert_encoding_ex(str, str_len, enc, enc));
42004154
}
42014155
/* }}} */
42024156

ext/mbstring/mbstring.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -55,12 +55,12 @@ PHP_MINFO_FUNCTION(mbstring);
5555

5656
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc);
5757

58-
MBSTRING_API char *php_mb_convert_encoding_ex(
58+
MBSTRING_API zend_string* php_mb_convert_encoding_ex(
5959
const char *input, size_t length,
60-
const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len);
61-
MBSTRING_API char * php_mb_convert_encoding(
60+
const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding);
61+
MBSTRING_API zend_string* php_mb_convert_encoding(
6262
const char *input, size_t length, const mbfl_encoding *to_encoding,
63-
const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len);
63+
const mbfl_encoding **from_encodings, size_t num_from_encodings);
6464

6565
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s, const mbfl_encoding *enc);
6666

sapi/fuzzer/fuzzer-mbstring.c

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
5151
return 0;
5252
}
5353

54-
size_t output_len;
55-
char *Result = php_mb_convert_encoding_ex((char *) Data, Size, ToEncoding, FromEncoding, &output_len);
56-
efree(Result);
54+
zend_string *Result = php_mb_convert_encoding_ex((char *) Data, Size, ToEncoding, FromEncoding);
55+
zend_string_release(Result);
5756
efree(ToEncodingName);
5857
efree(FromEncodingName);
5958

0 commit comments

Comments
 (0)