Skip to content

Commit b8fa165

Browse files
authored
Avoid string copies in ext/intl after string conversion (#18636)
Introduces intl_convert_utf8_to_utf16_zstr(), which converts a UTF-8 string directly into a zend_string* holding the UTF-16 data. This avoids the extra copy that was previously needed to move the converted UChar* buffer into a zend_string*.
1 parent 16e154a commit b8fa165

File tree

4 files changed

+66
-27
lines changed

4 files changed

+66
-27
lines changed

UPGRADING

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,10 @@ PHP 8.5 UPGRADE NOTES
526526
. Add OPcode specialization for `=== []` and `!== []` comparisons.
527527
. Creating exception objects is now much faster.
528528

529+
- Intl:
530+
. Now avoids creating extra string copies when converting strings
531+
for use in the collator.
532+
529533
- ReflectionProperty:
530534
. Improved performance of the following methods: getValue(), getRawValue(),
531535
isInitialized(), setValue(), setRawValue().

ext/intl/collator/collator_convert.c

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ static void collator_convert_hash_item_from_utf8_to_utf16(
3838
{
3939
const char* old_val;
4040
size_t old_val_len;
41-
UChar* new_val = NULL;
42-
int32_t new_val_len = 0;
4341
zval znew_val;
4442

4543
/* Process string values only. */
@@ -49,17 +47,13 @@ static void collator_convert_hash_item_from_utf8_to_utf16(
4947
old_val = Z_STRVAL_P( hashData );
5048
old_val_len = Z_STRLEN_P( hashData );
5149

52-
/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
53-
intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
50+
/* Convert it from UTF-8 to UTF-16LE. */
51+
zend_string *zstr = intl_convert_utf8_to_utf16_zstr( old_val, old_val_len, status );
5452
if( U_FAILURE( *status ) )
5553
return;
5654

5755
/* Update current hash item with the converted value. */
58-
ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
59-
//???
60-
efree(new_val);
61-
/* hack to fix use of initialized value */
62-
Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
56+
ZVAL_NEW_STR( &znew_val, zstr );
6357

6458
if( hashKey)
6559
{
@@ -176,23 +170,19 @@ zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
176170

177171
zend_string *collator_convert_zstr_utf8_to_utf16(zend_string *utf8_str)
178172
{
179-
UChar *ustr = NULL;
180-
int32_t ustr_len = 0;
181173
UErrorCode status = U_ZERO_ERROR;
182174

183175
/* Convert the string to UTF-16. */
184-
intl_convert_utf8_to_utf16(
185-
&ustr, &ustr_len,
176+
zend_string *zstr = intl_convert_utf8_to_utf16_zstr(
186177
ZSTR_VAL(utf8_str), ZSTR_LEN(utf8_str),
187178
&status);
188179
// FIXME Or throw error or use intl internal error handler
189180
if (U_FAILURE(status)) {
190181
php_error(E_WARNING,
191182
"Error casting object to string in collator_convert_zstr_utf8_to_utf16()");
183+
zstr = ZSTR_EMPTY_ALLOC();
192184
}
193185

194-
zend_string *zstr = zend_string_init((char *) ustr, UBYTES(ustr_len), 0);
195-
efree((char *)ustr);
196186
return zstr;
197187
}
198188

@@ -203,8 +193,6 @@ zval* collator_convert_object_to_string( zval* obj, zval *rv )
203193
{
204194
zval* zstr = NULL;
205195
UErrorCode status = U_ZERO_ERROR;
206-
UChar* ustr = NULL;
207-
int32_t ustr_len = 0;
208196

209197
/* Bail out if it's not an object. */
210198
if( Z_TYPE_P( obj ) != IS_OBJECT )
@@ -229,25 +217,20 @@ zval* collator_convert_object_to_string( zval* obj, zval *rv )
229217
}
230218

231219
/* Convert the string to UTF-16. */
232-
intl_convert_utf8_to_utf16(
233-
&ustr, &ustr_len,
220+
zend_string *converted_str = intl_convert_utf8_to_utf16_zstr(
234221
Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
235222
&status );
236223
// FIXME Or throw error or use intl internal error handler
237-
if( U_FAILURE( status ) )
224+
if( U_FAILURE( status ) ) {
238225
php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
226+
converted_str = ZSTR_EMPTY_ALLOC();
227+
}
239228

240229
/* Cleanup zstr to hold utf16 string. */
241230
zval_ptr_dtor_str( zstr );
242231

243232
/* Set string. */
244-
ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
245-
//???
246-
efree((char *)ustr);
247-
248-
/* Don't free ustr cause it's set in zstr without copy.
249-
* efree( ustr );
250-
*/
233+
ZVAL_STR( zstr, converted_str );
251234

252235
return zstr;
253236
}

ext/intl/intl_convert.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,54 @@ void intl_convert_utf8_to_utf16(
104104
}
105105
/* }}} */
106106

107+
/* Convert the given string from UTF-8 to UTF-16 and return it as a zend_string*
108+
*
109+
* @param src String to convert.
110+
* @param src_len Length of the source string.
111+
* @param status Conversion status.
112+
*
113+
* @return zend_string* on success and NULL on failure
114+
*/
115+
zend_string *intl_convert_utf8_to_utf16_zstr(
116+
const char* src, size_t src_len,
117+
UErrorCode* status )
118+
{
119+
int32_t dst_len = 0;
120+
121+
*status = U_ZERO_ERROR;
122+
123+
if(src_len > INT32_MAX) {
124+
/* we cannot fit this string */
125+
*status = U_BUFFER_OVERFLOW_ERROR;
126+
return NULL;
127+
}
128+
129+
/* Pre-flight */
130+
u_strFromUTF8( NULL, 0, &dst_len, src, (int32_t)src_len, status );
131+
if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
132+
return NULL;
133+
134+
/* Note: l=sizeof(UChar)-1 because we need sizeof(UChar) bytes for the NUL terminator instead of 1. */
135+
zend_string *dst = zend_string_safe_alloc(sizeof(UChar), dst_len, sizeof(UChar) - 1, false);
136+
UChar *dst_buf = (UChar *) ZSTR_VAL(dst);
137+
ZEND_ASSERT((ZSTR_LEN(dst) - 1) / 2 == dst_len);
138+
/* However, the length must not include the NUL terminator that we included previously. */
139+
ZSTR_LEN(dst)--;
140+
141+
/* Convert source string from UTF-8 to UTF-16. */
142+
*status = U_ZERO_ERROR;
143+
u_strFromUTF8( dst_buf, dst_len + 1, NULL, src, src_len, status );
144+
if( U_FAILURE( *status ) )
145+
{
146+
zend_string_efree( dst );
147+
return NULL;
148+
}
149+
150+
dst_buf[dst_len] = 0;
151+
152+
return dst;
153+
}
154+
107155
/* {{{ intl_convert_utf16_to_utf8
108156
* Convert given string from UTF-16 to UTF-8.
109157
*

ext/intl/intl_convert.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ void intl_convert_utf8_to_utf16(
2323
const char* src, size_t src_len,
2424
UErrorCode* status );
2525

26+
zend_string *intl_convert_utf8_to_utf16_zstr(
27+
const char* src, size_t src_len,
28+
UErrorCode* status );
29+
2630
zend_string* intl_convert_utf16_to_utf8(
2731
const UChar* src, int32_t src_len,
2832
UErrorCode* status );

0 commit comments

Comments
 (0)