Skip to content

Avoid string copies in ext/intl after string conversion #18636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,10 @@ PHP 8.5 UPGRADE NOTES
. Add OPcode specialization for `=== []` and `!== []` comparisons.
. Creating exception objects is now much faster.

- Intl:
. Now avoids creating extra string copies when converting strings
for use in the collator.

- ReflectionProperty:
. Improved performance of the following methods: getValue(), getRawValue(),
isInitialized(), setValue(), setRawValue().
Expand Down
37 changes: 10 additions & 27 deletions ext/intl/collator/collator_convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ static void collator_convert_hash_item_from_utf8_to_utf16(
{
const char* old_val;
size_t old_val_len;
UChar* new_val = NULL;
int32_t new_val_len = 0;
zval znew_val;

/* Process string values only. */
Expand All @@ -49,17 +47,13 @@ static void collator_convert_hash_item_from_utf8_to_utf16(
old_val = Z_STRVAL_P( hashData );
old_val_len = Z_STRLEN_P( hashData );

/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
/* Convert it from UTF-8 to UTF-16LE. */
zend_string *zstr = intl_convert_utf8_to_utf16_zstr( old_val, old_val_len, status );
if( U_FAILURE( *status ) )
return;

/* Update current hash item with the converted value. */
ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
//???
efree(new_val);
/* hack to fix use of initialized value */
Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
ZVAL_NEW_STR( &znew_val, zstr );

if( hashKey)
{
Expand Down Expand Up @@ -176,23 +170,19 @@ zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )

zend_string *collator_convert_zstr_utf8_to_utf16(zend_string *utf8_str)
{
UChar *ustr = NULL;
int32_t ustr_len = 0;
UErrorCode status = U_ZERO_ERROR;

/* Convert the string to UTF-16. */
intl_convert_utf8_to_utf16(
&ustr, &ustr_len,
zend_string *zstr = intl_convert_utf8_to_utf16_zstr(
ZSTR_VAL(utf8_str), ZSTR_LEN(utf8_str),
&status);
// FIXME Or throw error or use intl internal error handler
if (U_FAILURE(status)) {
php_error(E_WARNING,
"Error casting object to string in collator_convert_zstr_utf8_to_utf16()");
zstr = ZSTR_EMPTY_ALLOC();
}

zend_string *zstr = zend_string_init((char *) ustr, UBYTES(ustr_len), 0);
efree((char *)ustr);
return zstr;
}

Expand All @@ -203,8 +193,6 @@ zval* collator_convert_object_to_string( zval* obj, zval *rv )
{
zval* zstr = NULL;
UErrorCode status = U_ZERO_ERROR;
UChar* ustr = NULL;
int32_t ustr_len = 0;

/* Bail out if it's not an object. */
if( Z_TYPE_P( obj ) != IS_OBJECT )
Expand All @@ -229,25 +217,20 @@ zval* collator_convert_object_to_string( zval* obj, zval *rv )
}

/* Convert the string to UTF-16. */
intl_convert_utf8_to_utf16(
&ustr, &ustr_len,
zend_string *converted_str = intl_convert_utf8_to_utf16_zstr(
Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
&status );
// FIXME Or throw error or use intl internal error handler
if( U_FAILURE( status ) )
if( U_FAILURE( status ) ) {
php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
converted_str = ZSTR_EMPTY_ALLOC();
}

/* Cleanup zstr to hold utf16 string. */
zval_ptr_dtor_str( zstr );

/* Set string. */
ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
//???
efree((char *)ustr);

/* Don't free ustr cause it's set in zstr without copy.
* efree( ustr );
*/
ZVAL_STR( zstr, converted_str );

return zstr;
}
Expand Down
48 changes: 48 additions & 0 deletions ext/intl/intl_convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,54 @@ void intl_convert_utf8_to_utf16(
}
/* }}} */

/* Convert given string from UTF-8 to UTF-16 to a zend_string*
*
* @param src String to convert.
* @param src_len Length of the source string.
* @param status Conversion status.
*
* @return zend_string* on success and NULL on failure
*/
zend_string *intl_convert_utf8_to_utf16_zstr(
const char* src, size_t src_len,
UErrorCode* status )
{
int32_t dst_len = 0;

*status = U_ZERO_ERROR;

if(src_len > INT32_MAX) {
/* we cannot fit this string */
*status = U_BUFFER_OVERFLOW_ERROR;
return NULL;
}

/* Pre-flight */
u_strFromUTF8( NULL, 0, &dst_len, src, (int32_t)src_len, status );
if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
return NULL;

/* Note: l=sizeof(UChar)-1 because we need sizeof(UChar) bytes for the NUL terminator instead of 1. */
zend_string *dst = zend_string_safe_alloc(sizeof(UChar), dst_len, sizeof(UChar) - 1, false);
UChar *dst_buf = (UChar *) ZSTR_VAL(dst);
ZEND_ASSERT((ZSTR_LEN(dst) - 1) / 2 == dst_len);
/* However, the length must not include the NUL terminator that we included previously. */
ZSTR_LEN(dst)--;

/* Convert source string from UTF-8 to UTF-16. */
*status = U_ZERO_ERROR;
u_strFromUTF8( dst_buf, dst_len + 1, NULL, src, src_len, status );
if( U_FAILURE( *status ) )
{
zend_string_efree( dst );
return NULL;
}

dst_buf[dst_len] = 0;

return dst;
}

/* {{{ intl_convert_utf16_to_utf8
* Convert given string from UTF-16 to UTF-8.
*
Expand Down
4 changes: 4 additions & 0 deletions ext/intl/intl_convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ void intl_convert_utf8_to_utf16(
const char* src, size_t src_len,
UErrorCode* status );

zend_string *intl_convert_utf8_to_utf16_zstr(
const char* src, size_t src_len,
UErrorCode* status );

zend_string* intl_convert_utf16_to_utf8(
const UChar* src, int32_t src_len,
UErrorCode* status );
Expand Down