diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 5a4c30168adef..1eaa11eccf793 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1955,11 +1955,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { zend_throw_error(NULL, "String size overflow"); diff --git a/Zend/zend_string.c b/Zend/zend_string.c index 2d6a30d37cbaa..d65101e778c14 100644 --- a/Zend/zend_string.c +++ b/Zend/zend_string.c @@ -191,11 +191,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str return zend_interned_string_ht_lookup(str, &interned_strings_permanent); } -static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) { - uint32_t flags = 0; - if (ZSTR_IS_VALID_UTF8(str)) { - flags = IS_STR_VALID_UTF8; - } +static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) +{ + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(str); zend_ulong h = ZSTR_H(str); zend_string_delref(str); str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent); diff --git a/Zend/zend_string.h b/Zend/zend_string.h index d7bca020c3e19..3eca7343a008b 100644 --- a/Zend/zend_string.h +++ b/Zend/zend_string.h @@ -82,6 +82,27 @@ END_EXTERN_C() #define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED) #define ZSTR_IS_VALID_UTF8(s) (GC_FLAGS(s) & IS_STR_VALID_UTF8) +/* These are properties, encoded as flags, that will hold on the resulting string + * after concatenating two strings that have these property. + * Example: concatenating two UTF-8 strings yields another UTF-8 string. */ +#define ZSTR_COPYABLE_CONCAT_PROPERTIES (IS_STR_VALID_UTF8) + +#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(s) (GC_FLAGS(s) & ZSTR_COPYABLE_CONCAT_PROPERTIES) +/* This macro returns the copyable concat properties which hold on both strings. */ +#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(s1, s2) (GC_FLAGS(s1) & GC_FLAGS(s2) & ZSTR_COPYABLE_CONCAT_PROPERTIES) + +#define ZSTR_COPY_CONCAT_PROPERTIES(out, in) do { \ + zend_string *_out = (out); \ + uint32_t properties = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES((in)); \ + GC_ADD_FLAGS(_out, properties); \ +} while (0) + +#define ZSTR_COPY_CONCAT_PROPERTIES_BOTH(out, in1, in2) do { \ + zend_string *_out = (out); \ + uint32_t properties = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH((in1), (in2)); \ + GC_ADD_FLAGS(_out, properties); \ +} while (0) + #define ZSTR_EMPTY_ALLOC() zend_empty_string #define ZSTR_CHAR(c) zend_one_char_string[c] #define ZSTR_KNOWN(idx) zend_known_strings[idx] diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index ca062b9512ac8..91864e7a7273b 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -384,11 +384,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3147,11 +3143,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3248,9 +3240,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index d9dd9b5890241..ee249b51f308a 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -6624,11 +6624,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -6725,9 +6721,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -8702,11 +8696,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9134,11 +9124,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9235,9 +9221,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -11086,11 +11070,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11518,11 +11498,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11619,9 +11595,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -15135,11 +15109,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15881,11 +15851,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15982,9 +15948,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -16590,11 +16554,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17336,11 +17296,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17437,9 +17393,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -18296,11 +18250,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18680,11 +18630,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18781,9 +18727,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -40356,11 +40300,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42859,11 +42799,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42960,9 +42896,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -44186,11 +44120,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46618,11 +46548,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46719,9 +46645,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -49498,11 +49422,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -52029,11 +51949,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -52130,9 +52046,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index 261b456a4fc31..9c4b80e28513e 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -1633,11 +1633,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1673,11 +1669,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1701,11 +1693,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); diff --git a/ext/standard/string.c b/ext/standard/string.c index 622ed3a9d4369..881e172c6c260 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -5059,6 +5059,7 @@ PHP_FUNCTION(str_repeat) /* Initialize the result string */ result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0); result_len = ZSTR_LEN(input_str) * mult; + ZSTR_COPY_CONCAT_PROPERTIES(result, input_str); /* Heavy optimization for situations where input string is 1 byte long */ if (ZSTR_LEN(input_str) == 1) { diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt index 5b6dfb6a0763d..1519459f1dad1 100644 --- a/ext/zend_test/tests/strings_marked_as_utf8.phpt +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -107,6 +107,14 @@ $s = $o . $o; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +echo "str_repeat:\n"; +$string = "a"; +$string_concat = str_repeat($string, 100); +var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat)); +$string = "\xff"; +$string_concat = str_repeat($string, 100); +var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat)); + ?> --EXPECT-- Empty strings: @@ -148,3 +156,6 @@ bool(true) Concatenation of objects: string(2) "zz" bool(true) +str_repeat: +bool(true) +bool(false)