From b611d6d597e824fec2ed497436fc463399f2bc2c Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 27 Jan 2023 16:33:52 +0000 Subject: [PATCH 1/4] Add macro to check zend_string is marked as valid UTF-8 --- Zend/zend_string.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Zend/zend_string.h b/Zend/zend_string.h index f2fe44d59eb26..7f38394ecbd40 100644 --- a/Zend/zend_string.h +++ b/Zend/zend_string.h @@ -80,6 +80,7 @@ END_EXTERN_C() /*---*/ #define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED) +#define ZSTR_IS_VALID_UTF8(s) (GC_FLAGS(s) & IS_STR_VALID_UTF8) #define ZSTR_EMPTY_ALLOC() zend_empty_string #define ZSTR_CHAR(c) zend_one_char_string[c] From e0ac69d5f72eb0e7c19ecd4729fcdf1d22fbb10a Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sun, 29 Jan 2023 16:35:37 +0000 Subject: [PATCH 2/4] Add function in zend_test to check UTF8 flag is added Also add test to check what strings are marked as having the flag --- ext/zend_test/test.c | 11 ++ ext/zend_test/test.stub.php | 2 + ext/zend_test/test_arginfo.h | 8 +- .../tests/strings_marked_as_utf8.phpt | 139 ++++++++++++++++++ .../tests/strings_not_marked_as_utf8.phpt | 131 +++++++++++++++++ 5 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 ext/zend_test/tests/strings_marked_as_utf8.phpt create mode 100644 ext/zend_test/tests/strings_not_marked_as_utf8.phpt diff --git a/ext/zend_test/test.c b/ext/zend_test/test.c index 46727bb59ff29..2ebee7b423aa4 100644 --- a/ext/zend_test/test.c +++ b/ext/zend_test/test.c @@ -424,6 +424,17 @@ static ZEND_FUNCTION(zend_test_zend_ini_str) RETURN_STR(ZT_G(str_test)); } +static ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8) +{ + zend_string *str; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(str) + ZEND_PARSE_PARAMETERS_END(); + + RETURN_BOOL(ZSTR_IS_VALID_UTF8(str)); +} + static ZEND_FUNCTION(ZendTestNS2_namespaced_func) { ZEND_PARSE_PARAMETERS_NONE(); diff --git a/ext/zend_test/test.stub.php b/ext/zend_test/test.stub.php 
index 76f08a2831a51..4f444475f0155 100644 --- a/ext/zend_test/test.stub.php +++ b/ext/zend_test/test.stub.php @@ -178,6 +178,8 @@ function zend_test_zend_ini_str(): string {} function zend_test_zend_call_stack_get(): ?array {} function zend_test_zend_call_stack_use_all(): int {} #endif + + function zend_test_is_string_marked_as_valid_utf8(string $string): bool {} } namespace ZendTestNS { diff --git a/ext/zend_test/test_arginfo.h b/ext/zend_test/test_arginfo.h index 3c6e83e8010e5..da65f05b9aa85 100644 --- a/ext/zend_test/test_arginfo.h +++ b/ext/zend_test/test_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: f19c545e86b40d999d43008882f0c151d26be121 */ + * Stub hash: 80543c60da9d2732e677375e49afc21c91bf594b */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_array_return, 0, 0, IS_ARRAY, 0) ZEND_END_ARG_INFO() @@ -107,6 +107,10 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_zend_call_stack_use_al ZEND_END_ARG_INFO() #endif +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_is_string_marked_as_valid_utf8, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_ZendTestNS2_namespaced_func, 0, 0, _IS_BOOL, 0) ZEND_END_ARG_INFO() @@ -201,6 +205,7 @@ static ZEND_FUNCTION(zend_test_zend_call_stack_get); #if defined(ZEND_CHECK_STACK_LIMIT) static ZEND_FUNCTION(zend_test_zend_call_stack_use_all); #endif +static ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8); static ZEND_FUNCTION(ZendTestNS2_namespaced_func); static ZEND_FUNCTION(ZendTestNS2_namespaced_deprecated_func); static ZEND_FUNCTION(ZendTestNS2_ZendSubNS_namespaced_func); @@ -257,6 +262,7 @@ static const zend_function_entry ext_functions[] = { #if defined(ZEND_CHECK_STACK_LIMIT) ZEND_FE(zend_test_zend_call_stack_use_all, arginfo_zend_test_zend_call_stack_use_all) #endif + ZEND_FE(zend_test_is_string_marked_as_valid_utf8, 
arginfo_zend_test_is_string_marked_as_valid_utf8) ZEND_NS_FALIAS("ZendTestNS2", namespaced_func, ZendTestNS2_namespaced_func, arginfo_ZendTestNS2_namespaced_func) ZEND_NS_DEP_FALIAS("ZendTestNS2", namespaced_deprecated_func, ZendTestNS2_namespaced_deprecated_func, arginfo_ZendTestNS2_namespaced_deprecated_func) ZEND_NS_FALIAS("ZendTestNS2", namespaced_aliased_func, zend_test_void_return, arginfo_ZendTestNS2_namespaced_aliased_func) diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt new file mode 100644 index 0000000000000..a4a6da41b7d2a --- /dev/null +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -0,0 +1,139 @@ +--TEST-- +Check that strings are marked as valid UTF-8 +--EXTENSIONS-- +zend_test +--FILE-- + +--EXPECT-- +Empty strings: +bool(true) +Known strings: +bool(true) +Integer cast to string: +string(4) "2563" +bool(false) +Float cast to string: +string(4) "26.7" +bool(false) +string(8) "2.0E+100" +bool(false) +Concatenation known valid UTF-8 strings in variables: +string(2) "fo" +bool(false) +Multiple concatenation known valid UTF-8 strings in variables: +string(3) "foo" +bool(false) +Concatenation known valid UTF-8 in assignment: +string(2) "fo" +bool(false) +Multiple concatenation known valid UTF-8 in assignment: +string(3) "foo" +bool(false) +Concatenation known valid UTF-8 string with empty string in variables: +bool(true) +bool(true) +Concatenation known valid UTF-8 string with empty string in assignment: +bool(true) +bool(true) +Concatenation in loop: +bool(false) +Concatenation in loop (compound assignment): +bool(false) +Concatenation of objects: +string(2) "zz" +bool(false) diff --git a/ext/zend_test/tests/strings_not_marked_as_utf8.phpt b/ext/zend_test/tests/strings_not_marked_as_utf8.phpt new file mode 100644 index 0000000000000..1a5210d39313f --- /dev/null +++ b/ext/zend_test/tests/strings_not_marked_as_utf8.phpt @@ -0,0 +1,131 @@ +--TEST-- +Check that invalid UTF-8 strings are 
NOT marked as valid UTF-8 +--EXTENSIONS-- +zend_test +--FILE-- + +--EXPECT-- +Integer cast to string concatenated to invalid UTF-8: +bool(false) +Float cast to string concatenated to invalid UTF-8: +bool(false) +bool(false) +Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8: +bool(false) +Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8: +bool(false) +Concatenation known valid UTF-8 with invalid UTF-8 in assignment: +bool(false) +Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment: +bool(false) +Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8: +bool(false) +bool(false) +Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8: +bool(false) +bool(false) +Concatenation in loop: +bool(false) +Concatenation in loop (compound assignment): +bool(false) +Concatenation of objects: +bool(false) From 6ac7656be0601801ef31164f902f9c0714680bf4 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 27 Jan 2023 16:30:07 +0000 Subject: [PATCH 3/4] Mark numeric strings as valid UTF-8 --- Zend/zend_operators.c | 20 ++++++++++++++----- .../tests/strings_marked_as_utf8.phpt | 6 +++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 9f6ae2073f7a8..8bfc40c6c5a9f 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -3218,7 +3218,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_long_to_str(zend_long num) /* {{{ */ } else { char buf[MAX_LENGTH_OF_LONG + 1]; char *res = zend_print_long_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } /* }}} */ @@ -3230,7 +3232,9 @@ ZEND_API 
zend_string* ZEND_FASTCALL zend_ulong_to_str(zend_ulong num) } else { char buf[MAX_LENGTH_OF_LONG + 1]; char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } @@ -3272,7 +3276,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_u64_to_str(uint64_t num) } else { char buf[20 + 1]; char *res = zend_print_u64_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } @@ -3283,7 +3289,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_i64_to_str(int64_t num) } else { char buf[20 + 1]; char *res = zend_print_i64_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } @@ -3293,7 +3301,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_double_to_str(double num) /* Model snprintf precision behavior. */ int precision = (int) EG(precision); zend_gcvt(num, precision ? 
precision : 1, '.', 'E', buf); - return zend_string_init(buf, strlen(buf), 0); + zend_string *str = zend_string_init(buf, strlen(buf), 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } ZEND_API zend_uchar ZEND_FASTCALL is_numeric_str_function(const zend_string *str, zend_long *lval, double *dval) /* {{{ */ diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt index a4a6da41b7d2a..ff84cad62aa02 100644 --- a/ext/zend_test/tests/strings_marked_as_utf8.phpt +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -106,12 +106,12 @@ Known strings: bool(true) Integer cast to string: string(4) "2563" -bool(false) +bool(true) Float cast to string: string(4) "26.7" -bool(false) +bool(true) string(8) "2.0E+100" -bool(false) +bool(true) Concatenation known valid UTF-8 strings in variables: string(2) "fo" bool(false) From 9f6134a3aa5c4f3e29e25833234d21da50de52f5 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sun, 29 Jan 2023 16:39:19 +0000 Subject: [PATCH 4/4] Concatenating two valid UTF-8 strings produces a valid UTF-8 string The UTF-8 valid flag needs to be copied upon interning, otherwise strings that are concatenated at compile time lose this information. However, if the string was previously interned without the flag, the flag is not added, e.g. when the string is an existing class name.
Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- Zend/zend_operators.c | 8 +- Zend/zend_string.c | 23 ++- Zend/zend_vm_def.h | 18 ++ Zend/zend_vm_execute.h | 155 ++++++++++++++++++ ext/opcache/ZendAccelerator.c | 2 +- ext/opcache/jit/zend_jit_helpers.c | 18 ++ ext/opcache/zend_persist.c | 6 +- .../tests/strings_marked_as_utf8.phpt | 23 ++- 8 files changed, 235 insertions(+), 18 deletions(-) diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 8bfc40c6c5a9f..5a4c30168adef 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1887,7 +1887,7 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1, ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */ { - zval *orig_op1 = op1; + zval *orig_op1 = op1; zval op1_copy, op2_copy; ZVAL_UNDEF(&op1_copy); @@ -1955,6 +1955,11 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1976,6 +1981,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval i_zval_ptr_dtor(result); } } + GC_ADD_FLAGS(result_str, flags); /* This has to happen first to account for the cases where result == op1 == op2 and * the realloc is done. 
In this case this line will also update Z_STRVAL_P(op2) to diff --git a/Zend/zend_string.c b/Zend/zend_string.c index 68e6084fdf60f..2d6a30d37cbaa 100644 --- a/Zend/zend_string.c +++ b/Zend/zend_string.c @@ -191,6 +191,19 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str return zend_interned_string_ht_lookup(str, &interned_strings_permanent); } +static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) { + uint32_t flags = 0; + if (ZSTR_IS_VALID_UTF8(str)) { + flags = IS_STR_VALID_UTF8; + } + zend_ulong h = ZSTR_H(str); + zend_string_delref(str); + str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent); + GC_ADD_FLAGS(str, flags); + ZSTR_H(str) = h; + return str; +} + static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str) { zend_string *ret; @@ -208,10 +221,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT); if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, true); } return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT); @@ -249,10 +259,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string * } #endif if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, false); } ret = zend_add_interned_string(str, &CG(interned_strings), 0); diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index fc7cd81323987..efde56931d6a2 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -384,6 +384,11 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ zend_string *op1_str = Z_STR_P(op1); 
zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -412,6 +417,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -420,6 +426,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3140,6 +3147,11 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3165,6 +3177,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -3173,6 +3186,7 @@ 
ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3233,6 +3247,10 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index a1eefaa72e336..f4e1b67cbd457 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -6624,6 +6624,11 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -6649,6 +6654,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -6657,6 +6663,7 @@ static 
ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -6717,6 +6724,10 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -8691,6 +8702,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -8719,6 +8735,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -8727,6 +8744,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); 
memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9116,6 +9134,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9141,6 +9164,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -9149,6 +9173,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9209,6 +9234,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + 
GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -11057,6 +11086,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11085,6 +11119,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11093,6 +11128,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11482,6 +11518,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11507,6 +11548,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = 
zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11515,6 +11557,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11575,6 +11618,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -15088,6 +15135,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15116,6 +15168,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); 
ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15124,6 +15177,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15827,6 +15881,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15852,6 +15911,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15860,6 +15920,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15920,6 +15981,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL 
ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -16525,6 +16590,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -16553,6 +16623,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -16561,6 +16632,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17264,6 +17336,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = 
Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17289,6 +17366,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -17297,6 +17375,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17357,6 +17436,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -18213,6 +18296,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && 
ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18241,6 +18329,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18249,6 +18338,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18590,6 +18680,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18615,6 +18710,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18623,6 +18719,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = 
zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18683,6 +18780,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -40255,6 +40356,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -40283,6 +40389,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -40291,6 +40398,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + 
ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42751,6 +42859,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42776,6 +42889,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -42784,6 +42898,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42844,6 +42959,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { 
zend_string_release_ex(op1_str, 0); @@ -44067,6 +44186,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -44095,6 +44219,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -44103,6 +44228,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46492,6 +46618,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46517,6 +46648,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) 
+ len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -46525,6 +46657,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46585,6 +46718,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -49361,6 +49498,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -49389,6 +49531,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { 
zend_string_release_ex(op2_str, 0); @@ -49397,6 +49540,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51885,6 +52029,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -51910,6 +52059,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -51918,6 +52068,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51978,6 +52129,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), 
ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 0d589c7c61d0f..4d68bc3191cb9 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -549,7 +549,7 @@ zend_string* ZEND_FASTCALL accel_new_interned_string(zend_string *str) STRTAB_COLLISION(s) = *hash_slot; *hash_slot = STRTAB_STR_TO_POS(&ZCSG(interned_strings), s); GC_SET_REFCOUNT(s, 2); - GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); + GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT)| (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); ZSTR_H(s) = h; ZSTR_LEN(s) = ZSTR_LEN(str); memcpy(ZSTR_VAL(s), ZSTR_VAL(str), ZSTR_LEN(s) + 1); diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index c594ade575bed..261b456a4fc31 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -1633,6 +1633,11 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1656,6 +1661,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); } while(0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(op1, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); 
ZSTR_VAL(result_str)[result_len] = '\0'; @@ -1667,6 +1673,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1674,6 +1685,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z } result_str = zend_string_alloc(result_len, 0); + GC_ADD_FLAGS(result_str, flags); memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); ZVAL_NEW_STR(result, result_str); @@ -1689,6 +1701,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1710,6 +1727,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op memcpy(ZSTR_VAL(result_str), ZSTR_VAL(op1_str), op1_len); } while (0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(result, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index dcff7a7883289..7bc95b711102a 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -37,11 +37,13 @@ #define zend_set_str_gc_flags(str) do { \ GC_SET_REFCOUNT(str, 2); \ + uint32_t flags = GC_STRING | (ZSTR_IS_VALID_UTF8(str) ? 
IS_STR_VALID_UTF8 : 0); \ if (file_cache_only) { \ - GC_TYPE_INFO(str) = GC_STRING | (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ + flags |= (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ } else { \ - GC_TYPE_INFO(str) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ + flags |= ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ } \ + GC_TYPE_INFO(str) = flags; \ } while (0) #define zend_accel_store_string(str) do { \ diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt index ff84cad62aa02..5b6dfb6a0763d 100644 --- a/ext/zend_test/tests/strings_marked_as_utf8.phpt +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -47,10 +47,19 @@ $s = "f" . "o"; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +// The "foo" string matches with a "Foo" class which is registered by the zend_test extension. +// That class name does not have the "valid UTF-8" flag because class names in general +// don't have to be UTF-8. As the "foo" string here goes through the interning logic, +// the string gets replaced by the "foo" string from the class, which does +// not have the "valid UTF-8" flag. We therefore choose a different test case: "fxo". +// The previous "foo" test case works because it is not interned. echo "Multiple concatenation known valid UTF-8 in assignment:\n"; $s = "f" . "o" . "o"; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +$s = "f" . "x" . 
"o"; +var_dump($s); +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); echo "Concatenation known valid UTF-8 string with empty string in variables:\n"; $s1 = "f"; @@ -114,16 +123,18 @@ string(8) "2.0E+100" bool(true) Concatenation known valid UTF-8 strings in variables: string(2) "fo" -bool(false) +bool(true) Multiple concatenation known valid UTF-8 strings in variables: string(3) "foo" -bool(false) +bool(true) Concatenation known valid UTF-8 in assignment: string(2) "fo" -bool(false) +bool(true) Multiple concatenation known valid UTF-8 in assignment: string(3) "foo" bool(false) +string(3) "fxo" +bool(true) Concatenation known valid UTF-8 string with empty string in variables: bool(true) bool(true) @@ -131,9 +142,9 @@ Concatenation known valid UTF-8 string with empty string in assignment: bool(true) bool(true) Concatenation in loop: -bool(false) +bool(true) Concatenation in loop (compound assignment): -bool(false) +bool(true) Concatenation of objects: string(2) "zz" -bool(false) +bool(true)