diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 9f6ae2073f7a8..5a4c30168adef 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1887,7 +1887,7 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1, ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */ { - zval *orig_op1 = op1; + zval *orig_op1 = op1; zval op1_copy, op2_copy; ZVAL_UNDEF(&op1_copy); @@ -1955,6 +1955,11 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1976,6 +1981,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval i_zval_ptr_dtor(result); } } + GC_ADD_FLAGS(result_str, flags); /* This has to happen first to account for the cases where result == op1 == op2 and * the realloc is done. In this case this line will also update Z_STRVAL_P(op2) to @@ -3218,7 +3224,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_long_to_str(zend_long num) /* {{{ */ } else { char buf[MAX_LENGTH_OF_LONG + 1]; char *res = zend_print_long_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } /* }}} */ @@ -3230,7 +3238,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_ulong_to_str(zend_ulong num) } else { char buf[MAX_LENGTH_OF_LONG + 1]; char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } @@ -3272,7 +3282,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_u64_to_str(uint64_t num) } else { char buf[20 + 1]; char *res = zend_print_u64_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } @@ -3283,7 +3295,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_i64_to_str(int64_t num) } else { char buf[20 + 1]; char *res = zend_print_i64_to_buf(buf + sizeof(buf) - 1, num); - return zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + zend_string *str = zend_string_init(res, buf + sizeof(buf) - 1 - res, 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } } @@ -3293,7 +3307,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_double_to_str(double num) /* Model snprintf precision behavior. */ int precision = (int) EG(precision); zend_gcvt(num, precision ? precision : 1, '.', 'E', buf); - return zend_string_init(buf, strlen(buf), 0); + zend_string *str = zend_string_init(buf, strlen(buf), 0); + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + return str; } ZEND_API zend_uchar ZEND_FASTCALL is_numeric_str_function(const zend_string *str, zend_long *lval, double *dval) /* {{{ */ diff --git a/Zend/zend_string.c b/Zend/zend_string.c index 68e6084fdf60f..2d6a30d37cbaa 100644 --- a/Zend/zend_string.c +++ b/Zend/zend_string.c @@ -191,6 +191,19 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str return zend_interned_string_ht_lookup(str, &interned_strings_permanent); } +static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) { + uint32_t flags = 0; + if (ZSTR_IS_VALID_UTF8(str)) { + flags = IS_STR_VALID_UTF8; + } + zend_ulong h = ZSTR_H(str); + zend_string_delref(str); + str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent); + GC_ADD_FLAGS(str, flags); + ZSTR_H(str) = h; + return str; +} + static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str) { zend_string *ret; @@ -208,10 +221,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT); if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, true); } return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT); @@ -249,10 +259,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string * } #endif if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, false); } ret = zend_add_interned_string(str, &CG(interned_strings), 0); diff --git a/Zend/zend_string.h b/Zend/zend_string.h index f2fe44d59eb26..7f38394ecbd40 100644 --- a/Zend/zend_string.h +++ b/Zend/zend_string.h @@ -80,6 +80,7 @@ END_EXTERN_C() /*---*/ #define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED) +#define ZSTR_IS_VALID_UTF8(s) (GC_FLAGS(s) & IS_STR_VALID_UTF8) #define ZSTR_EMPTY_ALLOC() zend_empty_string #define ZSTR_CHAR(c) zend_one_char_string[c] diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index fc7cd81323987..efde56931d6a2 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -384,6 +384,11 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -412,6 +417,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -420,6 +426,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3140,6 +3147,11 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3165,6 +3177,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -3173,6 +3186,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3233,6 +3247,10 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index a1eefaa72e336..f4e1b67cbd457 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -6624,6 +6624,11 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -6649,6 +6654,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -6657,6 +6663,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -6717,6 +6724,10 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -8691,6 +8702,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -8719,6 +8735,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -8727,6 +8744,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9116,6 +9134,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9141,6 +9164,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -9149,6 +9173,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9209,6 +9234,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -11057,6 +11086,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11085,6 +11119,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11093,6 +11128,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11482,6 +11518,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11507,6 +11548,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11515,6 +11557,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11575,6 +11618,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -15088,6 +15135,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15116,6 +15168,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15124,6 +15177,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15827,6 +15881,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15852,6 +15911,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15860,6 +15920,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15920,6 +15981,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -16525,6 +16590,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -16553,6 +16623,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -16561,6 +16632,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17264,6 +17336,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17289,6 +17366,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -17297,6 +17375,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17357,6 +17436,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -18213,6 +18296,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18241,6 +18329,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18249,6 +18338,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18590,6 +18680,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18615,6 +18710,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18623,6 +18719,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18683,6 +18780,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -40255,6 +40356,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -40283,6 +40389,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -40291,6 +40398,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42751,6 +42859,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42776,6 +42889,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -42784,6 +42898,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42844,6 +42959,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -44067,6 +44186,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -44095,6 +44219,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -44103,6 +44228,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46492,6 +46618,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46517,6 +46648,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -46525,6 +46657,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46585,6 +46718,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -49361,6 +49498,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -49389,6 +49531,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -49397,6 +49540,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51885,6 +52029,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -51910,6 +52059,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -51918,6 +52068,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51978,6 +52129,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 0d589c7c61d0f..4d68bc3191cb9 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -549,7 +549,7 @@ zend_string* ZEND_FASTCALL accel_new_interned_string(zend_string *str) STRTAB_COLLISION(s) = *hash_slot; *hash_slot = STRTAB_STR_TO_POS(&ZCSG(interned_strings), s); GC_SET_REFCOUNT(s, 2); - GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); + GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT)| (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); ZSTR_H(s) = h; ZSTR_LEN(s) = ZSTR_LEN(str); memcpy(ZSTR_VAL(s), ZSTR_VAL(str), ZSTR_LEN(s) + 1); diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index c594ade575bed..261b456a4fc31 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -1633,6 +1633,11 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1656,6 +1661,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); } while(0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(op1, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); ZSTR_VAL(result_str)[result_len] = '\0'; @@ -1667,6 +1673,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1674,6 +1685,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z } result_str = zend_string_alloc(result_len, 0); + GC_ADD_FLAGS(result_str, flags); memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); ZVAL_NEW_STR(result, result_str); @@ -1689,6 +1701,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1710,6 +1727,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op memcpy(ZSTR_VAL(result_str), ZSTR_VAL(op1_str), op1_len); } while (0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(result, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index dcff7a7883289..7bc95b711102a 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -37,11 +37,13 @@ #define zend_set_str_gc_flags(str) do { \ GC_SET_REFCOUNT(str, 2); \ + uint32_t flags = GC_STRING | (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); \ if (file_cache_only) { \ - GC_TYPE_INFO(str) = GC_STRING | (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ + flags |= (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ } else { \ - GC_TYPE_INFO(str) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ + flags |= ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ } \ + GC_TYPE_INFO(str) = flags; \ } while (0) #define zend_accel_store_string(str) do { \ diff --git a/ext/zend_test/test.c b/ext/zend_test/test.c index 46727bb59ff29..2ebee7b423aa4 100644 --- a/ext/zend_test/test.c +++ b/ext/zend_test/test.c @@ -424,6 +424,17 @@ static ZEND_FUNCTION(zend_test_zend_ini_str) RETURN_STR(ZT_G(str_test)); } +static ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8) +{ + zend_string *str; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(str) + ZEND_PARSE_PARAMETERS_END(); + + RETURN_BOOL(ZSTR_IS_VALID_UTF8(str)); +} + static ZEND_FUNCTION(ZendTestNS2_namespaced_func) { ZEND_PARSE_PARAMETERS_NONE(); diff --git a/ext/zend_test/test.stub.php b/ext/zend_test/test.stub.php index 76f08a2831a51..4f444475f0155 100644 --- a/ext/zend_test/test.stub.php +++ b/ext/zend_test/test.stub.php @@ -178,6 +178,8 @@ function zend_test_zend_ini_str(): string {} function zend_test_zend_call_stack_get(): ?array {} function zend_test_zend_call_stack_use_all(): int {} #endif + + function zend_test_is_string_marked_as_valid_utf8(string $string): bool {} } namespace ZendTestNS { diff --git a/ext/zend_test/test_arginfo.h b/ext/zend_test/test_arginfo.h index 3c6e83e8010e5..da65f05b9aa85 100644 --- a/ext/zend_test/test_arginfo.h +++ b/ext/zend_test/test_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: f19c545e86b40d999d43008882f0c151d26be121 */ + * Stub hash: 80543c60da9d2732e677375e49afc21c91bf594b */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_array_return, 0, 0, IS_ARRAY, 0) ZEND_END_ARG_INFO() @@ -107,6 +107,10 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_zend_call_stack_use_al ZEND_END_ARG_INFO() #endif +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_zend_test_is_string_marked_as_valid_utf8, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_ZendTestNS2_namespaced_func, 0, 0, _IS_BOOL, 0) ZEND_END_ARG_INFO() @@ -201,6 +205,7 @@ static ZEND_FUNCTION(zend_test_zend_call_stack_get); #if defined(ZEND_CHECK_STACK_LIMIT) static ZEND_FUNCTION(zend_test_zend_call_stack_use_all); #endif +static ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8); static ZEND_FUNCTION(ZendTestNS2_namespaced_func); static ZEND_FUNCTION(ZendTestNS2_namespaced_deprecated_func); static ZEND_FUNCTION(ZendTestNS2_ZendSubNS_namespaced_func); @@ -257,6 +262,7 @@ static const zend_function_entry ext_functions[] = { #if defined(ZEND_CHECK_STACK_LIMIT) ZEND_FE(zend_test_zend_call_stack_use_all, arginfo_zend_test_zend_call_stack_use_all) #endif + ZEND_FE(zend_test_is_string_marked_as_valid_utf8, arginfo_zend_test_is_string_marked_as_valid_utf8) ZEND_NS_FALIAS("ZendTestNS2", namespaced_func, ZendTestNS2_namespaced_func, arginfo_ZendTestNS2_namespaced_func) ZEND_NS_DEP_FALIAS("ZendTestNS2", namespaced_deprecated_func, ZendTestNS2_namespaced_deprecated_func, arginfo_ZendTestNS2_namespaced_deprecated_func) ZEND_NS_FALIAS("ZendTestNS2", namespaced_aliased_func, zend_test_void_return, arginfo_ZendTestNS2_namespaced_aliased_func) diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt new file mode 100644 index 0000000000000..5b6dfb6a0763d --- /dev/null +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -0,0 +1,150 @@ +--TEST-- +Check that strings are marked as valid UTF-8 +--EXTENSIONS-- +zend_test +--FILE-- + +--EXPECT-- +Empty strings: +bool(true) +Known strings: +bool(true) +Integer cast to string: +string(4) "2563" +bool(true) +Float cast to string: +string(4) "26.7" +bool(true) +string(8) "2.0E+100" +bool(true) +Concatenation known valid UTF-8 strings in variables: +string(2) "fo" +bool(true) +Multiple concatenation known valid UTF-8 strings in variables: +string(3) "foo" +bool(true) +Concatenation known valid UTF-8 in assignment: +string(2) "fo" +bool(true) +Multiple concatenation known valid UTF-8 in assignment: +string(3) "foo" +bool(false) +string(3) "fxo" +bool(true) +Concatenation known valid UTF-8 string with empty string in variables: +bool(true) +bool(true) +Concatenation known valid UTF-8 string with empty string in assignment: +bool(true) +bool(true) +Concatenation in loop: +bool(true) +Concatenation in loop (compound assignment): +bool(true) +Concatenation of objects: +string(2) "zz" +bool(true) diff --git a/ext/zend_test/tests/strings_not_marked_as_utf8.phpt b/ext/zend_test/tests/strings_not_marked_as_utf8.phpt new file mode 100644 index 0000000000000..1a5210d39313f --- /dev/null +++ b/ext/zend_test/tests/strings_not_marked_as_utf8.phpt @@ -0,0 +1,131 @@ +--TEST-- +Check that invalid UTF-8 strings are NOT marked as valid UTF-8 +--EXTENSIONS-- +zend_test +--FILE-- + +--EXPECT-- +Integer cast to string concatenated to invalid UTF-8: +bool(false) +Float cast to string concatenated to invalid UTF-8: +bool(false) +bool(false) +Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8: +bool(false) +Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8: +bool(false) +Concatenation known valid UTF-8 with invalid UTF-8 in assignment: +bool(false) +Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment: +bool(false) +Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8: +bool(false) +bool(false) +Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8: +bool(false) +bool(false) +Concatenation in loop: +bool(false) +Concatenation in loop (compound assignment): +bool(false) +Concatenation of objects: +bool(false)