Skip to content

Commit 9f6134a

Browse files
Girgias and nielsdos committed
Concatenating two valid UTF-8 strings produces a valid UTF-8 string
The UTF-8 valid flag needs to be copied upon interning; otherwise, strings that are concatenated at compile time lose this information. However, if the string was previously interned without the flag, the flag is not added, e.g. when the string is an existing class name. Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
1 parent 6ac7656 commit 9f6134a

File tree

8 files changed

+235
-18
lines changed

8 files changed

+235
-18
lines changed

Zend/zend_operators.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1887,7 +1887,7 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1,
18871887

18881888
ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */
18891889
{
1890-
zval *orig_op1 = op1;
1890+
zval *orig_op1 = op1;
18911891
zval op1_copy, op2_copy;
18921892

18931893
ZVAL_UNDEF(&op1_copy);
@@ -1955,6 +1955,11 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval
19551955
size_t op2_len = Z_STRLEN_P(op2);
19561956
size_t result_len = op1_len + op2_len;
19571957
zend_string *result_str;
1958+
uint32_t flags = 0;
1959+
1960+
if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) {
1961+
flags = IS_STR_VALID_UTF8;
1962+
}
19581963

19591964
if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) {
19601965
zend_throw_error(NULL, "String size overflow");
@@ -1976,6 +1981,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval
19761981
i_zval_ptr_dtor(result);
19771982
}
19781983
}
1984+
GC_ADD_FLAGS(result_str, flags);
19791985

19801986
/* This has to happen first to account for the cases where result == op1 == op2 and
19811987
* the realloc is done. In this case this line will also update Z_STRVAL_P(op2) to

Zend/zend_string.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,19 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str
191191
return zend_interned_string_ht_lookup(str, &interned_strings_permanent);
192192
}
193193

194+
/* Replaces a shared string with a fresh private copy before it is interned.
 *
 * Drops one reference on `str`, allocates a new string with the same bytes
 * and length (persistent allocation when `persistent` is true), and carries
 * over two pieces of cached state from the original: the hash value and the
 * IS_STR_VALID_UTF8 flag. Copying the flag is the point of this helper; a
 * plain zend_string_init() copy would otherwise lose it.
 *
 * Both visible callers invoke this only when GC_REFCOUNT(str) > 1, so the
 * original is still referenced elsewhere after the delref.
 *
 * Returns the newly allocated copy.
 */
static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) {
195+
uint32_t flags = 0;
196+
/* Remember the UTF-8 validity of the original so it can be applied to the copy. */
if (ZSTR_IS_VALID_UTF8(str)) {
197+
flags = IS_STR_VALID_UTF8;
198+
}
199+
/* Save the cached hash before `str` is reassigned to the copy. */
zend_ulong h = ZSTR_H(str);
200+
zend_string_delref(str);
201+
/* The arguments are read from the original string; `str` then points at the copy. */
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent);
202+
GC_ADD_FLAGS(str, flags);
203+
ZSTR_H(str) = h;
204+
return str;
205+
}
206+
194207
static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str)
195208
{
196209
zend_string *ret;
@@ -208,10 +221,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string
208221

209222
ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT);
210223
if (GC_REFCOUNT(str) > 1) {
211-
zend_ulong h = ZSTR_H(str);
212-
zend_string_delref(str);
213-
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1);
214-
ZSTR_H(str) = h;
224+
str = zend_init_string_for_interning(str, true);
215225
}
216226

217227
return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT);
@@ -249,10 +259,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string *
249259
}
250260
#endif
251261
if (GC_REFCOUNT(str) > 1) {
252-
zend_ulong h = ZSTR_H(str);
253-
zend_string_delref(str);
254-
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
255-
ZSTR_H(str) = h;
262+
str = zend_init_string_for_interning(str, false);
256263
}
257264

258265
ret = zend_add_interned_string(str, &CG(interned_strings), 0);

Zend/zend_vm_def.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,11 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_
384384
zend_string *op1_str = Z_STR_P(op1);
385385
zend_string *op2_str = Z_STR_P(op2);
386386
zend_string *str;
387+
uint32_t flags = 0;
388+
389+
if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) {
390+
flags = IS_STR_VALID_UTF8;
391+
}
387392

388393
if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) {
389394
if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) {
@@ -412,6 +417,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_
412417
}
413418
str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0);
414419
memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1);
420+
GC_ADD_FLAGS(str, flags);
415421
ZVAL_NEW_STR(EX_VAR(opline->result.var), str);
416422
if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) {
417423
zend_string_release_ex(op2_str, 0);
@@ -420,6 +426,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_
420426
str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0);
421427
memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str));
422428
memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1);
429+
GC_ADD_FLAGS(str, flags);
423430
ZVAL_NEW_STR(EX_VAR(opline->result.var), str);
424431
if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) {
425432
zend_string_release_ex(op1_str, 0);
@@ -3140,6 +3147,11 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP
31403147
zend_string *op1_str = Z_STR_P(op1);
31413148
zend_string *op2_str = Z_STR_P(op2);
31423149
zend_string *str;
3150+
uint32_t flags = 0;
3151+
3152+
if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) {
3153+
flags = IS_STR_VALID_UTF8;
3154+
}
31433155

31443156
if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) {
31453157
if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) {
@@ -3165,6 +3177,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP
31653177

31663178
str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0);
31673179
memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1);
3180+
GC_ADD_FLAGS(str, flags);
31683181
ZVAL_NEW_STR(EX_VAR(opline->result.var), str);
31693182
if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) {
31703183
zend_string_release_ex(op2_str, 0);
@@ -3173,6 +3186,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP
31733186
str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0);
31743187
memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str));
31753188
memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1);
3189+
GC_ADD_FLAGS(str, flags);
31763190
ZVAL_NEW_STR(EX_VAR(opline->result.var), str);
31773191
if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) {
31783192
zend_string_release_ex(op1_str, 0);
@@ -3233,6 +3247,10 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP
32333247
str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0);
32343248
memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str));
32353249
memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1);
3250+
3251+
if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) {
3252+
GC_ADD_FLAGS(str, IS_STR_VALID_UTF8);
3253+
}
32363254
ZVAL_NEW_STR(EX_VAR(opline->result.var), str);
32373255
if (OP1_TYPE != IS_CONST) {
32383256
zend_string_release_ex(op1_str, 0);

0 commit comments

Comments
 (0)