Skip to content

Commit 936a754

Browse files
committed
Copy the UTF-8 valid flag upon interning a string
1 parent 1f54ffb commit 936a754

File tree

2 files changed

+23
-12
lines changed

2 files changed

+23
-12
lines changed

Zend/zend_string.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,19 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str
191191
return zend_interned_string_ht_lookup(str, &interned_strings_permanent);
192192
}
193193

194+
/*
 * Produce a fresh copy of `str` to be handed to the interned-string tables,
 * carrying over the source's cached hash and, when set, its valid-UTF-8 flag.
 *
 * str:        string to copy; one reference to it is dropped here.
 * persistent: forwarded unchanged as the third argument of zend_string_init().
 *
 * Returns the newly initialised string.
 *
 * NOTE(review): `str` is still read after zend_string_delref(). Both call
 * sites visible in this diff only reach this helper when GC_REFCOUNT(str) > 1,
 * which presumably keeps the original alive across the copy -- confirm before
 * adding new callers.
 */
static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) {
195+
uint32_t flags = 0;
196+
/* Capture the UTF-8 validity marker now; the copy below starts without it. */
if (ZSTR_IS_VALID_UTF8(str)) {
197+
flags = IS_STR_VALID_UTF8;
198+
}
199+
/* Save the hash field so it can be written back onto the copy. */
zend_ulong h = ZSTR_H(str);
200+
/* Give up this caller's reference to the original string. */
zend_string_delref(str);
201+
/* Duplicate the bytes; from here on `str` refers to the new copy. */
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent);
202+
/* Re-apply the flag captured above (a no-op when flags == 0). */
GC_ADD_FLAGS(str, flags);
203+
ZSTR_H(str) = h;
204+
return str;
205+
}
206+
194207
static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str)
195208
{
196209
zend_string *ret;
@@ -208,10 +221,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string
208221

209222
ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT);
210223
if (GC_REFCOUNT(str) > 1) {
211-
zend_ulong h = ZSTR_H(str);
212-
zend_string_delref(str);
213-
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1);
214-
ZSTR_H(str) = h;
224+
str = zend_init_string_for_interning(str, true);
215225
}
216226

217227
return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT);
@@ -249,10 +259,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string *
249259
}
250260
#endif
251261
if (GC_REFCOUNT(str) > 1) {
252-
zend_ulong h = ZSTR_H(str);
253-
zend_string_delref(str);
254-
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
255-
ZSTR_H(str) = h;
262+
str = zend_init_string_for_interning(str, false);
256263
}
257264

258265
ret = zend_add_interned_string(str, &CG(interned_strings), 0);

ext/zend_test/tests/strings_marked_as_utf8.phpt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
Check that strings are marked as valid UTF-8
33
--EXTENSIONS--
44
zend_test
5-
--XFAIL--
6-
Flag is not set in assignment when concatenating known UTF-8 strings
75
--FILE--
86
<?php
97
echo "Empty strings:\n";
@@ -49,8 +47,14 @@ $s = "f" . "o";
4947
var_dump($s);
5048
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
5149

50+
// The "foo" string matches with a "Foo" class which is registered by the zend_test extension.
51+
// That class name does not have the "valid UTF-8" flag because class names in general
52+
// don't have to be UTF-8. As the "foo" string here goes through the interning logic,
53+
// the string gets replaced by the "foo" string from the class, which does
54+
// not have the "valid UTF-8" flag. We therefore choose a different test case: "fxo".
55+
// The previous "foo" test case works because it is not interned.
5256
echo "Multiple concatenation known valid UTF-8 in assignment:\n";
53-
$s = "f" . "o" . "o";
57+
$s = "f" . "x" . "o";
5458
var_dump($s);
5559
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
5660

@@ -124,7 +128,7 @@ Concatenation known valid UTF-8 in assignment:
124128
string(2) "fo"
125129
bool(true)
126130
Multiple concatenation known valid UTF-8 in assignment:
127-
string(3) "foo"
131+
string(3) "fxo"
128132
bool(true)
129133
Concatenation known valid UTF-8 string with empty string in variables:
130134
bool(true)

0 commit comments

Comments
 (0)