Skip to content

Commit e03178e

Browse files
committed
Add test to ensure invalid UTF-8 strings are not marked as valid
1 parent 57d8536 commit e03178e

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
--TEST--
2+
Check that invalid UTF-8 strings are NOT marked as valid UTF-8
3+
--EXTENSIONS--
4+
zend_test
5+
--FILE--
6+
<?php
7+
// Invalid 2 Octet Sequence
// (0xC3 is the lead byte of a two-byte UTF-8 sequence; 0x28 is "(" and is not a continuation byte.)
8+
$non_utf8 = "\xc3\x28";
9+
10+
// Case: an int is cast to string, then the invalid bytes are appended in place with `.=`.
// The --EXPECT-- section requires bool(false) for every var_dump() in this file.
// NOTE(review): presumably the string produced by the cast carries the "valid UTF-8" flag and
// the append has to clear it - confirm against the engine.
echo "Integer cast to string concatenated to invalid UTF-8:\n";
11+
$i = 2563;
12+
$s = (string) $i;
13+
// Uses the literal byte sequence here rather than $non_utf8.
$s .= "\xc3\x28";
14+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
15+
16+
// Case: same scenario with two float values, each cast and then appended to with `.=`.
echo "Float cast to string concatenated to invalid UTF-8:\n";
17+
$f = 26.7;
18+
$s = (string) $f;
19+
$s .= "\xc3\x28";
20+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
21+
// NOTE(review): 2e100 presumably exercises the exponent-notation string form - confirm.
$f = 2e100;
22+
$s = (string) $f;
23+
$s .= "\xc3\x28";
24+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
25+
26+
// Case: two variables holding ASCII strings are joined first; the invalid bytes are then
// appended in a separate statement.
echo "Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n";
27+
$s1 = "f";
28+
$s2 = "o";
29+
$s = $s1 . $s2;
30+
$s = $s . $non_utf8;
31+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
32+
33+
// Case: same operands, but all three are joined in a single expression.
echo "Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n";
34+
$s1 = "f";
35+
$s2 = "o";
36+
$s = $s1 . $s2 . $non_utf8;
37+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
38+
39+
// Case: only string literals, no variables.
// NOTE(review): a literal-only concatenation is presumably folded at compile time, which
// would make this a different code path from the variable cases - confirm.
echo "Concatenation known valid UTF-8 with invalid UTF-8 in assignment:\n";
40+
$s = "f" . "\xc3\x28";
41+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
42+
43+
// The "foo" string matches with a "Foo" class which is registered by the zend_test extension.
44+
// That class name does not have the "valid UTF-8" flag because class names in general
45+
// don't have to be UTF-8. A "foo" string that goes through the interning logic
46+
// gets replaced by the "foo" string from the class, which does
47+
// not have the "valid UTF-8" flag.
48+
// NOTE(review): this comment used to say that "fxo" was chosen to avoid that collision, but the
// expression below joins "f", "o" and the invalid bytes, so neither "fxo" nor "foo" is the
// result. The remark was presumably carried over from a sibling test - confirm and drop it.
49+
echo "Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:\n";
50+
$s = "f" . "o" . "\xc3\x28";
51+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
52+
53+
// Case: one operand of the first concatenation is the empty string. Both operand orders are
// checked with variables: non-empty . empty, then empty . non-empty.
echo "Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:\n";
54+
$s1 = "f";
55+
$s2 = "";
56+
$s = $s1 . $s2;
57+
$s = $s . $non_utf8;
58+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
59+
$s1 = "f";
60+
$s2 = "";
61+
// Operands swapped relative to the block above: the empty string is now on the left.
$s = $s2 . $s1;
62+
$s = $s . $non_utf8;
63+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
64+
65+
// Case: the same two operand orders, written with literals instead of variables.
echo "Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:\n";
66+
$s = "f" . "";
67+
$s = $s . $non_utf8;
68+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
69+
$s = "" . "f";
70+
$s = $s . $non_utf8;
71+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
72+
73+
// Case: a long string is built by repeated `$a = $a . $b` inside a loop, and the invalid bytes
// are appended once at the end.
echo "Concatenation in loop:\n";
74+
// Number of copies of $string in the final buffer; shared by both loop cases.
const COPY_TIMES = 10_000;
75+
$string = "a";
76+
77+
$string_concat = $string;
78+
// Starts at 1 because $string_concat already holds the first copy.
for ($i = 1; $i < COPY_TIMES; $i++) {
79+
$string_concat = $string_concat . $string;
80+
}
81+
$string_concat = $string_concat . $non_utf8;
82+
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));
83+
84+
// Case: identical to the loop above except that the loop body uses compound assignment (`.=`).
echo "Concatenation in loop (compound assignment):\n";
85+
$string = "a";
86+
87+
$string_concat = $string;
88+
for ($i = 1; $i < COPY_TIMES; $i++) {
89+
$string_concat .= $string;
90+
}
91+
$string_concat = $string_concat . $non_utf8;
92+
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));
93+
94+
// Header for the object-concatenation case that follows.
echo "Concatenation of objects:\n";
95+
/**
 * Minimal object with a string conversion, used as a concatenation operand.
 */
class ToString
{
    /**
     * @return string Always the one-character ASCII string "z".
     */
    public function __toString(): string
    {
        return "z";
    }
}
100+
// Both operands of the first concatenation are the same ToString object, so the string is
// produced from its __toString() result ("z") rather than from string values.
$o = new ToString();
101+
$s = $o . $o;
102+
// The invalid bytes are appended afterwards; --EXPECT-- requires bool(false).
$s = $s . $non_utf8;
103+
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
104+
105+
?>
106+
--EXPECT--
107+
Integer cast to string concatenated to invalid UTF-8:
108+
bool(false)
109+
Float cast to string concatenated to invalid UTF-8:
110+
bool(false)
111+
bool(false)
112+
Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:
113+
bool(false)
114+
Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:
115+
bool(false)
116+
Concatenation known valid UTF-8 with invalid UTF-8 in assignment:
117+
bool(false)
118+
Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:
119+
bool(false)
120+
Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:
121+
bool(false)
122+
bool(false)
123+
Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:
124+
bool(false)
125+
bool(false)
126+
Concatenation in loop:
127+
bool(false)
128+
Concatenation in loop (compound assignment):
129+
bool(false)
130+
Concatenation of objects:
131+
bool(false)

0 commit comments

Comments
 (0)