|
| 1 | +--TEST-- |
| 2 | +Check that invalid UTF-8 strings are NOT marked as valid UTF-8 |
| 3 | +--EXTENSIONS-- |
| 4 | +zend_test |
| 5 | +--FILE-- |
| 6 | +<?php |
| 7 | +// Invalid 2-octet sequence: 0xC3 announces a two-byte character, but 0x28 ("(") is not a continuation byte. |
| 8 | +$non_utf8 = "\xc3\x28"; |
| 9 | + |
| 10 | +echo "Integer cast to string concatenated to invalid UTF-8:\n"; |
| 11 | +$i = 2563; |
| 12 | +$s = (string) $i; /* the string form of an integer */ |
| 13 | +$s .= "\xc3\x28"; /* compound assignment appends the invalid bytes */ |
| 14 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 15 | + |
| 16 | +echo "Float cast to string concatenated to invalid UTF-8:\n"; |
| 17 | +$f = 26.7; /* float literal in plain decimal notation */ |
| 18 | +$s = (string) $f; |
| 19 | +$s .= "\xc3\x28"; /* compound assignment appends the invalid bytes */ |
| 20 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 21 | +$f = 2e100; /* float literal in exponent notation */ |
| 22 | +$s = (string) $f; |
| 23 | +$s .= "\xc3\x28"; /* same append as above, on the second float's string form */ |
| 24 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 25 | + |
| 26 | +echo "Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n"; |
| 27 | +$s1 = "f"; |
| 28 | +$s2 = "o"; |
| 29 | +$s = $s1 . $s2; /* step 1: join the two ASCII-only variables */ |
| 30 | +$s = $s . $non_utf8; /* step 2: append the invalid bytes in a separate statement */ |
| 31 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 32 | + |
| 33 | +echo "Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n"; |
| 34 | +$s1 = "f"; |
| 35 | +$s2 = "o"; |
| 36 | +$s = $s1 . $s2 . $non_utf8; /* all three operands joined in a single expression */ |
| 37 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 38 | + |
| 39 | +echo "Concatenation known valid UTF-8 with invalid UTF-8 in assignment:\n"; |
| 40 | +$s = "f" . "\xc3\x28"; /* both operands are string literals, no variables involved */ |
| 41 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 42 | + |
| 43 | +// NOTE(review): the rationale below refers to a "fxo" test case, yet the code that follows builds |
| 44 | +// "f" . "o" . "\xc3\x28"; it looks carried over from a valid-UTF-8 variant of this test - confirm. |
| 45 | +// The "foo" string matches a "Foo" class which is registered by the zend_test extension. That class |
| 46 | +// name does not have the "valid UTF-8" flag because class names in general don't have to be UTF-8. |
| 47 | +// As a "foo" string goes through the interning logic, it gets replaced by the "foo" string from the |
| 48 | +// class, which does not have the flag. Hence the choice of "fxo"; the earlier "foo" case is not interned. |
| 49 | +echo "Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:\n"; |
| 50 | +$s = "f" . "o" . "\xc3\x28"; /* three literal operands: "fo" followed by the invalid bytes */ |
| 51 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 52 | + |
| 53 | +echo "Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:\n"; |
| 54 | +$s1 = "f"; |
| 55 | +$s2 = ""; |
| 56 | +$s = $s1 . $s2; /* empty string as the right-hand operand */ |
| 57 | +$s = $s . $non_utf8; /* then append the invalid bytes */ |
| 58 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 59 | +$s1 = "f"; |
| 60 | +$s2 = ""; |
| 61 | +$s = $s2 . $s1; /* operands swapped: empty string as the left-hand operand */ |
| 62 | +$s = $s . $non_utf8; /* then append the invalid bytes */ |
| 63 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 64 | + |
| 65 | +echo "Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:\n"; |
| 66 | +$s = "f" . ""; /* literal operands, empty string on the right */ |
| 67 | +$s = $s . $non_utf8; /* then append the invalid bytes */ |
| 68 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 69 | +$s = "" . "f"; /* literal operands, empty string on the left */ |
| 70 | +$s = $s . $non_utf8; /* then append the invalid bytes */ |
| 71 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 72 | + |
| 73 | +echo "Concatenation in loop:\n"; |
| 74 | +const COPY_TIMES = 10_000; /* also read by the compound-assignment loop case further down */ |
| 75 | +$string = "a"; |
| 76 | + |
| 77 | +$string_concat = $string; /* starts with one copy, which is why the loop counter begins at 1 */ |
| 78 | +for ($i = 1; $i < COPY_TIMES; $i++) { |
| 79 | + $string_concat = $string_concat . $string; /* plain "$a = $a . $b" form; ends with COPY_TIMES copies of "a" */ |
| 80 | +} |
| 81 | +$string_concat = $string_concat . $non_utf8; /* append the invalid bytes after the loop */ |
| 82 | +var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat)); /* expected output: bool(false) */ |
| 83 | + |
| 84 | +echo "Concatenation in loop (compound assignment):\n"; |
| 85 | +$string = "a"; |
| 86 | + |
| 87 | +$string_concat = $string; /* starts with one copy, which is why the loop counter begins at 1 */ |
| 88 | +for ($i = 1; $i < COPY_TIMES; $i++) { /* COPY_TIMES is the constant declared in the preceding loop case */ |
| 89 | + $string_concat .= $string; /* ".=" form of the same append */ |
| 90 | +} |
| 91 | +$string_concat = $string_concat . $non_utf8; /* append the invalid bytes after the loop */ |
| 92 | +var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat)); /* expected output: bool(false) */ |
| 93 | + |
| 94 | +echo "Concatenation of objects:\n"; |
| 95 | +class ToString { /* minimal stringable object used only by this case */ |
| 96 | + public function __toString() : string{ |
| 97 | + return "z"; /* a single ASCII character */ |
| 98 | + } |
| 99 | +} |
| 100 | +$o = new ToString(); |
| 101 | +$s = $o . $o; /* both operands are objects, so each is converted through __toString() */ |
| 102 | +$s = $s . $non_utf8; /* then append the invalid bytes */ |
| 103 | +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); /* expected output: bool(false) */ |
| 104 | + |
| 105 | +?> |
| 106 | +--EXPECT-- |
| 107 | +Integer cast to string concatenated to invalid UTF-8: |
| 108 | +bool(false) |
| 109 | +Float cast to string concatenated to invalid UTF-8: |
| 110 | +bool(false) |
| 111 | +bool(false) |
| 112 | +Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8: |
| 113 | +bool(false) |
| 114 | +Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8: |
| 115 | +bool(false) |
| 116 | +Concatenation known valid UTF-8 with invalid UTF-8 in assignment: |
| 117 | +bool(false) |
| 118 | +Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment: |
| 119 | +bool(false) |
| 120 | +Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8: |
| 121 | +bool(false) |
| 122 | +bool(false) |
| 123 | +Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8: |
| 124 | +bool(false) |
| 125 | +bool(false) |
| 126 | +Concatenation in loop: |
| 127 | +bool(false) |
| 128 | +Concatenation in loop (compound assignment): |
| 129 | +bool(false) |
| 130 | +Concatenation of objects: |
| 131 | +bool(false) |
0 commit comments