Skip to content

Commit 7684a3d

Browse files
authored
ext/mbstring: move unsigned 32 bit integer tests to a new test (#12891)
And only run it on 64 bit architectures as those are floats on 32 bit.
1 parent b0f7df1 commit 7684a3d

File tree

4 files changed

+74
-27
lines changed

4 files changed

+74
-27
lines changed

ext/mbstring/tests/mb_decode_numericentity.phpt

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -103,26 +103,9 @@ test("Successive hex entities", "&#x32&#x32;", "22", [0, 0xFFFF, 0, 0xFFFF], 'AS
103103
test("Starting entity immediately after decimal entity which is too long", "&#10000000000&#65;", "&#10000000000A", [0, 0xFFFF, 0, 0xFFFF], 'ASCII');
104104
test("Starting entity immediately after hex entity which is too long", "&#x111111111&#65;", "&#x111111111A", [0, 0xFFFF, 0, 0xFFFF], 'ASCII');
105105

106-
$ucs4_test1 = mb_convert_encoding("&#1000000000&#65;", 'UCS-4BE', 'ASCII');
107-
testNonAscii("Starting entity immediately after valid decimal entity which is just within maximum length", $ucs4_test1, "\x3B\x9A\xCA\x00\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE');
108-
$ucs4_test2 = mb_convert_encoding("&#x11111111&#65;", 'UCS-4BE', 'ASCII');
109-
testNonAscii("Starting entity immediately after valid hex entity which is just within maximum length", $ucs4_test2, "\x11\x11\x11\x11\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE');
110-
111106
test("Starting entity immediately after invalid decimal entity", "&#0&#65;", "&#0A", [0x1, 0xFFFF, 0, 0xFFFF], 'ASCII');
112107
test("Starting entity immediately after invalid hex entity", "&#x0&#65;", "&#x0A", [0x1, 0xFFFF, 0, 0xFFFF], 'ASCII');
113108

114-
test("Starting entity immediately after too-big decimal entity", "&#7001492542&#65;", "&#7001492542A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'ASCII');
115-
116-
// If the numeric entity decodes to 0xFFFFFFFF, that should be passed through
117-
// Originally, the new implementation of mb_decode_numericentity used -1 as a marker indicating
118-
// that the entity could not be successfully decoded, so if the entity decoded successfully to
119-
// 0xFFFFFFFF (-1), it would be treated as an invalid entity
120-
test("Regression test (entity which decodes to 0xFFFFFFFF)", "&#xe;", "?", [0xFFFFFF86, 0xFFFFFFFF, 0xF, 0xFC015448], 'HZ');
121-
122-
// With the legacy conversion filters, a trailing & could be truncated by mb_decode_numericentity,
123-
// because some text encodings did not properly invoke the next flush function in the chain
124-
test("Regression test (truncation of successive & with JIS encoding)", "&&&", "&&&", [0x20FF37FF, 0x7202F569, 0xC4090023, 0xF160], "JIS");
125-
126109
// Previously, signed arithmetic was used on convmap entries
127110
test("Regression test (convmap entries are now treated as unsigned)", "&#7,", "?,", [0x22FFFF11, 0xBF111189, 0x67726511, 0x1161E719], "ASCII");
128111

@@ -194,11 +177,6 @@ Successive &#65: string(9) "&#65&#65;" => string(2) "AA" (Good)
194177
Successive hex entities: string(11) "&#x32&#x32;" => string(2) "22" (Good)
195178
Starting entity immediately after decimal entity which is too long: string(18) "&#10000000000&#65;" => string(14) "&#10000000000A" (Good)
196179
Starting entity immediately after hex entity which is too long: string(17) "&#x111111111&#65;" => string(13) "&#x111111111A" (Good)
197-
Starting entity immediately after valid decimal entity which is just within maximum length: 000000260000002300000031000000300000003000000030000000300000003000000030000000300000003000000030000000260000002300000036000000350000003b => 3b9aca0000000041 (Good)
198-
Starting entity immediately after valid hex entity which is just within maximum length: 0000002600000023000000780000003100000031000000310000003100000031000000310000003100000031000000260000002300000036000000350000003b => 1111111100000041 (Good)
199180
Starting entity immediately after invalid decimal entity: string(8) "&#0&#65;" => string(4) "&#0A" (Good)
200181
Starting entity immediately after invalid hex entity: string(9) "&#x0&#65;" => string(5) "&#x0A" (Good)
201-
Starting entity immediately after too-big decimal entity: string(17) "&#7001492542&#65;" => string(13) "&#7001492542A" (Good)
202-
Regression test (entity which decodes to 0xFFFFFFFF): string(5) "&#xe;" => string(1) "?" (Good)
203-
Regression test (truncation of successive & with JIS encoding): string(3) "&&&" => string(3) "&&&" (Good)
204182
Regression test (convmap entries are now treated as unsigned): string(4) "&#7," => string(2) "?," (Good)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
--TEST--
2+
mb_decode_numericentity() with 0xFFFFFFFF in conversion map
3+
--EXTENSIONS--
4+
mbstring
5+
--SKIPIF--
6+
<?php if (PHP_INT_SIZE != 8) die("skip this test is for 64bit platform only"); ?>
7+
--FILE--
8+
<?php
9+
10+
function varDumpToString($var)
11+
{
12+
ob_start();
13+
var_dump($var);
14+
return trim(ob_get_clean());
15+
}
16+
17+
function test($desc, $str, $expected, $convmap, $encoding) {
18+
$result = mb_decode_numericentity($str, $convmap, $encoding);
19+
echo $desc, ": ", varDumpToString($str), " => ", varDumpToString($result);
20+
if ($result === $expected)
21+
echo " (Good)\n";
22+
else
23+
echo " (BAD; expected ", varDumpToString($expected), ")\n";
24+
}
25+
26+
function testNonAscii($desc, $str, $expected, $convmap, $encoding) {
27+
$result = mb_decode_numericentity($str, $convmap, $encoding);
28+
echo $desc, ": ", bin2hex($str), " => ", bin2hex($result);
29+
if ($result === $expected)
30+
echo " (Good)\n";
31+
else
32+
echo " (BAD; expected ", bin2hex($expected), ")\n";
33+
}
34+
35+
$ucs4_test1 = mb_convert_encoding("&#1000000000&#65;", 'UCS-4BE', 'ASCII');
36+
testNonAscii("Starting entity immediately after valid decimal entity which is just within maximum length", $ucs4_test1, "\x3B\x9A\xCA\x00\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE');
37+
$ucs4_test2 = mb_convert_encoding("&#x11111111&#65;", 'UCS-4BE', 'ASCII');
38+
testNonAscii("Starting entity immediately after valid hex entity which is just within maximum length", $ucs4_test2, "\x11\x11\x11\x11\x00\x00\x00A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'UCS-4BE');
39+
40+
test("Starting entity immediately after too-big decimal entity", "&#7001492542&#65;", "&#7001492542A", [0, 0xFFFFFFFF, 0, 0xFFFFFFFF], 'ASCII');
41+
42+
// If the numeric entity decodes to 0xFFFFFFFF, that should be passed through
43+
// Originally, the new implementation of mb_decode_numericentity used -1 as a marker indicating
44+
// that the entity could not be successfully decoded, so if the entity decoded successfully to
45+
// 0xFFFFFFFF (-1), it would be treated as an invalid entity
46+
test("Regression test (entity which decodes to 0xFFFFFFFF)", "&#xe;", "?", [0xFFFFFF86, 0xFFFFFFFF, 0xF, 0xFC015448], 'HZ');
47+
48+
// With the legacy conversion filters, a trailing & could be truncated by mb_decode_numericentity,
49+
// because some text encodings did not properly invoke the next flush function in the chain
50+
test("Regression test (truncation of successive & with JIS encoding)", "&&&", "&&&", [0x20FF37FF, 0x7202F569, 0xC4090023, 0xF160], "JIS");
51+
52+
?>
53+
--EXPECT--
54+
Starting entity immediately after valid decimal entity which is just within maximum length: 000000260000002300000031000000300000003000000030000000300000003000000030000000300000003000000030000000260000002300000036000000350000003b => 3b9aca0000000041 (Good)
55+
Starting entity immediately after valid hex entity which is just within maximum length: 0000002600000023000000780000003100000031000000310000003100000031000000310000003100000031000000260000002300000036000000350000003b => 1111111100000041 (Good)
56+
Starting entity immediately after too-big decimal entity: string(17) "&#7001492542&#65;" => string(13) "&#7001492542A" (Good)
57+
Regression test (entity which decodes to 0xFFFFFFFF): string(5) "&#xe;" => string(1) "?" (Good)
58+
Regression test (truncation of successive & with JIS encoding): string(3) "&&&" => string(3) "&&&" (Good)

ext/mbstring/tests/mb_encode_numericentity.phpt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,6 @@ echo "11 (hex): " . mb_encode_numericentity($iso2022jp, $convmap, "ISO-2022-JP",
5454
$convmap = [0x2b, 0x2d4, 0x75656500, 0x656d2c53];
5555
echo "12: " . mb_encode_numericentity("m", $convmap, "ASCII") . "\n";
5656

57-
// Regression test; the old implementation could only emit hexadecimal entities with about 5 digits
58-
$convmap = [0xffffffff, 0xffffffff, 0x540a0af7, 0x5a485054];
59-
echo "13: " . mb_encode_numericentity("\xFF", $convmap, "ASCII", true) . "\n";
60-
6157
?>
6258
--EXPECT--
6359
1: &#161;&#162;&#163;&#164;&#165;&#166;&#167;&#168;&#169;&#170;&#171;&#172;&#173;&#174;&#175;&#176;&#177;&#178;&#179;&#180;&#181;&#182;&#183;&#184;&#185;&#186;&#187;&#188;&#189;&#190;&#191;&#192;&#193;&#194;&#195;&#196;&#197;&#198;&#199;&#200;&#201;&#202;&#203;&#204;&#205;&#206;&#207;&#208;&#209;&#210;&#211;&#212;&#213;&#214;&#215;&#216;&#217;&#218;&#219;&#220;&#221;&#222;&#223;&#224;&#225;&#226;&#227;&#228;&#229;&#230;&#231;&#232;&#233;&#234;&#235;&#236;&#237;&#238;&#239;&#240;&#241;&#242;&#243;&#244;&#245;&#246;&#247;&#248;&#249;&#250;&#251;&#252;&#253;&#254;&#255;
@@ -78,4 +74,3 @@ echo "13: " . mb_encode_numericentity("\xFF", $convmap, "ASCII", true) . "\n";
7874
11: &#12288;&#163;&#65;&#66;&#67;
7975
11 (hex): &#x3000;&#xA3;&#x41;&#x42;&#x43;
8076
12: &#1701127233;
81-
13: &#x50080054;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
--TEST--
2+
mb_encode_numericentity() with 0xFFFFFFFF in conversion map
3+
--EXTENSIONS--
4+
mbstring
5+
--SKIPIF--
6+
<?php if (PHP_INT_SIZE != 8) die("skip this test is for 64bit platform only"); ?>
7+
--FILE--
8+
<?php
9+
10+
// Regression test; the old implementation could only emit hexadecimal entities with about 5 digits
11+
$convmap = [0xffffffff, 0xffffffff, 0x540a0af7, 0x5a485054];
12+
echo "13: " . mb_encode_numericentity("\xFF", $convmap, "ASCII", true) . "\n";
13+
14+
?>
15+
--EXPECT--
16+
13: &#x50080054;

0 commit comments

Comments
 (0)