Skip to content

Commit d1d50c2

Browse files
committed
Test EUC-JP and Shift-JIS more thoroughly
Previously, the unit tests for these text encodings covered every mapping from legacy -> Unicode, and every _reversible_ mapping from Unicode -> legacy. They did not cover the few Unicode -> legacy mappings that are not reversible; this commit adds tests for those as well.
1 parent 1562d76 commit d1d50c2

File tree

4 files changed

+13
-12
lines changed

4 files changed

+13
-12
lines changed

ext/mbstring/tests/armscii8_encoding.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ testAllInvalidChars($invalid, $toUnicode, 'ARMSCII-8', 'UTF-16BE', "\x00%");
2323
testTruncatedChars($truncated, 'ARMSCII-8', 'UTF-16BE', "\x00%");
2424
echo "Tested ARMSCII-8 -> UTF-16BE\n";
2525

26-
findInvalidChars($fromUnicode, $invalid, $unused, map(range(0,0xFF), 2));
26+
findInvalidChars($fromUnicode, $invalid, $unused, array_fill_keys(range(0,0xFF), 2));
2727
convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', 'ARMSCII-8', '%');
2828
echo "Tested UTF-16BE -> ARMSCII-8\n";
2929
?>

ext/mbstring/tests/encoding_tests.inc

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,6 @@ function findInvalidChars($valid, &$invalid, &$truncated, $startBytes = array())
207207
}
208208
}
209209

210-
// Helper for building `$startBytes` map for above function
211-
function map($keys, $value, $array = array()) {
212-
foreach ($keys as $key)
213-
$array[$key] = $value;
214-
return $array;
215-
}
216-
217210
function testEncodingFromUTF16ConversionTable($path, $encoding, $replacement = '%', $startBytes = array()) {
218211
srand(1000); // Make results consistent
219212
mb_substitute_character(0x25); // '%'
@@ -225,7 +218,7 @@ function testEncodingFromUTF16ConversionTable($path, $encoding, $replacement = '
225218
testTruncatedChars($truncated, $encoding, 'UTF-16BE', "\x00%");
226219
echo "Tested $encoding -> UTF-16BE\n";
227220

228-
findInvalidChars($fromUnicode, $invalid, $unused, map(range(0,0xFF), 2));
221+
findInvalidChars($fromUnicode, $invalid, $unused, array_fill_keys(range(0,0xFF), 2));
229222
convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', $encoding, $replacement);
230223
echo "Tested UTF-16BE -> $encoding\n";
231224
}

ext/mbstring/tests/eucjp_encoding.phpt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ $fromUnicode["\x00\x00\x00\x7E"] = "\x7E";
4343
/* Likewise with 0x005C */
4444
$fromUnicode["\x00\x00\x00\x5C"] = "\x5C";
4545

46-
findInvalidChars($validChars, $invalidChars, $truncated, map(range(0xA1, 0xFE), 2, array(0x8E => 2, 0x8F => 3)));
46+
findInvalidChars($validChars, $invalidChars, $truncated, array_fill_keys(range(0xA1, 0xFE), 2) + array(0x8E => 2, 0x8F => 3));
4747

4848
/* In the JIS X 0212 character set, kuten code 0x2237 (EUC-JP 0x8FA2B7)
4949
* is an ordinary tilde character
@@ -61,6 +61,9 @@ echo "Encoding verification and conversion work for all invalid characters\n";
6161
testValidString("\x8F\xA2\xB7", "\x00\x00\x00~", 'EUC-JP', 'UTF-32BE', false);
6262
echo "Irreversible mapping of 0x8FA2B7 follows JIS X 0212 correctly\n";
6363

64+
testAllValidChars($fromUnicode, 'UTF-32BE', 'EUC-JP', false);
65+
echo "Unicode -> EUC-JP conversion works on all valid characters\n";
66+
6467
$invalidChars = array();
6568
for ($cp = 0; $cp <= 0xFFFF; $cp++) {
6669
$char = pack('N', $cp);
@@ -74,4 +77,5 @@ echo "Unicode -> EUC-JP conversion works on all invalid characters\n";
7477
Encoding verification and conversion work for all valid characters
7578
Encoding verification and conversion work for all invalid characters
7679
Irreversible mapping of 0x8FA2B7 follows JIS X 0212 correctly
80+
Unicode -> EUC-JP conversion works on all valid characters
7781
Unicode -> EUC-JP conversion works on all invalid characters

ext/mbstring/tests/sjis_encoding.phpt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,20 @@ testAllValidChars($validChars, 'Shift-JIS', 'UTF-16BE');
4646
echo "SJIS verification and conversion works on all valid characters\n";
4747

4848
findInvalidChars($validChars, $invalidChars, $truncated,
49-
map(range(0x81, 0x9F), 2, map(range(0xE0, 0xEF), 2)));
49+
array_fill_keys(range(0x81, 0x9F), 2) + array_fill_keys(range(0xE0, 0xEF), 2));
5050
testAllInvalidChars($invalidChars, $validChars, 'Shift-JIS', 'UTF-16BE', "\x00%");
5151
testTruncatedChars($truncated, 'Shift-JIS', 'UTF-16BE', "\x00%");
5252
echo "SJIS verification and conversion works on all invalid characters\n";
5353

54-
findInvalidChars($fromUnicode, $invalidChars, $unused, map(range(0, 0xFF), 2));
54+
testAllValidChars($fromUnicode, 'UTF-16BE', 'Shift-JIS', false);
55+
echo "Unicode -> SJIS conversion works on all valid characters\n";
56+
57+
findInvalidChars($fromUnicode, $invalidChars, $unused, array_fill_keys(range(0, 0xFF), 2));
5558
convertAllInvalidChars($invalidChars, $fromUnicode, 'UTF-16BE', 'Shift-JIS', '%');
5659
echo "Unicode -> SJIS conversion works on all invalid characters\n";
5760
?>
5861
--EXPECT--
5962
SJIS verification and conversion works on all valid characters
6063
SJIS verification and conversion works on all invalid characters
64+
Unicode -> SJIS conversion works on all valid characters
6165
Unicode -> SJIS conversion works on all invalid characters

0 commit comments

Comments
 (0)