Skip to content

Commit 32df61c

Browse files
committed
Add more tests for UTF-7 text conversion
1 parent ae71bfd commit 32df61c

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

ext/mbstring/tests/utf_encodings.phpt

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,7 @@ for ($i = 0; $i < 256; $i++) {
962962
testValidString('+' . encode("\x12\x34", 'UTF-16BE') . '-', "\x00\x00\x12\x34", 'UTF-7', 'UTF-32BE');
963963
testValidString('+' . encode("\x12\x34\x56\x78", 'UTF-16BE') . '-', "\x00\x00\x12\x34\x00\x00\x56\x78", 'UTF-7', 'UTF-32BE');
964964
testValidString('+' . encode("\x12\x34\x56\x78\x00\x40", 'UTF-16BE') . '-', "\x00\x00\x12\x34\x00\x00\x56\x78\x00\x00\x00\x40", 'UTF-7', 'UTF-32BE');
965+
testValidString('+' . encode("\xFF\xEE\xEE\xFF", 'UTF-16BE') . '-', "\x00\x00\xFF\xEE\x00\x00\xEE\xFF", 'UTF-7', 'UTF-32BE');
965966

966967
// Surrogate pair
967968
testValidString('+' . encode("\x00\x01\x04\x00", 'UTF-32BE') . '-', "\x00\x01\x04\x00", 'UTF-7', 'UTF-32BE');
@@ -981,7 +982,7 @@ testValidString('+' . encode('AB', 'ASCII') . '-+' . encode('CD', 'ASCII') . '-'
981982
testValidString('+' . encode('AB', 'ASCII') . '-!+' . encode('CD', 'ASCII') . '-', "\x00A\x00B\x00!\x00C\x00D", 'UTF-7', 'UTF-16BE', false);
982983

983984
// + section terminated by a non-Base64 ASCII character which is NOT -
984-
for ($i = 0; $i < 128; $i++) {
985+
for ($i = 0; $i < 128; $i++) {
985986
if ($i >= ord('A') && $i <= ord('Z'))
986987
continue;
987988
if ($i >= ord('a') && $i <= ord('z'))
@@ -994,22 +995,39 @@ for ($i = 0; $i < 128; $i++) {
994995
testValidString('+' . encode("\x12\x34", 'UTF-16BE') . $char, "\x00\x00\x12\x34\x00\x00\x00" . $char, 'UTF-7', 'UTF-32BE', false);
995996
}
996997

998+
// Non-direct character followed by direct character
999+
testValidString('%A', '+ACU-A', 'ASCII', 'UTF-7');
1000+
testValidString('%%A', '+ACUAJQ-A', 'ASCII', 'UTF-7');
1001+
testValidString('%%%A', '+ACUAJQAl-A', 'ASCII', 'UTF-7');
1002+
9971003
// Now let's see how UTF-7 can go BAD...
9981004

9991005
// Base64-encodes $str and strips every '=' padding character from the result.
// The tests below wrap the returned text in '+' ... '-' to build UTF-7 inputs
// directly from raw bytes.
function rawEncode($str) {
10001006
return str_replace('=', '', base64_encode($str));
10011007
}
10021008

1009+
// Totally bogus byte
1010+
testInvalidString("\xFF", "%", 'UTF-7', 'UTF-8');
1011+
// Totally bogus codepoint... '+ACU-' is '%' in UTF-7
1012+
testInvalidString("\x12\x34\x56\x78", "+ACU-", 'UTF-32BE', 'UTF-7');
1013+
10031014
// First, messed up UTF16 in + section
10041015
// Second half of surrogate pair coming first
10051016
testInvalidString('+' . rawEncode("\xDC\x01\xD8\x02") . '-', "\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
1017+
testInvalidString('+' . rawEncode("\x00.\xDC\x01\xD8\x02") . '-', "\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
1018+
testInvalidString('+' . rawEncode("\x00.\x00.\xDC\x01\xD8\x02") . '-', "\x00\x00\x00.\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
10061019

10071020
// First half of surrogate pair not followed by second half
10081021
testInvalidString('+' . rawEncode("\xD8\x01\x00A") . '-', "\x00\x00\x00%\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
10091022
testInvalidString('+' . rawEncode("\xD8\x01\xD9\x02") . '-', "\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
1023+
testInvalidString('+' . rawEncode("\x00.\xD8\x01\x00A") . '-', "\x00\x00\x00.\x00\x00\x00%\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
1024+
testInvalidString('+' . rawEncode("\x00.\xD8\x01\xD9\x02") . '-', "\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
1025+
testInvalidString('+' . rawEncode("\x00.\x00.\xD8\x01\x00A") . '-', "\x00\x00\x00.\x00\x00\x00.\x00\x00\x00%\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
1026+
testInvalidString('+' . rawEncode("\x00.\x00.\xD8\x01\xD9\x02") . '-', "\x00\x00\x00.\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
10101027

10111028
// First half of surrogate pair appearing at end of string
10121029
testInvalidString('+' . rawEncode("\xD8\x01") . '-', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
1030+
testInvalidString('+' . rawEncode("\xD8\x01"), "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
10131031

10141032
// Truncated string
10151033
testInvalidString('+' . rawEncode("\x01") . '-', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

0 commit comments

Comments
 (0)