Skip to content

Commit b79a86f

Browse files
committed
Merge branch 'PHP-8.1' into PHP-8.2
* PHP-8.1: Support Microsoft's "Best Fit" mappings for Windows-1252 text encoding
2 parents a76bbd3 + a1a69c3 commit b79a86f

File tree

3 files changed

+11
-16
lines changed

3 files changed

+11
-16
lines changed

ext/mbstring/libmbfl/filters/mbfilter_singlebyte.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -485,10 +485,10 @@ DEF_SB_TBL(cp1251, "Windows-1251", "Windows-1251", cp1251_aliases, 0x80, cp1251_
485485

486486
static const char *cp1252_aliases[] = {"cp1252", NULL};
487487
static const unsigned short cp1252_ucs_table[] = {
488-
0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
489-
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,
490-
0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
491-
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
488+
0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
489+
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
490+
0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
491+
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
492492
};
493493
DEF_SB(cp1252, "Windows-1252", "Windows-1252", cp1252_aliases);
494494

@@ -504,7 +504,7 @@ static int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter)
504504
}
505505
}
506506
CK(mbfl_filt_conv_illegal_output(c, filter));
507-
} else if (c <= 0x7F || c >= 0xA0) {
507+
} else if (c <= 0x7F || c >= 0xA0 || c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D) {
508508
CK((*filter->output_function)(c, filter->data));
509509
} else {
510510
CK(mbfl_filt_conv_illegal_output(c, filter));
@@ -562,7 +562,7 @@ static void mb_wchar_to_cp1252(uint32_t *in, size_t len, mb_convert_buf *buf, bo
562562
}
563563
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);
564564
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
565-
} else if (w <= 0x7F || w >= 0xA0) {
565+
} else if (w <= 0x7F || w >= 0xA0 || w == 0x81 || w == 0x8D || w == 0x8F || w == 0x90 || w == 0x9D) {
566566
out = mb_convert_buf_add(out, w);
567567
} else {
568568
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp1252);

ext/mbstring/tests/cp1252_encoding.phpt

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,6 @@ if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
1111
include('encoding_tests.inc');
1212
testEncodingFromUTF16ConversionTable(__DIR__ . '/data/CP1252.txt', 'CP1252');
1313

14-
// Test "long" illegal character markers
15-
mb_substitute_character("long");
16-
convertInvalidString("\x81", "%", "CP1252", "UTF-8");
17-
convertInvalidString("\x9D", "%", "CP1252", "UTF-8");
18-
1914
// Test replacement character which cannot be encoded in CP1252
2015
mb_substitute_character(0x1234);
2116
convertInvalidString("\x23\x45", '?', 'UTF-16BE', 'CP1252');

ext/mbstring/tests/data/CP1252.txt

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@
145145
0x7E 0x007E #TILDE
146146
0x7F 0x007F #DELETE
147147
0x80 0x20AC #EURO SIGN
148-
0x81 #UNDEFINED
148+
0x81 0x0081 #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
149149
0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
150150
0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK
151151
0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
@@ -157,10 +157,10 @@
157157
0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
158158
0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
159159
0x8C 0x0152 #LATIN CAPITAL LIGATURE OE
160-
0x8D #UNDEFINED
160+
0x8D 0x008D #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
161161
0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
162-
0x8F #UNDEFINED
163-
0x90 #UNDEFINED
162+
0x8F 0x008F #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
163+
0x90 0x0090 #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
164164
0x91 0x2018 #LEFT SINGLE QUOTATION MARK
165165
0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
166166
0x93 0x201C #LEFT DOUBLE QUOTATION MARK
@@ -173,7 +173,7 @@
173173
0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
174174
0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
175175
0x9C 0x0153 #LATIN SMALL LIGATURE OE
176-
0x9D #UNDEFINED
176+
0x9D 0x009D #*** MODIFIED TO FOLLOW WINDOWS "BEST FIT" MAPPINGS
177177
0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON
178178
0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS
179179
0xA0 0x00A0 #NO-BREAK SPACE

0 commit comments

Comments
 (0)