Skip to content

Commit a6603b6

Browse files
committed
Add identify filter for ISO-8859-6 (Latin/Arabic)
Note that some text encoding conversion libraries, such as Solaris iconv and FreeBSD iconv, map bytes 0x30-0x39 to the Arabic-script (Arabic-Indic) digits, i.e. Unicode codepoints U+0660-U+0669, rather than to the 'regular' ASCII digits 0-9. Further, Windows CP28596 adds extra mappings for the bytes that ISO-8859-6 leaves unassigned.
1 parent 23270d7 commit a6603b6

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "mbfilter_iso8859_6.h"
3232
#include "unicode_table_iso8859_6.h"
3333

34+
static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter);
35+
3436
static const char *mbfl_encoding_8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
3537

3638
const mbfl_encoding mbfl_encoding_8859_6 = {
@@ -47,7 +49,7 @@ const mbfl_encoding mbfl_encoding_8859_6 = {
4749
const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
4850
mbfl_no_encoding_8859_6,
4951
mbfl_filt_ident_common_ctor,
50-
mbfl_filt_ident_true
52+
mbfl_filt_ident_iso8859_6
5153
};
5254

5355
const struct mbfl_convert_vtbl vtbl_8859_6_wchar = {
@@ -132,3 +134,11 @@ int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter)
132134

133135
return c;
134136
}
137+
138+
/*
 * Identify filter for ISO-8859-6 (Latin/Arabic).
 *
 * c      - value to examine; only values in 0xA0..0xFF are looked up in
 *          iso8859_6_ucs_table, everything else is passed through untouched
 * filter - identify filter; its `status` field is set to 1 when the table
 *          entry for `c` is zero
 *
 * Returns `c` unchanged in every case.
 *
 * NOTE(review): other libmbfl identify filters appear to report a mismatch
 * through `filter->flag` rather than `filter->status` -- confirm which field
 * the identification loop actually inspects.
 */
static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter)
{
	/*
	 * The table is indexed by (c - 0xA0). The upper bound prevents a value
	 * above 0xFF from indexing past the end of the table (assumes the table
	 * holds one entry per byte 0xA0..0xFF -- TODO confirm against
	 * unicode_table_iso8859_6.h).
	 */
	if (c >= 0xA0 && c <= 0xFF && !iso8859_6_ucs_table[c - 0xA0]) {
		/* A zero entry presumably means the byte is unassigned in ISO-8859-6. */
		filter->status = 1;
	}
	return c;
}

0 commit comments

Comments
 (0)