Skip to content

Commit a5827c2

Browse files
committed
Fix broken binary search function in mbstring
This faulty binary search (mbfl_bisec_srch) would never reject values at the very high end of the range being searched, even if they were not actually in the table: once its loop finished, it returned the last candidate index without checking that the value actually fell inside that entry's range. Among other things, this meant that some Unicode code points which do not correspond to any character in JIS X 0213 would be converted to bogus Shift-JIS-2004 values rather than being rejected.
1 parent b05ad51 commit a5827c2

File tree

1 file changed

+23
-28
lines changed

1 file changed

+23
-28
lines changed

ext/mbstring/libmbfl/filters/mbfilter_gb18030.c

Lines changed: 23 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -68,42 +68,37 @@ const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = {
6868

6969
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
7070

71-
72-
int
73-
mbfl_bisec_srch(int w, const unsigned short *tbl, int n)
71+
/* `tbl` contains inclusive ranges, each represented by a pair of unsigned shorts */
72+
int mbfl_bisec_srch(int w, const unsigned short *tbl, int n)
7473
{
75-
int k, k1 = 0, k2 = n-1;
76-
77-
while (k1 < k2) {
78-
k = (k1+k2) >> 1;
79-
if (w <= tbl[2*k+1]) {
80-
k2 = k;
81-
} else if (w >= tbl[2*k+2]) {
82-
k1 = k + 1;
74+
int l = 0, r = n-1;
75+
while (l <= r) {
76+
int probe = (l + r) >> 1;
77+
unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1];
78+
if (w < lo) {
79+
r = probe - 1;
80+
} else if (w > hi) {
81+
l = probe + 1;
8382
} else {
84-
return -1;
83+
return probe;
8584
}
8685
}
87-
return k1;
86+
return -1;
8887
}
8988

90-
int
91-
mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
89+
/* `tbl` contains single values, not ranges */
90+
int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
9291
{
93-
int k, k1 = 0, k2 = n;
94-
95-
if (w == tbl[0]) {
96-
return 0;
97-
}
98-
99-
while (k2 - k1 > 1) {
100-
k = (k1 + k2) >> 1;
101-
if (w < tbl[k]) {
102-
k2 = k;
103-
} else if (w > tbl[k]) {
104-
k1 = k;
92+
int l = 0, r = n-1;
93+
while (l <= r) {
94+
int probe = (l + r) >> 1;
95+
unsigned short val = tbl[probe];
96+
if (w < val) {
97+
r = probe - 1;
98+
} else if (w > val) {
99+
l = probe + 1;
105100
} else {
106-
return k;
101+
return probe;
107102
}
108103
}
109104
return -1;

0 commit comments

Comments
 (0)