Skip to content

Commit 5a87099

Browse files
author
Moriyoshi Koizumi
committed
- Refix bug #43840.
- Fix bug #43841.
- Remove redundant trailing dots from the error messages.
- Fix tests.
1 parent e005c5a commit 5a87099

17 files changed

+353
-150
lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 183 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -778,7 +778,7 @@ collector_strpos(int c, void* data)
778778
for (;;) {
779779
pc->found_pos++;
780780
p = h;
781-
m = pc->needle.buffer;
781+
m = (int *)pc->needle.buffer;
782782
n = pc->needle_pos - 1;
783783
while (n > 0 && *p == *m) {
784784
n--;
@@ -857,87 +857,203 @@ mbfl_strpos(
857857
int offset,
858858
int reverse)
859859
{
860-
int n, result, negative_offset = 0;
861-
unsigned char *p;
862-
mbfl_convert_filter *filter;
863-
struct collector_strpos_data pc;
860+
int result;
861+
mbfl_string _haystack_u8, _needle_u8;
862+
const mbfl_string *haystack_u8, *needle_u8;
863+
const unsigned char *u8_tbl;
864864

865-
if (haystack == NULL || needle == NULL) {
865+
if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
866866
return -8;
867867
}
868-
/* needle is converted into wchar */
869-
mbfl_wchar_device_init(&pc.needle);
870-
filter = mbfl_convert_filter_new(
871-
needle->no_encoding,
872-
mbfl_no_encoding_wchar,
873-
mbfl_wchar_device_output, 0, &pc.needle);
874-
if (filter == NULL) {
875-
return -4;
876-
}
877-
p = needle->val;
878-
n = needle->len;
879-
if (p != NULL) {
880-
while (n > 0) {
881-
if ((*filter->filter_function)(*p++, filter) < 0) {
882-
break;
883-
}
884-
n--;
868+
869+
{
870+
const mbfl_encoding *u8_enc;
871+
u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
872+
if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
873+
return -8;
885874
}
875+
u8_tbl = u8_enc->mblen_table;
886876
}
887-
mbfl_convert_filter_flush(filter);
888-
mbfl_convert_filter_delete(filter);
889-
pc.needle_len = pc.needle.pos;
890-
if (pc.needle.buffer == NULL) {
891-
return -4;
892-
}
893-
if (pc.needle_len <= 0) {
894-
mbfl_wchar_device_clear(&pc.needle);
895-
return -2;
896-
}
897-
/* initialize filter and collector data */
898-
filter = mbfl_convert_filter_new(
899-
haystack->no_encoding,
900-
mbfl_no_encoding_wchar,
901-
collector_strpos, 0, &pc);
902-
if (filter == NULL) {
903-
mbfl_wchar_device_clear(&pc.needle);
904-
return -4;
877+
878+
if (haystack->no_encoding != mbfl_no_encoding_utf8) {
879+
mbfl_string_init(&_haystack_u8);
880+
haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
881+
if (haystack_u8 == NULL) {
882+
result = -4;
883+
goto out;
884+
}
885+
} else {
886+
haystack_u8 = haystack;
905887
}
906888

907-
if (offset < 0) {
908-
negative_offset = -offset - pc.needle_len;
909-
if (negative_offset < 0) {
910-
negative_offset = 0;
889+
if (needle->no_encoding != mbfl_no_encoding_utf8) {
890+
mbfl_string_init(&_needle_u8);
891+
needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
892+
if (needle_u8 == NULL) {
893+
result = -4;
894+
goto out;
911895
}
912-
offset = 0;
896+
} else {
897+
needle_u8 = needle;
913898
}
914899

915-
pc.start = offset;
916-
pc.output = 0;
917-
pc.needle_pos = 0;
918-
pc.found_pos = 0;
919-
pc.matched_pos = -1;
900+
if (needle_u8->len < 1) {
901+
result = -8;
902+
goto out;
903+
}
920904

921-
/* feed data */
922-
p = haystack->val;
923-
n = haystack->len - negative_offset;
924-
if (p != NULL) {
925-
while (n > 0) {
926-
if ((*filter->filter_function)(*p++, filter) < 0) {
927-
pc.matched_pos = -4;
928-
break;
905+
result = -1;
906+
if (haystack_u8->len < needle_u8->len) {
907+
goto out;
908+
}
909+
910+
if (!reverse) {
911+
unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
912+
unsigned int needle_u8_len = needle_u8->len;
913+
unsigned int i;
914+
const unsigned char *p, *q, *e;
915+
const unsigned char *haystack_u8_val = haystack_u8->val,
916+
*needle_u8_val = needle_u8->val;
917+
for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
918+
jtbl[i] = needle_u8_len + 1;
919+
}
920+
for (i = 0; i < needle_u8_len - 1; ++i) {
921+
jtbl[needle_u8_val[i]] = needle_u8_len - i;
922+
}
923+
e = haystack_u8_val + haystack_u8->len;
924+
p = haystack_u8_val;
925+
while (--offset >= 0) {
926+
if (p >= e) {
927+
result = -16;
928+
goto out;
929929
}
930-
if (pc.matched_pos >= 0 && !reverse) {
931-
break;
930+
p += u8_tbl[*p];
931+
}
932+
p += needle_u8_len;
933+
if (p > e) {
934+
goto out;
935+
}
936+
while (p <= e) {
937+
const unsigned char *pv = p;
938+
q = needle_u8_val + needle_u8_len;
939+
for (;;) {
940+
if (q == needle_u8_val) {
941+
result = 0;
942+
while (p > haystack_u8_val) {
943+
unsigned char c = *--p;
944+
if (c < 0x80) {
945+
++result;
946+
} else if ((c & 0xc0) != 0x80) {
947+
++result;
948+
}
949+
}
950+
goto out;
951+
}
952+
if (*--q != *--p) {
953+
break;
954+
}
955+
}
956+
p += jtbl[*p];
957+
if (p <= pv) {
958+
p = pv + 1;
959+
}
960+
}
961+
} else {
962+
unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
963+
unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
964+
unsigned int i;
965+
const unsigned char *p, *e, *q, *qe;
966+
const unsigned char *haystack_u8_val = haystack_u8->val,
967+
*needle_u8_val = needle_u8->val;
968+
for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
969+
jtbl[i] = needle_u8_len;
970+
}
971+
for (i = needle_u8_len - 1; i > 0; --i) {
972+
unsigned char c = needle_u8_val[i];
973+
jtbl[c] = i;
974+
if (c < 0x80) {
975+
++needle_len;
976+
} else if ((c & 0xc0) != 0x80) {
977+
++needle_len;
978+
}
979+
}
980+
{
981+
unsigned char c = needle_u8_val[0];
982+
if (c < 0x80) {
983+
++needle_len;
984+
} else if ((c & 0xc0) != 0x80) {
985+
++needle_len;
986+
}
987+
}
988+
e = haystack_u8_val;
989+
p = e + haystack_u8->len;
990+
qe = needle_u8_val + needle_u8_len;
991+
if (offset < 0) {
992+
if (-offset > needle_len) {
993+
offset += needle_len;
994+
while (offset < 0) {
995+
unsigned char c;
996+
if (p <= e) {
997+
result = -16;
998+
goto out;
999+
}
1000+
c = *(--p);
1001+
if (c < 0x80) {
1002+
++offset;
1003+
} else if ((c & 0xc0) != 0x80) {
1004+
++offset;
1005+
}
1006+
}
1007+
}
1008+
} else {
1009+
const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1010+
while (--offset >= 0) {
1011+
if (e >= ee) {
1012+
result = -16;
1013+
goto out;
1014+
}
1015+
e += u8_tbl[*e];
1016+
}
1017+
}
1018+
if (p < e + needle_u8_len) {
1019+
goto out;
1020+
}
1021+
p -= needle_u8_len;
1022+
while (p >= e) {
1023+
const unsigned char *pv = p;
1024+
q = needle_u8_val;
1025+
for (;;) {
1026+
if (q == qe) {
1027+
result = 0;
1028+
p -= needle_u8_len;
1029+
while (p > haystack_u8_val) {
1030+
unsigned char c = *--p;
1031+
if (c < 0x80) {
1032+
++result;
1033+
} else if ((c & 0xc0) != 0x80) {
1034+
++result;
1035+
}
1036+
}
1037+
goto out;
1038+
}
1039+
if (*q != *p) {
1040+
break;
1041+
}
1042+
++p, ++q;
1043+
}
1044+
p -= jtbl[*p];
1045+
if (p >= pv) {
1046+
p = pv - 1;
9321047
}
933-
n--;
9341048
}
9351049
}
936-
mbfl_convert_filter_flush(filter);
937-
result = pc.matched_pos;
938-
mbfl_convert_filter_delete(filter);
939-
mbfl_wchar_device_clear(&pc.needle);
940-
1050+
out:
1051+
if (haystack_u8 == &_haystack_u8) {
1052+
mbfl_string_clear(&_haystack_u8);
1053+
}
1054+
if (needle_u8 == &_needle_u8) {
1055+
mbfl_string_clear(&_needle_u8);
1056+
}
9411057
return result;
9421058
}
9431059

ext/mbstring/mbstring.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2080,7 +2080,7 @@ PHP_FUNCTION(mb_strpos)
20802080
RETURN_FALSE;
20812081
}
20822082
if (needle.len == 0) {
2083-
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter.");
2083+
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
20842084
RETURN_FALSE;
20852085
}
20862086

@@ -2223,7 +2223,7 @@ PHP_FUNCTION(mb_stripos)
22232223
RETURN_FALSE;
22242224
}
22252225
if (needle.len == 0) {
2226-
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter.");
2226+
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
22272227
RETURN_FALSE;
22282228
}
22292229
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
@@ -2292,7 +2292,7 @@ PHP_FUNCTION(mb_strstr)
22922292
}
22932293

22942294
if (needle.len <= 0) {
2295-
php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter.");
2295+
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
22962296
RETURN_FALSE;
22972297
}
22982298
n = mbfl_strpos(&haystack, &needle, 0, 0);
@@ -2402,7 +2402,7 @@ PHP_FUNCTION(mb_stristr)
24022402
}
24032403

24042404
if (!needle.len) {
2405-
php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter.");
2405+
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
24062406
RETURN_FALSE;
24072407
}
24082408

@@ -2521,7 +2521,7 @@ PHP_FUNCTION(mb_substr_count)
25212521
}
25222522

25232523
if (needle.len <= 0) {
2524-
php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty substring.");
2524+
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
25252525
RETURN_FALSE;
25262526
}
25272527

ext/mbstring/tests/bug43840.phpt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,31 @@ bool(false)
4646

4747
-- Offset is 22 --
4848
--Multibyte String:--
49+
50+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
4951
bool(false)
5052
--ASCII String:--
5153

52-
Warning: mb_strpos(): Offset not contained in string. in %s on line %d
54+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
5355
bool(false)
5456

5557
-- Offset is 53 --
5658
--Multibyte String:--
59+
60+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
5761
bool(false)
5862
--ASCII String:--
5963

60-
Warning: mb_strpos(): Offset not contained in string. in %s on line %d
64+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
6165
bool(false)
6266

6367
-- Offset is 54 --
6468
--Multibyte String:--
6569

66-
Warning: mb_strpos(): Offset not contained in string. in %s on line %d
70+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
6771
bool(false)
6872
--ASCII String:--
6973

70-
Warning: mb_strpos(): Offset not contained in string. in %s on line %d
74+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
7175
bool(false)
7276

0 commit comments

Comments
 (0)