Skip to content

Commit d504ad5

Browse files
committed
Base mbfl_strpos on zend_memnstr
zend_memnstr uses the same algorithm, but additionally has a fast path for short strings / needles, where a more naive search performs better.
1 parent 73b3130 commit d504ad5

File tree

1 file changed

+17
-44
lines changed

1 file changed

+17
-44
lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 17 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
#ifdef HAVE_CONFIG_H
8484
#include "config.h"
8585
#endif
86+
#include "zend_operators.h"
8687

8788
#include <stddef.h>
8889
#include <string.h>
@@ -898,54 +899,26 @@ mbfl_strpos(
898899
goto out;
899900
}
900901

901-
if (needle_u8->len == 0) {
902-
size_t haystack_length = mbfl_strlen(haystack_u8);
903-
if (offset < 0) {
904-
result = haystack_length + offset;
905-
} else if (reverse) {
906-
result = haystack_length;
907-
} else {
908-
result = (size_t) offset;
909-
}
910-
goto out;
911-
}
912-
913902
if (!reverse) {
914-
size_t jtbl[1 << (sizeof(unsigned char) * 8)];
915-
size_t needle_u8_len = needle_u8->len;
916-
size_t i;
917-
const unsigned char *p, *q, *e;
918-
const unsigned char *haystack_u8_val = haystack_u8->val,
919-
*needle_u8_val = needle_u8->val;
920-
for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
921-
jtbl[i] = needle_u8_len + 1;
922-
}
923-
for (i = 0; i < needle_u8_len - 1; ++i) {
924-
jtbl[needle_u8_val[i]] = needle_u8_len - i;
925-
}
926-
e = haystack_u8_val + haystack_u8->len;
927-
p = offset_pointer + needle_u8_len;
928-
if (p > e) {
929-
goto out;
903+
const char *found_pos = zend_memnstr(
904+
(const char *) offset_pointer,
905+
(const char *) needle_u8->val, needle_u8->len,
906+
(const char *) haystack_u8->val + haystack_u8->len);
907+
if (found_pos) {
908+
result = mbfl_pointer_to_offset_utf8(
909+
haystack_u8->val, (const unsigned char *) found_pos);
930910
}
931-
while (p <= e) {
932-
const unsigned char *pv = p;
933-
q = needle_u8_val + needle_u8_len;
934-
for (;;) {
935-
if (q == needle_u8_val) {
936-
result = mbfl_pointer_to_offset_utf8(haystack_u8_val, p);
937-
goto out;
938-
}
939-
if (*--q != *--p) {
940-
break;
941-
}
942-
}
943-
p += jtbl[*p];
944-
if (p <= pv) {
945-
p = pv + 1;
911+
} else {
912+
if (needle_u8->len == 0) {
913+
size_t haystack_length = mbfl_strlen(haystack_u8);
914+
if (offset < 0) {
915+
result = haystack_length + offset;
916+
} else {
917+
result = haystack_length;
946918
}
919+
goto out;
947920
}
948-
} else {
921+
949922
size_t jtbl[1 << (sizeof(unsigned char) * 8)];
950923
size_t needle_u8_len = needle_u8->len, needle_len = 0;
951924
size_t i;

0 commit comments

Comments
 (0)