Skip to content

Commit ce61698

Browse files
committed
Move offset error checking into mbfl_strpos
This avoids calculating the full length only in order to validate the offset, as mbfl_strpos needs to find the offset internally anyway.
1 parent 0f6d223 commit ce61698

File tree

6 files changed

+107
-70
lines changed

6 files changed

+107
-70
lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,37 @@ mbfl_oddlen(mbfl_string *string)
809809
/* NOT REACHED */
810810
}
811811

812+
static const unsigned char *mbfl_find_offset_utf8(const mbfl_string *str, ssize_t offset) {
813+
if (offset < 0) {
814+
const unsigned char *pos = str->val + str->len;
815+
const unsigned char *begin = str->val;
816+
while (offset < 0) {
817+
if (pos <= begin) {
818+
return NULL;
819+
}
820+
821+
unsigned char c = *(--pos);
822+
if (c < 0x80) {
823+
++offset;
824+
} else if ((c & 0xc0) != 0x80) {
825+
++offset;
826+
}
827+
}
828+
return pos;
829+
} else {
830+
const unsigned char *u8_tbl = mbfl_encoding_utf8.mblen_table;
831+
const unsigned char *pos = str->val;
832+
const unsigned char *end = str->val + str->len;
833+
while (offset-- > 0) {
834+
if (pos >= end) {
835+
return NULL;
836+
}
837+
pos += u8_tbl[*pos];
838+
}
839+
return pos;
840+
}
841+
}
842+
812843
size_t
813844
mbfl_strpos(
814845
mbfl_string *haystack,
@@ -819,7 +850,7 @@ mbfl_strpos(
819850
size_t result;
820851
mbfl_string _haystack_u8, _needle_u8;
821852
const mbfl_string *haystack_u8, *needle_u8 = NULL;
822-
const unsigned char *u8_tbl = mbfl_encoding_utf8.mblen_table;
853+
const unsigned char *offset_pointer;
823854

824855
if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
825856
mbfl_string_init(&_haystack_u8);
@@ -843,22 +874,19 @@ mbfl_strpos(
843874
needle_u8 = needle;
844875
}
845876

877+
offset_pointer = mbfl_find_offset_utf8(haystack_u8, offset);
878+
if (!offset_pointer) {
879+
result = MBFL_ERROR_OFFSET;
880+
goto out;
881+
}
882+
846883
result = MBFL_ERROR_NOT_FOUND;
847884
if (haystack_u8->len < needle_u8->len) {
848885
goto out;
849886
}
850887

851888
if (needle_u8->len == 0) {
852889
size_t haystack_length = mbfl_strlen(haystack_u8);
853-
/* Check if offset is out of bound */
854-
if (
855-
(offset > 0 && offset > haystack_length)
856-
|| (offset < 0 && -offset > haystack_length)
857-
) {
858-
result = -16;
859-
goto out;
860-
}
861-
862890
if (offset < 0) {
863891
result = haystack_length + offset;
864892
} else if (reverse) {
@@ -883,15 +911,7 @@ mbfl_strpos(
883911
jtbl[needle_u8_val[i]] = needle_u8_len - i;
884912
}
885913
e = haystack_u8_val + haystack_u8->len;
886-
p = haystack_u8_val;
887-
while (offset-- > 0) {
888-
if (p >= e) {
889-
result = MBFL_ERROR_OFFSET;
890-
goto out;
891-
}
892-
p += u8_tbl[*p];
893-
}
894-
p += needle_u8_len;
914+
p = offset_pointer + needle_u8_len;
895915
if (p > e) {
896916
goto out;
897917
}
@@ -968,14 +988,7 @@ mbfl_strpos(
968988
}
969989
}
970990
} else {
971-
const unsigned char *ee = haystack_u8_val + haystack_u8->len;
972-
while (offset-- > 0) {
973-
if (e >= ee) {
974-
result = MBFL_ERROR_OFFSET;
975-
goto out;
976-
}
977-
e += u8_tbl[*e];
978-
}
991+
e = offset_pointer;
979992
}
980993
if (p < e + needle_u8_len) {
981994
goto out;

ext/mbstring/mbstring.c

Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,6 +2079,22 @@ PHP_FUNCTION(mb_strlen)
20792079
}
20802080
/* }}} */
20812081

2082+
static void handle_strpos_error(size_t error) {
2083+
switch (error) {
2084+
case MBFL_ERROR_NOT_FOUND:
2085+
break;
2086+
case MBFL_ERROR_ENCODING:
2087+
php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2088+
break;
2089+
case MBFL_ERROR_OFFSET:
2090+
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2091+
break;
2092+
default:
2093+
php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2094+
break;
2095+
}
2096+
}
2097+
20822098
/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
20832099
Find position of first occurrence of a string within another */
20842100
PHP_FUNCTION(mb_strpos)
@@ -2099,34 +2115,11 @@ PHP_FUNCTION(mb_strpos)
20992115
RETURN_FALSE;
21002116
}
21012117

2102-
if (offset != 0) {
2103-
size_t slen = mbfl_strlen(&haystack);
2104-
if (offset < 0) {
2105-
offset += slen;
2106-
}
2107-
if (offset < 0 || offset > slen) {
2108-
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2109-
RETURN_FALSE;
2110-
}
2111-
}
2112-
21132118
n = mbfl_strpos(&haystack, &needle, offset, reverse);
21142119
if (!mbfl_is_error(n)) {
21152120
RETVAL_LONG(n);
21162121
} else {
2117-
switch (n) {
2118-
case MBFL_ERROR_NOT_FOUND:
2119-
break;
2120-
case MBFL_ERROR_ENCODING:
2121-
php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2122-
break;
2123-
case MBFL_ERROR_OFFSET:
2124-
php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2125-
break;
2126-
default:
2127-
php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2128-
break;
2129-
}
2122+
handle_strpos_error(n);
21302123
RETVAL_FALSE;
21312124
}
21322125
}
@@ -2150,19 +2143,11 @@ PHP_FUNCTION(mb_strrpos)
21502143
RETURN_FALSE;
21512144
}
21522145

2153-
if (offset != 0) {
2154-
size_t haystack_char_len = mbfl_strlen(&haystack);
2155-
if ((offset > 0 && offset > haystack_char_len) ||
2156-
(offset < 0 && -offset > haystack_char_len)) {
2157-
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2158-
RETURN_FALSE;
2159-
}
2160-
}
2161-
21622146
n = mbfl_strpos(&haystack, &needle, offset, 1);
21632147
if (!mbfl_is_error(n)) {
21642148
RETVAL_LONG(n);
21652149
} else {
2150+
handle_strpos_error(n);
21662151
RETVAL_FALSE;
21672152
}
21682153
}

ext/mbstring/tests/bug43841.phpt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,25 @@ foreach ($offsets as $i) {
4141
-- Offset is -25 --
4242
Multibyte String:
4343

44-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
44+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
4545
bool(false)
4646
ASCII String:
4747
mb_strrpos:
4848

49-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
49+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
5050
bool(false)
5151
strrpos:
5252
Offset not contained in string
5353

5454
-- Offset is -24 --
5555
Multibyte String:
5656

57-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
57+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
5858
bool(false)
5959
ASCII String:
6060
mb_strrpos:
6161

62-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
62+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
6363
bool(false)
6464
strrpos:
6565
Offset not contained in string

ext/mbstring/tests/bug45923.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ bool(false)
168168
bool(false)
169169
> Offset: 12
170170

171-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
171+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
172172
bool(false)
173173
> Offset: -1
174174
int(8)
@@ -178,7 +178,7 @@ int(8)
178178
int(4)
179179
> Offset: -20
180180

181-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
181+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
182182
bool(false)
183183

184184
------- strripos -----------
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
--TEST--
2+
Offset errors for various strpos functions
3+
--FILE--
4+
<?php
5+
6+
var_dump(mb_strpos("f", "bar", 3));
7+
var_dump(mb_strpos("f", "bar", -3));
8+
var_dump(mb_strrpos("f", "bar", 3));
9+
var_dump(mb_strrpos("f", "bar", -3));
10+
var_dump(mb_stripos("f", "bar", 3));
11+
var_dump(mb_stripos("f", "bar", -3));
12+
var_dump(mb_strripos("f", "bar", 3));
13+
var_dump(mb_strripos("f", "bar", -3));
14+
15+
?>
16+
--EXPECTF--
17+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
18+
bool(false)
19+
20+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
21+
bool(false)
22+
23+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
24+
bool(false)
25+
26+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
27+
bool(false)
28+
29+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
30+
bool(false)
31+
32+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
33+
bool(false)
34+
35+
Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d
36+
bool(false)
37+
38+
Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d
39+
bool(false)

ext/mbstring/tests/mb_strrpos_empty_needle.phpt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@ int(5)
5858

5959
-- ASCII string with out of bound positive offset --
6060

61-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
61+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
6262
bool(false)
6363

6464
-- ASCII string with out of bound negative offset --
6565

66-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
66+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
6767
bool(false)
6868

6969
-- Multi-byte string without offset --
@@ -77,10 +77,10 @@ int(19)
7777

7878
-- Multi-byte string with out of bound positive offset --
7979

80-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
80+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
8181
bool(false)
8282

8383
-- Multi-byte string with out of bound negative offset --
8484

85-
Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
85+
Warning: mb_strrpos(): Offset not contained in string in %s on line %d
8686
bool(false)

0 commit comments

Comments
 (0)