Skip to content

Commit d860e06

Browse files
committed
Allow empty needles in the mb_strpos and mb_strstr function families.
This is the mbstring implementation analogous to 6d57848.
1 parent 72a5fde commit d860e06

10 files changed: +481 / -30 lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -656,7 +656,7 @@ filter_count_output(int c, void *data)
656656
}
657657

658658
size_t
659-
mbfl_strlen(mbfl_string *string)
659+
mbfl_strlen(const mbfl_string *string)
660660
{
661661
size_t len, n, k;
662662
unsigned char *p;
@@ -855,13 +855,33 @@ mbfl_strpos(
855855
needle_u8 = needle;
856856
}
857857

858-
if (needle_u8->len < 1) {
859-
result = (size_t) -8;
858+
result = (size_t) -1;
859+
if (haystack_u8->len < needle_u8->len) {
860860
goto out;
861861
}
862862

863-
result = (size_t) -1;
864-
if (haystack_u8->len < needle_u8->len) {
863+
if (needle_u8->len == 0) {
864+
/* Out of bound offset */
865+
if (
866+
(offset > 0 && offset > mbfl_strlen(haystack_u8))
867+
|| (offset < 0 && -offset > mbfl_strlen(haystack_u8))
868+
) {
869+
return -16;
870+
}
871+
872+
if (reverse) {
873+
if (offset < 0) {
874+
result = (size_t) -offset;
875+
} else {
876+
result = mbfl_strlen(haystack_u8) - offset;;
877+
}
878+
} else {
879+
if (offset < 0) {
880+
result = mbfl_strlen(haystack_u8) + offset;
881+
} else {
882+
result = (size_t) offset;
883+
}
884+
}
865885
goto out;
866886
}
867887

ext/mbstring/libmbfl/mbfl/mbfilter.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ static inline int mbfl_is_error(size_t len) {
193193
* strlen
194194
*/
195195
MBFLAPI extern size_t
196-
mbfl_strlen(mbfl_string *string);
196+
mbfl_strlen(const mbfl_string *string);
197197

198198
/*
199199
* oddlen

ext/mbstring/mbstring.c

Lines changed: 0 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -2110,11 +2110,6 @@ PHP_FUNCTION(mb_strpos)
21102110
}
21112111
}
21122112

2113-
if (needle.len == 0) {
2114-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2115-
RETURN_FALSE;
2116-
}
2117-
21182113
n = mbfl_strpos(&haystack, &needle, offset, reverse);
21192114
if (!mbfl_is_error(n)) {
21202115
RETVAL_LONG(n);
@@ -2222,11 +2217,6 @@ PHP_FUNCTION(mb_stripos)
22222217
return;
22232218
}
22242219

2225-
if (needle.len == 0) {
2226-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2227-
RETURN_FALSE;
2228-
}
2229-
22302220
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
22312221

22322222
if (!mbfl_is_error(n)) {
@@ -2279,11 +2269,6 @@ PHP_FUNCTION(mb_strstr)
22792269
RETURN_FALSE;
22802270
}
22812271

2282-
if (needle.len == 0) {
2283-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2284-
RETURN_FALSE;
2285-
}
2286-
22872272
n = mbfl_strpos(&haystack, &needle, 0, 0);
22882273
if (!mbfl_is_error(n)) {
22892274
if (part) {
@@ -2383,11 +2368,6 @@ PHP_FUNCTION(mb_stristr)
23832368
RETURN_FALSE;
23842369
}
23852370

2386-
if (!needle.len) {
2387-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2388-
RETURN_FALSE;
2389-
}
2390-
23912371
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
23922372
if (mbfl_is_error(n)) {
23932373
RETURN_FALSE;
@@ -4882,10 +4862,6 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol
48824862
break;
48834863
}
48844864

4885-
if (needle.len == 0) {
4886-
break;
4887-
}
4888-
48894865
if (offset != 0) {
48904866
size_t haystack_char_len = mbfl_strlen(&haystack);
48914867

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
--TEST--
2+
Test mb_stripos() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_stripos') or die("skip mb_stripos() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。0123456789。";
16+
17+
echo "\n-- ASCII string without offset --\n";
18+
var_dump(mb_stripos($string_ascii, ''));
19+
20+
echo "\n-- ASCII string with in range positive offset --\n";
21+
var_dump(mb_stripos($string_ascii, '', 2));
22+
23+
echo "\n-- ASCII string with in range negative offset --\n";
24+
var_dump(mb_stripos($string_ascii, '', -2));
25+
26+
echo "\n-- ASCII string with out of bound positive offset --\n";
27+
var_dump(mb_stripos($string_ascii, '', 150));
28+
29+
echo "\n-- ASCII string with out of bound negative offset --\n";
30+
var_dump(mb_stripos($string_ascii, '', -150));
31+
32+
33+
echo "\n-- Multi-byte string without offset --\n";
34+
var_dump(mb_stripos($string_mb, ''));
35+
36+
echo "\n-- Multi-byte string with in range positive offset --\n";
37+
var_dump(mb_stripos($string_mb, '', 2));
38+
39+
echo "\n-- Multi-byte string with in range negative offset --\n";
40+
var_dump(mb_stripos($string_mb, '', -2));
41+
42+
echo "\n-- Multi-byte string with out of bound positive offset --\n";
43+
var_dump(mb_stripos($string_mb, '', 150));
44+
45+
echo "\n-- Multi-byte string with out of bound negative offset --\n";
46+
var_dump(mb_stripos($string_mb, '', -150));
47+
48+
?>
49+
--EXPECTF--
50+
-- ASCII string without offset --
51+
int(0)
52+
53+
-- ASCII string with in range positive offset --
54+
int(2)
55+
56+
-- ASCII string with in range negative offset --
57+
int(5)
58+
59+
-- ASCII string with out of bound positive offset --
60+
61+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
62+
bool(false)
63+
64+
-- ASCII string with out of bound negative offset --
65+
66+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
67+
bool(false)
68+
69+
-- Multi-byte string without offset --
70+
int(0)
71+
72+
-- Multi-byte string with in range positive offset --
73+
int(2)
74+
75+
-- Multi-byte string with in range negative offset --
76+
int(19)
77+
78+
-- Multi-byte string with out of bound positive offset --
79+
80+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
81+
bool(false)
82+
83+
-- Multi-byte string with out of bound negative offset --
84+
85+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
86+
bool(false)
Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Test mb_stristr() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_stristr') or die("skip mb_stristr() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。0123456789。";
16+
17+
echo "\n-- ASCII string --\n";
18+
var_dump(mb_stristr($string_ascii, '', false, 'ISO-8859-1'));
19+
var_dump(mb_stristr($string_ascii, ''));
20+
var_dump(mb_stristr($string_ascii, '', true));
21+
22+
echo "\n-- Multibyte string --\n";
23+
var_dump(mb_stristr($string_mb, ''));
24+
var_dump(mb_stristr($string_mb, '', false, 'utf-8'));
25+
var_dump(mb_stristr($string_mb, '', true));
26+
27+
?>
28+
--EXPECT--
29+
-- ASCII string --
30+
string(7) "abc def"
31+
string(7) "abc def"
32+
string(0) ""
33+
34+
-- Multibyte string --
35+
string(53) "日本語テキストです。0123456789。"
36+
string(53) "日本語テキストです。0123456789。"
37+
string(0) ""
Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
--TEST--
2+
Test mb_strpos() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_strpos') or die("skip mb_strpos() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。0123456789。";
16+
17+
echo "\n-- ASCII string without offset --\n";
18+
var_dump(mb_strpos($string_ascii, ''));
19+
20+
echo "\n-- ASCII string with in range positive offset --\n";
21+
var_dump(mb_strpos($string_ascii, '', 2));
22+
23+
echo "\n-- ASCII string with in range negative offset --\n";
24+
var_dump(mb_strpos($string_ascii, '', -2));
25+
26+
echo "\n-- ASCII string with out of bound positive offset --\n";
27+
var_dump(mb_strpos($string_ascii, '', 15));
28+
29+
echo "\n-- ASCII string with out of bound negative offset --\n";
30+
var_dump(mb_strpos($string_ascii, '', -15));
31+
32+
33+
echo "\n-- Multi-byte string without offset --\n";
34+
var_dump(mb_strpos($string_mb, ''));
35+
36+
echo "\n-- Multi-byte string with in range positive offset --\n";
37+
var_dump(mb_strpos($string_mb, '', 2));
38+
39+
echo "\n-- Multi-byte string with in range negative offset --\n";
40+
var_dump(mb_strpos($string_mb, '', -2));
41+
42+
echo "\n-- Multi-byte string with out of bound positive offset --\n";
43+
var_dump(mb_strpos($string_mb, '', 150));
44+
45+
echo "\n-- Multi-byte string with out of bound negative offset --\n";
46+
var_dump(mb_strpos($string_mb, '', -150));
47+
48+
?>
49+
--EXPECTF--
50+
-- ASCII string without offset --
51+
int(0)
52+
53+
-- ASCII string with in range positive offset --
54+
int(2)
55+
56+
-- ASCII string with in range negative offset --
57+
int(5)
58+
59+
-- ASCII string with out of bound positive offset --
60+
61+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
62+
bool(false)
63+
64+
-- ASCII string with out of bound negative offset --
65+
66+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
67+
bool(false)
68+
69+
-- Multi-byte string without offset --
70+
int(0)
71+
72+
-- Multi-byte string with in range positive offset --
73+
int(2)
74+
75+
-- Multi-byte string with in range negative offset --
76+
int(19)
77+
78+
-- Multi-byte string with out of bound positive offset --
79+
80+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
81+
bool(false)
82+
83+
-- Multi-byte string with out of bound negative offset --
84+
85+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
86+
bool(false)
Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Test mb_strrchr() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_strrchr') or die("skip mb_strrchr() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。0123456789。";
16+
17+
echo "\n-- ASCII string --\n";
18+
var_dump(bin2hex(mb_strrchr($string_ascii, '', false, 'ISO-8859-1')));
19+
var_dump(bin2hex(mb_strrchr($string_ascii, '')));
20+
var_dump(bin2hex(mb_strrchr($string_ascii, '', true)));
21+
22+
echo "\n-- Multibyte string --\n";
23+
var_dump(bin2hex(mb_strrchr($string_mb, '')));
24+
var_dump(bin2hex(mb_strrchr($string_mb, '', false, 'utf-8')));
25+
var_dump(bin2hex(mb_strrchr($string_mb, '', true)));
26+
27+
?>
28+
--EXPECT--
29+
-- ASCII string --
30+
string(0) ""
31+
string(0) ""
32+
string(0) ""
33+
34+
-- Multibyte string --
35+
string(0) ""
36+
string(0) ""
37+
string(0) ""

0 commit comments

Comments
 (0)