Skip to content

Commit 483efc7

Browse files
committed
Allow empty needles in mb_strpos and mb_strstr function family.
MBstring analogous implementation to 6d57848. Closes GH-4977.
1 parent b8609e2 commit 483efc7

10 files changed

+477
-30
lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ filter_count_output(int c, void *data)
656656
}
657657

658658
size_t
659-
mbfl_strlen(mbfl_string *string)
659+
mbfl_strlen(const mbfl_string *string)
660660
{
661661
size_t len, n, k;
662662
unsigned char *p;
@@ -855,13 +855,29 @@ mbfl_strpos(
855855
needle_u8 = needle;
856856
}
857857

858-
if (needle_u8->len < 1) {
859-
result = (size_t) -8;
858+
result = (size_t) -1;
859+
if (haystack_u8->len < needle_u8->len) {
860860
goto out;
861861
}
862862

863-
result = (size_t) -1;
864-
if (haystack_u8->len < needle_u8->len) {
863+
if (needle_u8->len == 0) {
864+
size_t haystack_length = mbfl_strlen(haystack_u8);
865+
/* Check if offset is out of bounds */
866+
if (
867+
(offset > 0 && offset > haystack_length)
868+
|| (offset < 0 && -offset > haystack_length)
869+
) {
870+
result = -16;
871+
goto out;
872+
}
873+
874+
if (offset < 0) {
875+
result = haystack_length + offset;
876+
} else if (reverse) {
877+
result = haystack_length;
878+
} else {
879+
result = (size_t) offset;
880+
}
865881
goto out;
866882
}
867883

ext/mbstring/libmbfl/mbfl/mbfilter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ static inline int mbfl_is_error(size_t len) {
193193
* strlen
194194
*/
195195
MBFLAPI extern size_t
196-
mbfl_strlen(mbfl_string *string);
196+
mbfl_strlen(const mbfl_string *string);
197197

198198
/*
199199
* oddlen

ext/mbstring/mbstring.c

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,11 +2110,6 @@ PHP_FUNCTION(mb_strpos)
21102110
}
21112111
}
21122112

2113-
if (needle.len == 0) {
2114-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2115-
RETURN_FALSE;
2116-
}
2117-
21182113
n = mbfl_strpos(&haystack, &needle, offset, reverse);
21192114
if (!mbfl_is_error(n)) {
21202115
RETVAL_LONG(n);
@@ -2189,11 +2184,6 @@ PHP_FUNCTION(mb_stripos)
21892184
RETURN_THROWS();
21902185
}
21912186

2192-
if (needle.len == 0) {
2193-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2194-
RETURN_FALSE;
2195-
}
2196-
21972187
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
21982188

21992189
if (!mbfl_is_error(n)) {
@@ -2246,11 +2236,6 @@ PHP_FUNCTION(mb_strstr)
22462236
RETURN_FALSE;
22472237
}
22482238

2249-
if (needle.len == 0) {
2250-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2251-
RETURN_FALSE;
2252-
}
2253-
22542239
n = mbfl_strpos(&haystack, &needle, 0, 0);
22552240
if (!mbfl_is_error(n)) {
22562241
if (part) {
@@ -2350,11 +2335,6 @@ PHP_FUNCTION(mb_stristr)
23502335
RETURN_FALSE;
23512336
}
23522337

2353-
if (!needle.len) {
2354-
php_error_docref(NULL, E_WARNING, "Empty delimiter");
2355-
RETURN_FALSE;
2356-
}
2357-
23582338
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
23592339
if (mbfl_is_error(n)) {
23602340
RETURN_FALSE;
@@ -4849,10 +4829,6 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol
48494829
break;
48504830
}
48514831

4852-
if (needle.len == 0) {
4853-
break;
4854-
}
4855-
48564832
if (offset != 0) {
48574833
size_t haystack_char_len = mbfl_strlen(&haystack);
48584834

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
--TEST--
2+
Test mb_stripos() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_stripos') or die("skip mb_stripos() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。01234５６７８９。";
16+
17+
echo "\n-- ASCII string without offset --\n";
18+
var_dump(mb_stripos($string_ascii, ''));
19+
20+
echo "\n-- ASCII string with in range positive offset --\n";
21+
var_dump(mb_stripos($string_ascii, '', 2));
22+
23+
echo "\n-- ASCII string with in range negative offset --\n";
24+
var_dump(mb_stripos($string_ascii, '', -2));
25+
26+
echo "\n-- ASCII string with out of bound positive offset --\n";
27+
var_dump(mb_stripos($string_ascii, '', 150));
28+
29+
echo "\n-- ASCII string with out of bound negative offset --\n";
30+
var_dump(mb_stripos($string_ascii, '', -150));
31+
32+
33+
echo "\n-- Multi-byte string without offset --\n";
34+
var_dump(mb_stripos($string_mb, ''));
35+
36+
echo "\n-- Multi-byte string with in range positive offset --\n";
37+
var_dump(mb_stripos($string_mb, '', 2));
38+
39+
echo "\n-- Multi-byte string with in range negative offset --\n";
40+
var_dump(mb_stripos($string_mb, '', -2));
41+
42+
echo "\n-- Multi-byte string with out of bound positive offset --\n";
43+
var_dump(mb_stripos($string_mb, '', 150));
44+
45+
echo "\n-- Multi-byte string with out of bound negative offset --\n";
46+
var_dump(mb_stripos($string_mb, '', -150));
47+
48+
?>
49+
--EXPECTF--
50+
-- ASCII string without offset --
51+
int(0)
52+
53+
-- ASCII string with in range positive offset --
54+
int(2)
55+
56+
-- ASCII string with in range negative offset --
57+
int(5)
58+
59+
-- ASCII string with out of bound positive offset --
60+
61+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
62+
bool(false)
63+
64+
-- ASCII string with out of bound negative offset --
65+
66+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
67+
bool(false)
68+
69+
-- Multi-byte string without offset --
70+
int(0)
71+
72+
-- Multi-byte string with in range positive offset --
73+
int(2)
74+
75+
-- Multi-byte string with in range negative offset --
76+
int(19)
77+
78+
-- Multi-byte string with out of bound positive offset --
79+
80+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
81+
bool(false)
82+
83+
-- Multi-byte string with out of bound negative offset --
84+
85+
Warning: mb_stripos(): Offset not contained in string in %s on line %d
86+
bool(false)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Test mb_stristr() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_stristr') or die("skip mb_stristr() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。01234５６７８９。";
16+
17+
echo "\n-- ASCII string --\n";
18+
var_dump(mb_stristr($string_ascii, '', false, 'ISO-8859-1'));
19+
var_dump(mb_stristr($string_ascii, ''));
20+
var_dump(mb_stristr($string_ascii, '', true));
21+
22+
echo "\n-- Multibyte string --\n";
23+
var_dump(mb_stristr($string_mb, ''));
24+
var_dump(mb_stristr($string_mb, '', false, 'utf-8'));
25+
var_dump(mb_stristr($string_mb, '', true));
26+
27+
?>
28+
--EXPECT--
29+
-- ASCII string --
30+
string(7) "abc def"
31+
string(7) "abc def"
32+
string(0) ""
33+
34+
-- Multibyte string --
35+
string(53) "日本語テキストです。01234５６７８９。"
36+
string(53) "日本語テキストです。01234５６７８９。"
37+
string(0) ""
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
--TEST--
2+
Test mb_strpos() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_strpos') or die("skip mb_strpos() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。01234５６７８９。";
16+
17+
echo "\n-- ASCII string without offset --\n";
18+
var_dump(mb_strpos($string_ascii, ''));
19+
20+
echo "\n-- ASCII string with in range positive offset --\n";
21+
var_dump(mb_strpos($string_ascii, '', 2));
22+
23+
echo "\n-- ASCII string with in range negative offset --\n";
24+
var_dump(mb_strpos($string_ascii, '', -2));
25+
26+
echo "\n-- ASCII string with out of bound positive offset --\n";
27+
var_dump(mb_strpos($string_ascii, '', 15));
28+
29+
echo "\n-- ASCII string with out of bound negative offset --\n";
30+
var_dump(mb_strpos($string_ascii, '', -15));
31+
32+
33+
echo "\n-- Multi-byte string without offset --\n";
34+
var_dump(mb_strpos($string_mb, ''));
35+
36+
echo "\n-- Multi-byte string with in range positive offset --\n";
37+
var_dump(mb_strpos($string_mb, '', 2));
38+
39+
echo "\n-- Multi-byte string with in range negative offset --\n";
40+
var_dump(mb_strpos($string_mb, '', -2));
41+
42+
echo "\n-- Multi-byte string with out of bound positive offset --\n";
43+
var_dump(mb_strpos($string_mb, '', 150));
44+
45+
echo "\n-- Multi-byte string with out of bound negative offset --\n";
46+
var_dump(mb_strpos($string_mb, '', -150));
47+
48+
?>
49+
--EXPECTF--
50+
-- ASCII string without offset --
51+
int(0)
52+
53+
-- ASCII string with in range positive offset --
54+
int(2)
55+
56+
-- ASCII string with in range negative offset --
57+
int(5)
58+
59+
-- ASCII string with out of bound positive offset --
60+
61+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
62+
bool(false)
63+
64+
-- ASCII string with out of bound negative offset --
65+
66+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
67+
bool(false)
68+
69+
-- Multi-byte string without offset --
70+
int(0)
71+
72+
-- Multi-byte string with in range positive offset --
73+
int(2)
74+
75+
-- Multi-byte string with in range negative offset --
76+
int(19)
77+
78+
-- Multi-byte string with out of bound positive offset --
79+
80+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
81+
bool(false)
82+
83+
-- Multi-byte string with out of bound negative offset --
84+
85+
Warning: mb_strpos(): Offset not contained in string in %s on line %d
86+
bool(false)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Test mb_strrchr() function : with empty needle
3+
--SKIPIF--
4+
<?php
5+
extension_loaded('mbstring') or die('skip');
6+
function_exists('mb_strrchr') or die("skip mb_strrchr() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
11+
mb_internal_encoding('UTF-8');
12+
13+
$string_ascii = 'abc def';
14+
// Japanese string in UTF-8
15+
$string_mb = "日本語テキストです。01234５６７８９。";
16+
17+
echo "\n-- ASCII string --\n";
18+
var_dump(bin2hex(mb_strrchr($string_ascii, '', false, 'ISO-8859-1')));
19+
var_dump(bin2hex(mb_strrchr($string_ascii, '')));
20+
var_dump(bin2hex(mb_strrchr($string_ascii, '', true)));
21+
22+
echo "\n-- Multibyte string --\n";
23+
var_dump(bin2hex(mb_strrchr($string_mb, '')));
24+
var_dump(bin2hex(mb_strrchr($string_mb, '', false, 'utf-8')));
25+
var_dump(bin2hex(mb_strrchr($string_mb, '', true)));
26+
27+
?>
28+
--EXPECT--
29+
-- ASCII string --
30+
string(0) ""
31+
string(0) ""
32+
string(0) ""
33+
34+
-- Multibyte string --
35+
string(0) ""
36+
string(0) ""
37+
string(0) ""

0 commit comments

Comments
 (0)