Skip to content

Commit 0ea83ff

Browse files
committed
mb_split() can now handle empty matches like preg_split() does.
1 parent 92a7924 commit 0ea83ff

File tree

4 files changed

+49
-40
lines changed

4 files changed

+49
-40
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ PHP NEWS
55
- CLI server:
66
. Fixed bug #64128 (built-in web server is broken on ppc64). (Remi)
77

8+
- Mbstring:
9+
. mb_split() can now handle empty matches like preg_split() does. (Moriyoshi)
10+
811
?? ??? 2012, PHP 5.4.12
912

1013
- Core:

ext/mbstring/php_mbregex.c

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,7 @@ PHP_FUNCTION(mb_split)
10551055
php_mb_regex_t *re;
10561056
OnigRegion *regs = NULL;
10571057
char *string;
1058-
OnigUChar *pos;
1058+
OnigUChar *pos, *chunk_pos;
10591059
int string_len;
10601060

10611061
int n, err;
@@ -1065,8 +1065,8 @@ PHP_FUNCTION(mb_split)
10651065
RETURN_FALSE;
10661066
}
10671067

1068-
if (count == 0) {
1069-
count = 1;
1068+
if (count > 0) {
1069+
count--;
10701070
}
10711071

10721072
/* create regex pattern buffer */
@@ -1076,31 +1076,30 @@ PHP_FUNCTION(mb_split)
10761076

10771077
array_init(return_value);
10781078

1079-
pos = (OnigUChar *)string;
1079+
chunk_pos = pos = (OnigUChar *)string;
10801080
err = 0;
10811081
regs = onig_region_new();
10821082
/* churn through str, generating array entries as we go */
1083-
while ((--count != 0) &&
1084-
(err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
1085-
if (regs->beg[0] == regs->end[0]) {
1086-
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1083+
while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
1084+
int beg, end;
1085+
err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1086+
if (err < 0) {
10871087
break;
10881088
}
1089-
1089+
beg = regs->beg[0], end = regs->end[0];
10901090
/* add it to the array */
1091-
if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
1092-
add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
1091+
if ((pos - (OnigUChar *)string) < end) {
1092+
if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1093+
add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
1094+
--count;
1095+
} else {
1096+
err = -2;
1097+
break;
1098+
}
1099+
/* point at our new starting point */
1100+
chunk_pos = pos = (OnigUChar *)string + end;
10931101
} else {
1094-
err = -2;
1095-
break;
1096-
}
1097-
/* point at our new starting point */
1098-
n = regs->end[0];
1099-
if ((pos - (OnigUChar *)string) < n) {
1100-
pos = (OnigUChar *)string + n;
1101-
}
1102-
if (count < 0) {
1103-
count = 0;
1102+
pos++;
11041103
}
11051104
onig_region_free(regs, 0);
11061105
}
@@ -1117,9 +1116,9 @@ PHP_FUNCTION(mb_split)
11171116
}
11181117

11191118
/* otherwise we just have one last element to add to the array */
1120-
n = ((OnigUChar *)(string + string_len) - pos);
1119+
n = ((OnigUChar *)(string + string_len) - chunk_pos);
11211120
if (n > 0) {
1122-
add_next_index_stringl(return_value, (char *)pos, n, 1);
1121+
add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
11231122
} else {
11241123
add_next_index_stringl(return_value, "", 0, 1);
11251124
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
--TEST--
2+
mb_split() empty match
3+
--
4+
--SKIPIF--
5+
<?php
6+
extension_loaded('mbstring') or die('skip');
7+
function_exists('mb_split') or die("skip mb_split() is not available in this build");
8+
?>
9+
--FILE--
10+
<?php
11+
mb_regex_set_options('m');
12+
var_dump(mb_split('^', "a\nb\nc"));
13+
--EXPECT--
14+
array(3) {
15+
[0]=>
16+
string(2) "a
17+
"
18+
[1]=>
19+
string(2) "b
20+
"
21+
[2]=>
22+
string(1) "c"
23+
}

ext/mbstring/tests/mb_split_variation1.phpt

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -156,16 +156,12 @@ array(1) {
156156
}
157157

158158
-- Iteration 10 --
159-
160-
Warning: mb_split(): Empty regular expression in %s on line %d
161159
array(1) {
162160
[0]=>
163161
string(13) "a b c d e f g"
164162
}
165163

166164
-- Iteration 11 --
167-
168-
Warning: mb_split(): Empty regular expression in %s on line %d
169165
array(1) {
170166
[0]=>
171167
string(13) "a b c d e f g"
@@ -178,8 +174,6 @@ array(1) {
178174
}
179175

180176
-- Iteration 13 --
181-
182-
Warning: mb_split(): Empty regular expression in %s on line %d
183177
array(1) {
184178
[0]=>
185179
string(13) "a b c d e f g"
@@ -192,24 +186,18 @@ array(1) {
192186
}
193187

194188
-- Iteration 15 --
195-
196-
Warning: mb_split(): Empty regular expression in %s on line %d
197189
array(1) {
198190
[0]=>
199191
string(13) "a b c d e f g"
200192
}
201193

202194
-- Iteration 16 --
203-
204-
Warning: mb_split(): Empty regular expression in %s on line %d
205195
array(1) {
206196
[0]=>
207197
string(13) "a b c d e f g"
208198
}
209199

210200
-- Iteration 17 --
211-
212-
Warning: mb_split(): Empty regular expression in %s on line %d
213201
array(1) {
214202
[0]=>
215203
string(13) "a b c d e f g"
@@ -240,16 +228,12 @@ array(1) {
240228
}
241229

242230
-- Iteration 22 --
243-
244-
Warning: mb_split(): Empty regular expression in %s on line %d
245231
array(1) {
246232
[0]=>
247233
string(13) "a b c d e f g"
248234
}
249235

250236
-- Iteration 23 --
251-
252-
Warning: mb_split(): Empty regular expression in %s on line %d
253237
array(1) {
254238
[0]=>
255239
string(13) "a b c d e f g"
@@ -259,4 +243,4 @@ array(1) {
259243

260244
Warning: mb_split() expects parameter 1 to be string, resource given in %s on line %d
261245
bool(false)
262-
Done
246+
Done

0 commit comments

Comments
 (0)