Skip to content

Commit 212f56b

Browse files
ju1iusnikic
ju1ius
authored and committed
adds support for named captures to mb_ereg & mb_ereg_search
`mb_ereg`, `mb_ereg_search_regs` & `mb_ereg_search_getregs` returned only numbered capturing groups. Now they return both numbered and named capturing groups. Fixes Bug #72704.
1 parent 69a49af commit 212f56b

File tree

4 files changed

+185
-0
lines changed

4 files changed

+185
-0
lines changed

ext/mbstring/php_mbregex.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,50 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
646646
}
647647
/* }}} */
648648

649+
650+
/*
651+
* Callbacks for named subpatterns
652+
*/
653+
654+
/* {{{ struct mb_ereg_groups_iter_arg */
655+
typedef struct mb_regex_groups_iter_args {
656+
zval *groups;
657+
char *search_str;
658+
int search_len;
659+
OnigRegion *region;
660+
} mb_regex_groups_iter_args;
661+
/* }}} */
662+
663+
/* {{{ mb_ereg_groups_iter */
664+
static int
665+
mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
666+
{
667+
mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg;
668+
int i, gn, ref, beg, end;
669+
670+
for (i = 0; i < ngroup_num; i++) {
671+
gn = group_nums[i];
672+
ref = onig_name_to_backref_number(reg, name, name_end, args->region);
673+
if (ref != gn) {
674+
/*
675+
* In case of duplicate groups, keep only the last suceeding one
676+
* to be consistent with preg_match with the PCRE_DUPNAMES option.
677+
*/
678+
continue;
679+
}
680+
beg = args->region->beg[gn];
681+
end = args->region->end[gn];
682+
if (beg >= 0 && beg < end && end <= args->search_len) {
683+
add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg);
684+
} else {
685+
add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0);
686+
}
687+
}
688+
689+
return 0;
690+
}
691+
/* }}} */
692+
649693
/*
650694
* php functions
651695
*/
@@ -762,6 +806,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
762806
add_index_bool(array, i, 0);
763807
}
764808
}
809+
810+
if (onig_number_of_names(re) > 0) {
811+
mb_regex_groups_iter_args args = {array, string, string_len, regs};
812+
onig_foreach_name(re, mb_regex_groups_iter, &args);
813+
}
765814
}
766815

767816
if (match_len == 0) {
@@ -1291,6 +1340,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
12911340
add_index_bool(return_value, i, 0);
12921341
}
12931342
}
1343+
if (onig_number_of_names(MBREX(search_re)) > 0) {
1344+
mb_regex_groups_iter_args args = {
1345+
return_value,
1346+
Z_STRVAL(MBREX(search_str)),
1347+
Z_STRLEN(MBREX(search_str)),
1348+
MBREX(search_regs)
1349+
};
1350+
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
1351+
}
12941352
break;
12951353
default:
12961354
RETVAL_TRUE;
@@ -1417,6 +1475,15 @@ PHP_FUNCTION(mb_ereg_search_getregs)
14171475
add_index_bool(return_value, i, 0);
14181476
}
14191477
}
1478+
if (onig_number_of_names(MBREX(search_re)) > 0) {
1479+
mb_regex_groups_iter_args args = {
1480+
return_value,
1481+
Z_STRVAL(MBREX(search_str)),
1482+
len,
1483+
MBREX(search_regs)
1484+
};
1485+
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
1486+
}
14201487
} else {
14211488
RETVAL_FALSE;
14221489
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Testing mb_ereg() duplicate named groups
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
6+
function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
mb_regex_encoding("UTF-8");
11+
$pattern = '\w+((?<punct>？)|(?<punct>！))';
12+
mb_ereg($pattern, '中？', $m);
13+
var_dump($m);
14+
mb_ereg($pattern, '中！', $m);
15+
var_dump($m);
16+
?>
17+
--EXPECT--
18+
array(4) {
19+
[0]=>
20+
string(6) "中?"
21+
[1]=>
22+
string(3) "?"
23+
[2]=>
24+
bool(false)
25+
["punct"]=>
26+
string(3) "?"
27+
}
28+
array(4) {
29+
[0]=>
30+
string(6) "中!"
31+
[1]=>
32+
bool(false)
33+
[2]=>
34+
string(3) "!"
35+
["punct"]=>
36+
string(3) "!"
37+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
--TEST--
2+
Testing mb_ereg() named subpatterns
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
6+
function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
mb_regex_encoding("UTF-8");
11+
mb_ereg('(?<wsp>\s*)(?<word>\w+)', '  中国', $m);
12+
var_dump($m);
13+
mb_ereg('(?<wsp>\s*)(?<word>\w+)', '国', $m);
14+
var_dump($m);
15+
mb_ereg('(\s*)(?<word>\w+)', '  中国', $m);
16+
var_dump($m);
17+
?>
18+
--EXPECT--
19+
array(5) {
20+
[0]=>
21+
string(8) " 中国"
22+
[1]=>
23+
string(2) " "
24+
[2]=>
25+
string(6) "中国"
26+
["wsp"]=>
27+
string(2) " "
28+
["word"]=>
29+
string(6) "中国"
30+
}
31+
array(5) {
32+
[0]=>
33+
string(3) "国"
34+
[1]=>
35+
bool(false)
36+
[2]=>
37+
string(3) "国"
38+
["wsp"]=>
39+
bool(false)
40+
["word"]=>
41+
string(3) "国"
42+
}
43+
array(3) {
44+
[0]=>
45+
string(8) " 中国"
46+
[1]=>
47+
string(6) "中国"
48+
["word"]=>
49+
string(6) "中国"
50+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
--TEST--
2+
Testing mb_ereg_search() named capture groups
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
6+
function_exists('mb_ereg_search') or die("skip mb_ereg_search() is not available in this build");
7+
?>
8+
--FILE--
9+
<?php
10+
mb_regex_encoding("UTF-8");
11+
mb_ereg_search_init('  中国？');
12+
mb_ereg_search('(?<wsp>\s*)(?<word>\w+)(?<punct>[?!])');
13+
var_dump(mb_ereg_search_getregs());
14+
?>
15+
--EXPECT--
16+
array(7) {
17+
[0]=>
18+
string(11) " 中国?"
19+
[1]=>
20+
string(2) " "
21+
[2]=>
22+
string(6) "中国"
23+
[3]=>
24+
string(3) "?"
25+
["punct"]=>
26+
string(3) "?"
27+
["wsp"]=>
28+
string(2) " "
29+
["word"]=>
30+
string(6) "中国"
31+
}

0 commit comments

Comments
 (0)