Skip to content

Add PREG_UNMATCHED_AS_NULL flag to allow distinguishing between unmatched subpatterns and empty matches #2526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,6 @@ PHP 7.2 UPGRADE NOTES
parameter (assoc) is null. Previously JSON_OBJECT_AS_ARRAY was always
ignored.

- PCRE:
. preg_match() and other PCRE functions now distinguish between unmatched
subpatterns and empty matches by reporting NULL and "" (empty string),
respectively. Formerly, either was reported as empty string.

- Session:
. Removed register_globals related code and "!" can be used as $_SESSION key name.
. Session is made to manage session status correctly and prevents invalid operations.
Expand Down Expand Up @@ -106,6 +101,9 @@ PHP 7.2 UPGRADE NOTES

- PCRE:
. Added `J` modifier for setting PCRE_DUPNAMES.
. Added `PREG_UNMATCHED_AS_NULL` flag to allow distinguishing between unmatched
subpatterns and empty matches by reporting NULL and "" (empty string),
respectively.

- Standard:
. Simplified password hashing API updated to support Argon2i hashes when PHP is compiled with libargon2
Expand Down Expand Up @@ -274,6 +272,9 @@ See also: https://wiki.php.net/rfc/deprecations_php_7_2
. IMG_EFFECT_MULTIPLY
. IMG_BMP

- PCRE:
. PREG_UNMATCHED_AS_NULL

- Standard:
. PASSWORD_ARGON2_DEFAULT_MEMORY_COST
. PASSWORD_ARGON2_DEFAULT_TIME_COST
Expand Down
66 changes: 37 additions & 29 deletions ext/pcre/php_pcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
Expand Down Expand Up @@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
Expand Down Expand Up @@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra,
/* }}} */

/* {{{ add_offset_pair */
static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
{
zval match_pair, tmp;

array_init_size(&match_pair, 2);

/* Add (match, offset) to the return value */
if (offset < 0) { /* unset substring */
if (unmatched_as_null && offset < 0) {
ZVAL_NULL(&tmp);
} else {
ZVAL_STRINGL(&tmp, str, len);
Expand Down Expand Up @@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
{
zval result_set, /* Holds a set of subpatterns after
a global match */
*match_sets = NULL; /* An array of sets of matches for each
*match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
pcre_extra *extra = pce->extra;/* Holds results of studying */
pcre_extra extra_data; /* Used locally for exec options */
Expand All @@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
char **subpat_names; /* Array for named subpatterns */
int i;
int subpats_order; /* Order of subpattern matches */
int offset_capture; /* Capture match offsets: yes/no */
unsigned char *mark = NULL; /* Target for MARK name */
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
int offset_capture; /* Capture match offsets: yes/no */
int unmatched_as_null; /* Null non-matches: yes/no */
unsigned char *mark = NULL; /* Target for MARK name */
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
ALLOCA_FLAG(use_heap);

ZVAL_UNDEF(&marks);
Expand All @@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec

if (use_flags) {
offset_capture = flags & PREG_OFFSET_CAPTURE;
unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;

/*
* subpats_order is pre-set to pattern mode so we change it only if
Expand All @@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
offset_capture = 0;
unmatched_as_null = 0;
}

/* Negative offset counts from the end of the string. */
Expand Down Expand Up @@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&match_sets[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (offsets[i<<1] < 0) { /* unset substring */
if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&match_sets[i]);
} else {
add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
Expand All @@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
/*
* If the number of captured subpatterns on this run is
* less than the total possible number, pad the result
* arrays with NULLs.
* arrays with NULLs or empty strings.
*/
if (count < num_subpats) {
for (; i < num_subpats; i++) {
add_next_index_null(&match_sets[i]);
if (unmatched_as_null) {
add_next_index_null(&match_sets[i]);
} else {
add_next_index_string(&match_sets[i], "");
}
}
}
} else {
Expand All @@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
if (offsets[i<<1] < 0) { /* unset substring */
add_assoc_null(&result_set, subpat_names[i]);
} else {
if (unmatched_as_null && offsets[i<<1] < 0) {
add_assoc_null(&result_set, subpat_names[i]);
} else {
add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
if (offsets[i<<1] < 0) { /* unset substring */
if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&result_set);
} else {
add_next_index_stringl(&result_set, (char *)stringlist[i],
Expand All @@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (offsets[i<<1] < 0) { /* unset substring */
if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&result_set);
} else {
add_next_index_stringl(&result_set, (char *)stringlist[i],
Expand All @@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
offsets[i<<1], subpat_names[i]);
offsets[i<<1], subpat_names[i], unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
if (offsets[i<<1] < 0) { /* unset substring */
if (unmatched_as_null && offsets[i<<1] < 0) {
add_assoc_null(subpats, subpat_names[i]);
} else {
add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
if (offsets[i<<1] < 0) { /* unset substring */
if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(subpats);
} else {
add_next_index_stringl(subpats, (char *)stringlist[i],
Expand All @@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
offsets[i<<1], NULL);
offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (offsets[i<<1] < 0) { /* unset substring */
if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(subpats);
} else {
add_next_index_stringl(subpats, (char *)stringlist[i],
Expand Down Expand Up @@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec

if (offset_capture) {
/* Add (match, offset) pair to the return value */
add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0);
} else {
/* Add the piece to the return value */
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
Expand All @@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
} else {
ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
Expand Down Expand Up @@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec

start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */

if (!no_empty || start_offset < subject_len)
{
if (!no_empty || start_offset < subject_len) {
if (offset_capture) {
/* Add the last (match, offset) pair to the return value */
add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0);
} else {
/* Add the last piece to the return value */
ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
Expand Down
4 changes: 2 additions & 2 deletions ext/pcre/tests/001.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ array(10) {
[2]=>
string(2) "06"
[3]=>
NULL
string(0) ""
["month"]=>
string(2) "12"
[4]=>
Expand All @@ -75,7 +75,7 @@ array(10) {
[2]=>
string(2) "12"
[3]=>
NULL
string(0) ""
["month"]=>
string(3) "Aug"
[4]=>
Expand Down
4 changes: 2 additions & 2 deletions ext/pcre/tests/003.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ array(10) {
[0]=>
string(2) "20"
[1]=>
NULL
string(0) ""
}
["month"]=>
array(2) {
Expand Down Expand Up @@ -127,7 +127,7 @@ array(2) {
[2]=>
string(2) "12"
[3]=>
NULL
string(0) ""
["month"]=>
string(3) "Aug"
[4]=>
Expand Down
6 changes: 3 additions & 3 deletions ext/pcre/tests/004.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ array(2) {
[1]=>
string(12) "unsigned int"
[2]=>
NULL
string(0) ""
[3]=>
string(0) ""
[4]=>
Expand All @@ -41,13 +41,13 @@ array(2) {
[1]=>
string(5) "short"
[2]=>
NULL
string(0) ""
[3]=>
string(0) ""
[4]=>
string(1) "a"
[5]=>
NULL
string(0) ""
[6]=>
string(3) ", b"
}
Expand Down
2 changes: 1 addition & 1 deletion ext/pcre/tests/bug61780.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Bug #61780 (Inconsistent PCRE captures in match results): basics
--FILE--
<?php
preg_match('/(a)?([a-z]*)(\d*)/', '123', $matches);
preg_match('/(a)?([a-z]*)(\d*)/', '123', $matches, PREG_UNMATCHED_AS_NULL);
var_dump($matches);
?>
--EXPECT--
Expand Down
12 changes: 6 additions & 6 deletions ext/pcre/tests/bug61780_1.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,22 @@
Bug #61780 (Inconsistent PCRE captures in match results): numeric subpatterns
--FILE--
<?php
preg_match('/(4)?(2)?\d/', '23456', $matches);
preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE);
preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(4)?(2)?\d/', '123456', $matches);
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE);
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER);
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
var_export($matches);
?>
--EXPECT--
Expand Down
12 changes: 6 additions & 6 deletions ext/pcre/tests/bug61780_2.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,22 @@
Bug #61780 (Inconsistent PCRE captures in match results): named subpatterns
--FILE--
<?php
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches);
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE);
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches);
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE);
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER);
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
var_export($matches);
echo "\n\n";
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
var_export($matches);
?>
--EXPECT--
Expand Down
Loading