From c635bd7a5213589574c0b5113d30d93f02640064 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Tue, 16 May 2017 12:46:32 +0200 Subject: [PATCH] add PREG_UNMATCHED_AS_NULL flag to allow distinguish between unmatched subpatterns and empty matches --- UPGRADING | 11 +++--- ext/pcre/php_pcre.c | 66 +++++++++++++++++++--------------- ext/pcre/tests/001.phpt | 4 +-- ext/pcre/tests/003.phpt | 4 +-- ext/pcre/tests/004.phpt | 6 ++-- ext/pcre/tests/bug61780.phpt | 2 +- ext/pcre/tests/bug61780_1.phpt | 12 +++---- ext/pcre/tests/bug61780_2.phpt | 12 +++---- ext/pcre/tests/marks.phpt | 40 ++++++++++----------- 9 files changed, 83 insertions(+), 74 deletions(-) diff --git a/UPGRADING b/UPGRADING index 0530958def026..dc663775fc4b3 100644 --- a/UPGRADING +++ b/UPGRADING @@ -56,11 +56,6 @@ PHP 7.2 UPGRADE NOTES parameter (assoc) is null. Previously JSON_OBJECT_AS_ARRAY was always ignored. -- PCRE: - . preg_match() and other PCRE functions now distinguish between unmatched - subpatterns and empty matches by reporting NULL and "" (empty string), - respectively. Formerly, either was reported as empty string. - - Session: . Removed register_globals related code and "!" can be used as $_SESSION key name. . Session is made to manage session status correctly and prevents invalid operations. @@ -106,6 +101,9 @@ PHP 7.2 UPGRADE NOTES - PCRE: . Added `J` modifier for setting PCRE_DUPNAMES. + . Added `PREG_UNMATCHED_AS_NULL` flag to distinguish between unmatched + subpatterns and empty matches by reporting NULL and "" (empty string), + respectively. - Standard: . Simplified password hashing API updated to support Argon2i hashes when PHP is compiled with libargon2 @@ -274,6 +272,9 @@ See also: https://wiki.php.net/rfc/deprecations_php_7_2 . IMG_EFFECT_MULTIPLY . IMG_BMP +- PCRE: + . PREG_UNMATCHED_AS_NULL + - Standard: . PASSWORD_ARGON2_DEFAULT_MEMORY_COST .
PASSWORD_ARGON2_DEFAULT_TIME_COST diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 0890e5247e565..97ccfb37f7a28 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -33,6 +33,7 @@ #define PREG_PATTERN_ORDER 1 #define PREG_SET_ORDER 2 #define PREG_OFFSET_CAPTURE (1<<8) +#define PREG_UNMATCHED_AS_NULL (1<<9) #define PREG_SPLIT_NO_EMPTY (1<<0) #define PREG_SPLIT_DELIM_CAPTURE (1<<1) @@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre) REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); @@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, /* }}} */ /* {{{ add_offset_pair */ -static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) +static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null) { zval match_pair, tmp; array_init_size(&match_pair, 2); /* Add (match, offset) to the return value */ - if (offset < 0) { /* unset substring */ + if (unmatched_as_null && offset < 0) { ZVAL_NULL(&tmp); } else { ZVAL_STRINGL(&tmp, str, len); @@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec { zval result_set, /* Holds a set of subpatterns after a global match */ - *match_sets = NULL; /* An array of sets of matches for each + *match_sets = 
NULL; /* An array of sets of matches for each subpattern after a global match */ pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ @@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec char **subpat_names; /* Array for named subpatterns */ int i; int subpats_order; /* Order of subpattern matches */ - int offset_capture; /* Capture match offsets: yes/no */ - unsigned char *mark = NULL; /* Target for MARK name */ - zval marks; /* Array of marks for PREG_PATTERN_ORDER */ + int offset_capture; /* Capture match offsets: yes/no */ + int unmatched_as_null; /* Null non-matches: yes/no */ + unsigned char *mark = NULL; /* Target for MARK name */ + zval marks; /* Array of marks for PREG_PATTERN_ORDER */ ALLOCA_FLAG(use_heap); ZVAL_UNDEF(&marks); @@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (use_flags) { offset_capture = flags & PREG_OFFSET_CAPTURE; + unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL; /* * subpats_order is pre-set to pattern mode so we change it only if @@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } } else { offset_capture = 0; + unmatched_as_null = 0; } /* Negative offset counts from the end of the string. 
*/ @@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair(&match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(&match_sets[i]); } else { add_next_index_stringl(&match_sets[i], (char *)stringlist[i], @@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result - * arrays with NULLs. + * arrays with NULLs or empty strings. */ if (count < num_subpats) { for (; i < num_subpats; i++) { - add_next_index_null(&match_sets[i]); + if (unmatched_as_null) { + add_next_index_null(&match_sets[i]); + } else { + add_next_index_string(&match_sets[i], ""); + } } } } else { @@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair(&result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null); } } else { for (i = 0; i < count; i++) { if (subpat_names[i]) { - if (offsets[i<<1] < 0) { /* unset substring */ - add_assoc_null(&result_set, subpat_names[i]); - } else { + if (unmatched_as_null && offsets[i<<1] < 0) { + add_assoc_null(&result_set, subpat_names[i]); + } else { add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); - } + offsets[(i<<1)+1] - offsets[i<<1]); + } } - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && 
offsets[i<<1] < 0) { add_next_index_null(&result_set); } else { add_next_index_stringl(&result_set, (char *)stringlist[i], @@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair(&result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(&result_set); } else { add_next_index_stringl(&result_set, (char *)stringlist[i], @@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec for (i = 0; i < count; i++) { add_offset_pair(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], subpat_names[i]); + offsets[i<<1], subpat_names[i], unmatched_as_null); } } else { for (i = 0; i < count; i++) { if (subpat_names[i]) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_assoc_null(subpats, subpat_names[i]); } else { add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1]); } } - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(subpats); } else { add_next_index_stringl(subpats, (char *)stringlist[i], @@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec for (i = 0; i < count; i++) { add_offset_pair(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], NULL); + offsets[i<<1], NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(subpats); } else { 
add_next_index_stringl(subpats, (char *)stringlist[i], @@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { /* Add (match, offset) pair to the return value */ - add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL); + add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0); } else { /* Add the piece to the return value */ ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match); @@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec /* If we have matched a delimiter */ if (!no_empty || match_len > 0) { if (offset_capture) { - add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL); + add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0); } else { ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len); zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); @@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */ - if (!no_empty || start_offset < subject_len) - { + if (!no_empty || start_offset < subject_len) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL); + add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0); } else { /* Add the last piece to the return value */ ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match); diff --git a/ext/pcre/tests/001.phpt b/ext/pcre/tests/001.phpt index 7aeebf3cf6a60..313f7fdc679fb 100644 --- a/ext/pcre/tests/001.phpt +++ b/ext/pcre/tests/001.phpt @@ -52,7 +52,7 @@ 
array(10) { [2]=> string(2) "06" [3]=> - NULL + string(0) "" ["month"]=> string(2) "12" [4]=> @@ -75,7 +75,7 @@ array(10) { [2]=> string(2) "12" [3]=> - NULL + string(0) "" ["month"]=> string(3) "Aug" [4]=> diff --git a/ext/pcre/tests/003.phpt b/ext/pcre/tests/003.phpt index 2144032d9d0a2..e697c375c6d5f 100644 --- a/ext/pcre/tests/003.phpt +++ b/ext/pcre/tests/003.phpt @@ -58,7 +58,7 @@ array(10) { [0]=> string(2) "20" [1]=> - NULL + string(0) "" } ["month"]=> array(2) { @@ -127,7 +127,7 @@ array(2) { [2]=> string(2) "12" [3]=> - NULL + string(0) "" ["month"]=> string(3) "Aug" [4]=> diff --git a/ext/pcre/tests/004.phpt b/ext/pcre/tests/004.phpt index 29f8204351e78..11361d1b32d14 100644 --- a/ext/pcre/tests/004.phpt +++ b/ext/pcre/tests/004.phpt @@ -24,7 +24,7 @@ array(2) { [1]=> string(12) "unsigned int" [2]=> - NULL + string(0) "" [3]=> string(0) "" [4]=> @@ -41,13 +41,13 @@ array(2) { [1]=> string(5) "short" [2]=> - NULL + string(0) "" [3]=> string(0) "" [4]=> string(1) "a" [5]=> - NULL + string(0) "" [6]=> string(3) ", b" } diff --git a/ext/pcre/tests/bug61780.phpt b/ext/pcre/tests/bug61780.phpt index 25b1e13126890..fdf58f569287e 100644 --- a/ext/pcre/tests/bug61780.phpt +++ b/ext/pcre/tests/bug61780.phpt @@ -2,7 +2,7 @@ Bug #61780 (Inconsistent PCRE captures in match results): basics --FILE-- --EXPECT-- diff --git a/ext/pcre/tests/bug61780_1.phpt b/ext/pcre/tests/bug61780_1.phpt index d8e35c5c21f26..dc5806cb30e40 100644 --- a/ext/pcre/tests/bug61780_1.phpt +++ b/ext/pcre/tests/bug61780_1.phpt @@ -2,22 +2,22 @@ Bug #61780 (Inconsistent PCRE captures in match results): numeric subpatterns --FILE-- --EXPECT-- diff --git a/ext/pcre/tests/bug61780_2.phpt b/ext/pcre/tests/bug61780_2.phpt index 375c02f5d24bb..faf44d368bef7 100644 --- a/ext/pcre/tests/bug61780_2.phpt +++ b/ext/pcre/tests/bug61780_2.phpt @@ -2,22 +2,22 @@ Bug #61780 (Inconsistent PCRE captures in match results): named subpatterns --FILE-- 4)?(?2)?\d/', '23456', $matches); +preg_match('/(?4)?(?2)?\d/', 
'23456', $matches, PREG_UNMATCHED_AS_NULL); var_export($matches); echo "\n\n"; -preg_match('/(?4)?(?2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE); +preg_match('/(?4)?(?2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL); var_export($matches); echo "\n\n"; -preg_match_all('/(?4)?(?2)?\d/', '123456', $matches); +preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL); var_export($matches); echo "\n\n"; -preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE); +preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL); var_export($matches); echo "\n\n"; -preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_SET_ORDER); +preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL); var_export($matches); echo "\n\n"; -preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); +preg_match_all('/(?4)?(?2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL); var_export($matches); ?> --EXPECT-- diff --git a/ext/pcre/tests/marks.phpt b/ext/pcre/tests/marks.phpt index c065caab186eb..8838a00500880 100644 --- a/ext/pcre/tests/marks.phpt +++ b/ext/pcre/tests/marks.phpt @@ -39,9 +39,9 @@ array(5) { [0]=> string(3) "_c_" [1]=> - NULL + string(0) "" [2]=> - NULL + string(0) "" [3]=> string(1) "c" ["MARK"]=> @@ -65,42 +65,42 @@ array(6) { [0]=> string(1) "a" [1]=> - NULL + string(0) "" [2]=> - NULL + string(0) "" [3]=> - NULL + string(0) "" } [2]=> array(4) { [0]=> - NULL + string(0) "" [1]=> string(1) "b" [2]=> - NULL + string(0) "" [3]=> - NULL + string(0) "" } [3]=> array(4) { [0]=> - NULL + string(0) "" [1]=> - NULL + string(0) "" [2]=> string(1) "c" [3]=> - NULL + string(0) "" } [4]=> array(4) { [0]=> - NULL + string(0) "" [1]=> - NULL + string(0) "" [2]=> - NULL + string(0) "" [3]=> string(1) "d" } @@ -128,7 +128,7 @@ array(4) { [0]=> string(3) "_b_" [1]=> - NULL + 
string(0) "" [2]=> string(1) "b" } @@ -137,9 +137,9 @@ array(4) { [0]=> string(3) "_c_" [1]=> - NULL + string(0) "" [2]=> - NULL + string(0) "" [3]=> string(1) "c" ["MARK"]=> @@ -150,11 +150,11 @@ array(4) { [0]=> string(3) "_d_" [1]=> - NULL + string(0) "" [2]=> - NULL + string(0) "" [3]=> - NULL + string(0) "" [4]=> string(1) "d" }