From 911366eb5f884413b5b285c270b1dfda34675459 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Sat, 23 May 2015 19:28:14 +0200 Subject: [PATCH 1/4] added failing tests for bug #61780 --- ext/pcre/tests/bug61780_1.phpt | 194 ++++++++++++++++++++++++ ext/pcre/tests/bug61780_2.phpt | 264 +++++++++++++++++++++++++++++++++ 2 files changed, 458 insertions(+) create mode 100644 ext/pcre/tests/bug61780_1.phpt create mode 100644 ext/pcre/tests/bug61780_2.phpt diff --git a/ext/pcre/tests/bug61780_1.phpt b/ext/pcre/tests/bug61780_1.phpt new file mode 100644 index 0000000000000..d8e35c5c21f26 --- /dev/null +++ b/ext/pcre/tests/bug61780_1.phpt @@ -0,0 +1,194 @@ +--TEST-- +Bug #61780 (Inconsistent PCRE captures in match results): numeric subpatterns +--FILE-- + +--EXPECT-- +array ( + 0 => '23', + 1 => NULL, + 2 => '2', +) + +array ( + 0 => + array ( + 0 => '23', + 1 => 0, + ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => '2', + 1 => 0, + ), +) + +array ( + 0 => + array ( + 0 => '1', + 1 => '23', + 2 => '45', + 3 => '6', + ), + 1 => + array ( + 0 => NULL, + 1 => NULL, + 2 => '4', + 3 => NULL, + ), + 2 => + array ( + 0 => NULL, + 1 => '2', + 2 => NULL, + 3 => NULL, + ), +) + +array ( + 0 => + array ( + 0 => + array ( + 0 => '1', + 1 => 0, + ), + 1 => + array ( + 0 => '23', + 1 => 1, + ), + 2 => + array ( + 0 => '45', + 1 => 3, + ), + 3 => + array ( + 0 => '6', + 1 => 5, + ), + ), + 1 => + array ( + 0 => NULL, + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => '4', + 1 => 3, + ), + 3 => NULL, + ), + 2 => + array ( + 0 => NULL, + 1 => + array ( + 0 => '2', + 1 => 1, + ), + 2 => NULL, + 3 => NULL, + ), +) + +array ( + 0 => + array ( + 0 => '1', + ), + 1 => + array ( + 0 => '23', + 1 => NULL, + 2 => '2', + ), + 2 => + array ( + 0 => '45', + 1 => '4', + ), + 3 => + array ( + 0 => '6', + ), +) + +array ( + 0 => + array ( + 0 => + array ( + 0 => '1', + 1 => 0, + ), + ), + 1 => + array ( + 0 => + array ( + 0 => '23', + 1 => 1, + ), + 1 
=> + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => '2', + 1 => 1, + ), + ), + 2 => + array ( + 0 => + array ( + 0 => '45', + 1 => 3, + ), + 1 => + array ( + 0 => '4', + 1 => 3, + ), + ), + 3 => + array ( + 0 => + array ( + 0 => '6', + 1 => 5, + ), + ), +) diff --git a/ext/pcre/tests/bug61780_2.phpt b/ext/pcre/tests/bug61780_2.phpt new file mode 100644 index 0000000000000..375c02f5d24bb --- /dev/null +++ b/ext/pcre/tests/bug61780_2.phpt @@ -0,0 +1,264 @@ +--TEST-- +Bug #61780 (Inconsistent PCRE captures in match results): named subpatterns +--FILE-- +<?php +preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches); +var_export($matches); +echo "\n\n"; +preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE); +var_export($matches); +echo "\n\n"; +preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches); +var_export($matches); +echo "\n\n"; +preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE); +var_export($matches); +echo "\n\n"; +preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER); +var_export($matches); +echo "\n\n"; +preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); +var_export($matches); +?> +--EXPECT-- +array ( + 0 => '23', + 'a' => NULL, + 1 => NULL, + 'b' => '2', + 2 => '2', +) + +array ( + 0 => + array ( + 0 => '23', + 1 => 0, + ), + 'a' => + array ( + 0 => NULL, + 1 => -1, + ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 'b' => + array ( + 0 => '2', + 1 => 0, + ), + 2 => + array ( + 0 => '2', + 1 => 0, + ), +) + +array ( + 0 => + array ( + 0 => '1', + 1 => '23', + 2 => '45', + 3 => '6', + ), + 'a' => + array ( + 0 => NULL, + 1 => NULL, + 2 => '4', + 3 => NULL, + ), + 1 => + array ( + 0 => NULL, + 1 => NULL, + 2 => '4', + 3 => NULL, + ), + 'b' => + array ( + 0 => NULL, + 1 => '2', + 2 => NULL, + 3 => NULL, + ), + 2 => + array ( + 0 => NULL, + 1 => '2', + 2 => NULL, + 3 => NULL, + ), +) + +array ( + 0 => + array ( + 0 => + array ( + 0 => '1', + 1 => 0, + ), + 1 => + array ( + 0 => 
'23', + 1 => 1, + ), + 2 => + array ( + 0 => '45', + 1 => 3, + ), + 3 => + array ( + 0 => '6', + 1 => 5, + ), + ), + 'a' => + array ( + 0 => NULL, + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => '4', + 1 => 3, + ), + 3 => NULL, + ), + 1 => + array ( + 0 => NULL, + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => '4', + 1 => 3, + ), + 3 => NULL, + ), + 'b' => + array ( + 0 => NULL, + 1 => + array ( + 0 => '2', + 1 => 1, + ), + 2 => NULL, + 3 => NULL, + ), + 2 => + array ( + 0 => NULL, + 1 => + array ( + 0 => '2', + 1 => 1, + ), + 2 => NULL, + 3 => NULL, + ), +) + +array ( + 0 => + array ( + 0 => '1', + ), + 1 => + array ( + 0 => '23', + 'a' => NULL, + 1 => NULL, + 'b' => '2', + 2 => '2', + ), + 2 => + array ( + 0 => '45', + 'a' => '4', + 1 => '4', + ), + 3 => + array ( + 0 => '6', + ), +) + +array ( + 0 => + array ( + 0 => + array ( + 0 => '1', + 1 => 0, + ), + ), + 1 => + array ( + 0 => + array ( + 0 => '23', + 1 => 1, + ), + 'a' => + array ( + 0 => NULL, + 1 => -1, + ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 'b' => + array ( + 0 => '2', + 1 => 1, + ), + 2 => + array ( + 0 => '2', + 1 => 1, + ), + ), + 2 => + array ( + 0 => + array ( + 0 => '45', + 1 => 3, + ), + 'a' => + array ( + 0 => '4', + 1 => 3, + ), + 1 => + array ( + 0 => '4', + 1 => 3, + ), + ), + 3 => + array ( + 0 => + array ( + 0 => '6', + 1 => 5, + ), + ), +) From 6f33594978e56625bd1f6eb0f245e87c0126a5e8 Mon Sep 17 00:00:00 2001 From: "Christoph M. 
Becker" Date: Sat, 23 May 2015 19:44:42 +0200 Subject: [PATCH 2/4] changed preg_*() to yield NULL instead of '' for unset substrings --- ext/pcre/php_pcre.c | 64 +++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 3420a099882bc..0fe5afd988480 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -540,7 +540,11 @@ static inline void add_offset_pair(zval *result, char *str, int len, int offset, array_init_size(&match_pair, 2); /* Add (match, offset) to the return value */ - ZVAL_STRINGL(&tmp, str, len); + if (offset < 0) { /* unset substring */ + ZVAL_NULL(&tmp); + } else { + ZVAL_STRINGL(&tmp, str, len); + } zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); ZVAL_LONG(&tmp, offset); zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); @@ -741,8 +745,12 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } } else { for (i = 0; i < count; i++) { - add_next_index_stringl(&match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); + if (offsets[i<<1] < 0) { /* unset substring */ + add_next_index_null(&match_sets[i]); + } else { + add_next_index_stringl(&match_sets[i], (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } } } /* Add MARK, if available */ @@ -755,11 +763,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result - * arrays with empty strings. + * arrays with NULLs. 
*/ if (count < num_subpats) { for (; i < num_subpats; i++) { - add_next_index_string(&match_sets[i], ""); + add_next_index_null(&match_sets[i]); } } } else { @@ -776,11 +784,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } else { for (i = 0; i < count; i++) { if (subpat_names[i]) { - add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i], + if (offsets[i<<1] < 0) { /* unset substring */ + add_assoc_null(&result_set, subpat_names[i]); + } else { + add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } + } + if (offsets[i<<1] < 0) { /* unset substring */ + add_next_index_null(&result_set); + } else { + add_next_index_stringl(&result_set, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1]); } - add_next_index_stringl(&result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); } } } else { @@ -791,8 +807,12 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } } else { for (i = 0; i < count; i++) { - add_next_index_stringl(&result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); + if (offsets[i<<1] < 0) { /* unset substring */ + add_next_index_null(&result_set); + } else { + add_next_index_stringl(&result_set, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } } } } @@ -815,11 +835,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } else { for (i = 0; i < count; i++) { if (subpat_names[i]) { - add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); + if (offsets[i<<1] < 0) { /* unset substring */ + add_assoc_null(subpats, subpat_names[i]); + } else { + add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } + } + if (offsets[i<<1] < 0) { /* unset substring */ + add_next_index_null(subpats); + } else { + add_next_index_stringl(subpats, (char 
*)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); } - add_next_index_stringl(subpats, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); } } } else { @@ -831,8 +859,12 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } } else { for (i = 0; i < count; i++) { - add_next_index_stringl(subpats, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); + if (offsets[i<<1] < 0) { /* unset substring */ + add_next_index_null(subpats); + } else { + add_next_index_stringl(subpats, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1]); + } } } } From 303b3655e62390db0c5daf240f860e62e87753b5 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Sat, 23 May 2015 19:47:47 +0200 Subject: [PATCH 3/4] adjusted other tests to cater to changed behavior --- ext/pcre/tests/001.phpt | 4 ++-- ext/pcre/tests/003.phpt | 4 ++-- ext/pcre/tests/004.phpt | 6 +++--- ext/pcre/tests/marks.phpt | 40 +++++++++++++++++++-------------------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ext/pcre/tests/001.phpt b/ext/pcre/tests/001.phpt index 313f7fdc679fb..7aeebf3cf6a60 100644 --- a/ext/pcre/tests/001.phpt +++ b/ext/pcre/tests/001.phpt @@ -52,7 +52,7 @@ array(10) { [2]=> string(2) "06" [3]=> - string(0) "" + NULL ["month"]=> string(2) "12" [4]=> @@ -75,7 +75,7 @@ array(10) { [2]=> string(2) "12" [3]=> - string(0) "" + NULL ["month"]=> string(3) "Aug" [4]=> diff --git a/ext/pcre/tests/003.phpt b/ext/pcre/tests/003.phpt index e697c375c6d5f..2144032d9d0a2 100644 --- a/ext/pcre/tests/003.phpt +++ b/ext/pcre/tests/003.phpt @@ -58,7 +58,7 @@ array(10) { [0]=> string(2) "20" [1]=> - string(0) "" + NULL } ["month"]=> array(2) { @@ -127,7 +127,7 @@ array(2) { [2]=> string(2) "12" [3]=> - string(0) "" + NULL ["month"]=> string(3) "Aug" [4]=> diff --git a/ext/pcre/tests/004.phpt b/ext/pcre/tests/004.phpt index 11361d1b32d14..29f8204351e78 100644 --- a/ext/pcre/tests/004.phpt +++ b/ext/pcre/tests/004.phpt @@ -24,7 +24,7 @@ array(2) 
{ [1]=> string(12) "unsigned int" [2]=> - string(0) "" + NULL [3]=> string(0) "" [4]=> @@ -41,13 +41,13 @@ array(2) { [1]=> string(5) "short" [2]=> - string(0) "" + NULL [3]=> string(0) "" [4]=> string(1) "a" [5]=> - string(0) "" + NULL [6]=> string(3) ", b" } diff --git a/ext/pcre/tests/marks.phpt b/ext/pcre/tests/marks.phpt index 8838a00500880..c065caab186eb 100644 --- a/ext/pcre/tests/marks.phpt +++ b/ext/pcre/tests/marks.phpt @@ -39,9 +39,9 @@ array(5) { [0]=> string(3) "_c_" [1]=> - string(0) "" + NULL [2]=> - string(0) "" + NULL [3]=> string(1) "c" ["MARK"]=> @@ -65,42 +65,42 @@ array(6) { [0]=> string(1) "a" [1]=> - string(0) "" + NULL [2]=> - string(0) "" + NULL [3]=> - string(0) "" + NULL } [2]=> array(4) { [0]=> - string(0) "" + NULL [1]=> string(1) "b" [2]=> - string(0) "" + NULL [3]=> - string(0) "" + NULL } [3]=> array(4) { [0]=> - string(0) "" + NULL [1]=> - string(0) "" + NULL [2]=> string(1) "c" [3]=> - string(0) "" + NULL } [4]=> array(4) { [0]=> - string(0) "" + NULL [1]=> - string(0) "" + NULL [2]=> - string(0) "" + NULL [3]=> string(1) "d" } @@ -128,7 +128,7 @@ array(4) { [0]=> string(3) "_b_" [1]=> - string(0) "" + NULL [2]=> string(1) "b" } @@ -137,9 +137,9 @@ array(4) { [0]=> string(3) "_c_" [1]=> - string(0) "" + NULL [2]=> - string(0) "" + NULL [3]=> string(1) "c" ["MARK"]=> @@ -150,11 +150,11 @@ array(4) { [0]=> string(3) "_d_" [1]=> - string(0) "" + NULL [2]=> - string(0) "" + NULL [3]=> - string(0) "" + NULL [4]=> string(1) "d" } From 5ca664abbf839708950d57c962438d8878459ec5 Mon Sep 17 00:00:00 2001 From: "Christoph M. 
Becker" Date: Sat, 23 May 2015 20:34:06 +0200 Subject: [PATCH 4/4] added test to clearly show distinction between unset (aka unmatched) subpattern and an empty match --- ext/pcre/tests/bug61780.phpt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 ext/pcre/tests/bug61780.phpt diff --git a/ext/pcre/tests/bug61780.phpt b/ext/pcre/tests/bug61780.phpt new file mode 100644 index 0000000000000..25b1e13126890 --- /dev/null +++ b/ext/pcre/tests/bug61780.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #61780 (Inconsistent PCRE captures in match results): basics +--FILE-- + +--EXPECT-- +array(4) { + [0]=> + string(3) "123" + [1]=> + NULL + [2]=> + string(0) "" + [3]=> + string(3) "123" +}