Skip to content

Commit 1ec40e8

Browse files
committed
Merge branch 'PHP-5.6'
* PHP-5.6: Fix #70232: Incorrect bump-along behavior with \K and empty string match. Resolved conflicts: ext/pcre/php_pcre.c
2 parents 715d5d2 + cd18ba1 commit 1ec40e8

File tree

2 files changed

+82
-9
lines changed

2 files changed

+82
-9
lines changed

ext/pcre/php_pcre.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444

4545
#define PCRE_CACHE_SIZE 4096
4646

47+
/* Not fully functional workaround for libpcre < 8.0, which does not define PCRE_NOTEMPTY_ATSTART; see bug #70232 */
48+
#ifndef PCRE_NOTEMPTY_ATSTART
49+
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
50+
#endif
51+
4752
enum {
4853
PHP_PCRE_NO_ERROR = 0,
4954
PHP_PCRE_INTERNAL_ERROR,
@@ -876,7 +881,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
876881
pcre_free((void *) stringlist);
877882
}
878883
} else if (count == PCRE_ERROR_NOMATCH) {
879-
/* If we previously set PCRE_NOTEMPTY after a null match,
884+
/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
880885
this is not necessarily the end. We need to advance
881886
the start offset, and continue. Fudge the offset values
882887
to achieve this, unless we're already at the end of the string. */
@@ -893,10 +898,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
893898
}
894899

895900
/* If we have matched an empty string, mimic what Perl's /g option does.
896-
This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
901+
This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
897902
the match again at the same point. If this fails (picked up above) we
898903
advance to the next character. */
899-
g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
904+
g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
900905

901906
/* Advance to the position right after the last full match */
902907
start_offset = offsets[1];
@@ -1271,7 +1276,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
12711276
limit--;
12721277
}
12731278
} else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
1274-
/* If we previously set PCRE_NOTEMPTY after a null match,
1279+
/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
12751280
this is not necessarily the end. We need to advance
12761281
the start offset, and continue. Fudge the offset values
12771282
to achieve this, unless we're already at the end of the string. */
@@ -1313,10 +1318,10 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
13131318
}
13141319

13151320
/* If we have matched an empty string, mimic what Perl's /g option does.
1316-
This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1321+
This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
13171322
the match again at the same point. If this fails (picked up above) we
13181323
advance to the next character. */
1319-
g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1324+
g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
13201325

13211326
/* Advance to the next piece. */
13221327
start_offset = offsets[1];
@@ -1815,7 +1820,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
18151820
}
18161821
}
18171822
} else if (count == PCRE_ERROR_NOMATCH) {
1818-
/* If we previously set PCRE_NOTEMPTY after a null match,
1823+
/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
18191824
this is not necessarily the end. We need to advance
18201825
the start offset, and continue. Fudge the offset values
18211826
to achieve this, unless we're already at the end of the string. */
@@ -1849,10 +1854,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
18491854
}
18501855

18511856
/* If we have matched an empty string, mimic what Perl's /g option does.
1852-
This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1857+
This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
18531858
the match again at the same point. If this fails (picked up above) we
18541859
advance to the next character. */
1855-
g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1860+
g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
18561861

18571862
/* Advance to the position right after the last full match */
18581863
start_offset = offsets[1];

ext/pcre/tests/bug70232.phpt

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
--TEST--
2+
Bug #70232 (Incorrect bump-along behavior with \K and empty string match)
3+
--SKIPIF--
4+
<?php
5+
if (version_compare(explode(' ', PCRE_VERSION)[0], '8.0', 'lt')) {
6+
die("skip this test requires libpcre >= 8.0");
7+
}
8+
?>
9+
--FILE--
10+
<?php
11+
$pattern = '~(?: |\G)\d\B\K~';
12+
$subject = "123 a123 1234567 b123 123";
13+
preg_match_all($pattern, $subject, $matches);
14+
var_dump($matches);
15+
var_dump(preg_replace($pattern, "*", $subject));
16+
var_dump(preg_split($pattern, $subject));
17+
?>
18+
--EXPECT--
19+
array(1) {
20+
[0]=>
21+
array(10) {
22+
[0]=>
23+
string(0) ""
24+
[1]=>
25+
string(0) ""
26+
[2]=>
27+
string(0) ""
28+
[3]=>
29+
string(0) ""
30+
[4]=>
31+
string(0) ""
32+
[5]=>
33+
string(0) ""
34+
[6]=>
35+
string(0) ""
36+
[7]=>
37+
string(0) ""
38+
[8]=>
39+
string(0) ""
40+
[9]=>
41+
string(0) ""
42+
}
43+
}
44+
string(35) "1*2*3 a123 1*2*3*4*5*6*7 b123 1*2*3"
45+
array(11) {
46+
[0]=>
47+
string(1) "1"
48+
[1]=>
49+
string(1) "2"
50+
[2]=>
51+
string(8) "3 a123 1"
52+
[3]=>
53+
string(1) "2"
54+
[4]=>
55+
string(1) "3"
56+
[5]=>
57+
string(1) "4"
58+
[6]=>
59+
string(1) "5"
60+
[7]=>
61+
string(1) "6"
62+
[8]=>
63+
string(8) "7 b123 1"
64+
[9]=>
65+
string(1) "2"
66+
[10]=>
67+
string(1) "3"
68+
}

0 commit comments

Comments
 (0)