Skip to content

Commit cd5591a

Browse files
committed
PCRE: Only remember valid UTF-8 if start offset zero
PCRE only validates the string starting from the start offset (minus maximum look-behind, but let's ignore that), so we can only remember that the string is fully valid UTF-8 if the original start offset is zero.
1 parent c9e78e6 commit cd5591a

File tree

3 files changed

+19
-4
lines changed

3 files changed

+19
-4
lines changed

NEWS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ PHP NEWS
1313
- PCRE:
1414
. Fixed bug #79188 (Memory corruption in preg_replace/preg_replace_callback
1515
and unicode). (Nikita)
16+
. Fixed bug #79241 (Segmentation fault on preg_match()). (Nikita)
1617

1718
?? ??? ????, PHP 7.4.3
1819

ext/pcre/php_pcre.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,7 +1167,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
11671167
PCRE2_SPTR mark = NULL; /* Target for MARK name */
11681168
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
11691169
pcre2_match_data *match_data;
1170-
PCRE2_SIZE start_offset2;
1170+
PCRE2_SIZE start_offset2, orig_start_offset;
11711171

11721172
char *subject = ZSTR_VAL(subject_str);
11731173
size_t subject_len = ZSTR_LEN(subject_str);
@@ -1263,8 +1263,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12631263
}
12641264
}
12651265

1266-
options = (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, start_offset2)
1267-
? 0 : PCRE2_NO_UTF_CHECK;
1266+
orig_start_offset = start_offset2;
1267+
options =
1268+
(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1269+
? 0 : PCRE2_NO_UTF_CHECK;
12681270

12691271
/* Execute the regular expression. */
12701272
#ifdef HAVE_PCRE_JIT_SUPPORT
@@ -1454,7 +1456,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14541456

14551457
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
14561458
/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1457-
if ((pce->compile_options & PCRE2_UTF) && !ZSTR_IS_INTERNED(subject_str)) {
1459+
if ((pce->compile_options & PCRE2_UTF)
1460+
&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
14581461
GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
14591462
}
14601463

ext/pcre/tests/bug79241.phpt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,19 @@ var_dump(preg_match($pattern, $text, $matches, 0, 0));
1515
var_dump(preg_match($pattern, $text, $matches, 0, 1));
1616
var_dump(preg_last_error() == PREG_BAD_UTF8_OFFSET_ERROR);
1717

18+
echo "\n";
19+
20+
$text = "VA\xff"; $text .= "LID";
21+
var_dump(preg_match($pattern, $text, $matches, 0, 4));
22+
var_dump(preg_match($pattern, $text, $matches, 0, 0));
23+
var_dump(preg_last_error() == PREG_BAD_UTF8_ERROR);
24+
1825
?>
1926
--EXPECT--
2027
int(0)
2128
bool(false)
2229
bool(true)
30+
31+
int(1)
32+
bool(false)
33+
bool(true)

0 commit comments

Comments
 (0)