Skip to content

Commit a63852f

Browse files
committed
Fix GH-17122: memory leak in regex
Because the subpattern names are persistent, and the fact that the symbol table destruction is skipped when using fast_shutdown, this means the refcounts will not be updated for the destruction of the arrays that hold the subpattern name keys. To solve this, detect this situation and duplicate the strings.
1 parent b86308c commit a63852f

File tree

2 files changed

+76
-4
lines changed

2 files changed

+76
-4
lines changed

ext/pcre/php_pcre.c

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,22 @@ static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce
549549
}
550550
/* }}} */
551551

552+
/* Because the subpattern names are persistent, and the fact that the
553+
* symbol table destruction is skipped when using fast_shutdown,
554+
* this means the refcounts will not be updated for the destruction of
555+
* the arrays that hold the subpattern name keys.
556+
* To solve this, detect this situation and duplicate the strings. */
557+
static zend_always_inline bool has_symbol_table_hazard(void)
558+
{
559+
zend_execute_data *ex = EG(current_execute_data);
560+
if (UNEXPECTED(!ex || !ex->prev_execute_data)) {
561+
return false;
562+
}
563+
/* Note: we may also narrow this to only check if the symbol table equals
564+
* &EG(symbol_table), but this is a cheaper check. */
565+
return ZEND_CALL_INFO(ex->prev_execute_data) & ZEND_CALL_HAS_SYMBOL_TABLE;
566+
}
567+
552568
/* {{{ static calculate_unit_length */
553569
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
554570
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
@@ -971,12 +987,26 @@ static zend_always_inline void populate_match_value(
971987

972988
static inline void add_named(
973989
HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
990+
ZEND_ASSERT(GC_FLAGS(name) & IS_STR_PERSISTENT);
991+
992+
bool symbol_table_hazard = has_symbol_table_hazard();
993+
974994
/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
975995
* In this case we want to preserve the one that actually has a value. */
976996
if (!unmatched) {
977-
zend_hash_update(subpats, name, val);
997+
if (EXPECTED(!symbol_table_hazard)) {
998+
zend_hash_update(subpats, name, val);
999+
} else {
1000+
zend_hash_str_update(subpats, ZSTR_VAL(name), ZSTR_LEN(name), val);
1001+
}
9781002
} else {
979-
if (!zend_hash_add(subpats, name, val)) {
1003+
zval *zv;
1004+
if (EXPECTED(!symbol_table_hazard)) {
1005+
zv = zend_hash_add(subpats, name, val);
1006+
} else {
1007+
zv = zend_hash_str_add(subpats, ZSTR_VAL(name), ZSTR_LEN(name), val);
1008+
}
1009+
if (!zv) {
9801010
return;
9811011
}
9821012
}
@@ -1061,10 +1091,16 @@ static void populate_subpat_array(
10611091
zend_hash_next_index_insert_new(subpats_ht, &val);
10621092
}
10631093
if (unmatched_as_null) {
1094+
bool symbol_table_hazard = has_symbol_table_hazard();
1095+
10641096
for (i = count; i < num_subpats; i++) {
10651097
ZVAL_NULL(&val);
10661098
if (subpat_names[i]) {
1067-
zend_hash_add(subpats_ht, subpat_names[i], &val);
1099+
if (EXPECTED(!symbol_table_hazard)) {
1100+
zend_hash_add(subpats_ht, subpat_names[i], &val);
1101+
} else {
1102+
zend_hash_str_add(subpats_ht, ZSTR_VAL(subpat_names[i]), ZSTR_LEN(subpat_names[i]), &val);
1103+
}
10681104
}
10691105
zend_hash_next_index_insert_new(subpats_ht, &val);
10701106
}
@@ -1429,11 +1465,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14291465
/* Add the match sets to the output array and clean up */
14301466
if (match_sets) {
14311467
if (subpat_names) {
1468+
bool symbol_table_hazard = has_symbol_table_hazard();
1469+
14321470
for (i = 0; i < num_subpats; i++) {
14331471
zval wrapper;
14341472
ZVAL_ARR(&wrapper, match_sets[i]);
14351473
if (subpat_names[i]) {
1436-
zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1474+
if (EXPECTED(!symbol_table_hazard)) {
1475+
zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1476+
} else {
1477+
zend_hash_str_update(Z_ARRVAL_P(subpats), ZSTR_VAL(subpat_names[i]), ZSTR_LEN(subpat_names[i]), &wrapper);
1478+
}
14371479
GC_ADDREF(match_sets[i]);
14381480
}
14391481
zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);

ext/pcre/tests/gh17122.phpt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--TEST--
2+
GH-17122 (memory leak in regex)
3+
--FILE--
4+
<?php
5+
preg_match('|(?P<name>)(\d+)|', 0xffffffff, $m1);
6+
var_dump($m1);
7+
\preg_match('|(?P<name2>)(\d+)|', 0, $m2);
8+
var_dump($m2);
9+
?>
10+
--EXPECT--
11+
array(4) {
12+
[0]=>
13+
string(10) "4294967295"
14+
["name"]=>
15+
string(0) ""
16+
[1]=>
17+
string(0) ""
18+
[2]=>
19+
string(10) "4294967295"
20+
}
21+
array(4) {
22+
[0]=>
23+
string(1) "0"
24+
["name2"]=>
25+
string(0) ""
26+
[1]=>
27+
string(0) ""
28+
[2]=>
29+
string(1) "0"
30+
}

0 commit comments

Comments
 (0)