Skip to content

Commit dc27acd

Browse files
committed
Fix GH-17122: memory leak in regex
Because the subpattern names are persistent, and the fact that the symbol table destruction is skipped when using fast_shutdown, this means the refcounts will not be updated for the destruction of the arrays that hold the subpattern name keys. To solve this, detect this situation and duplicate the strings. Closes GH-17132.
1 parent 61dcfc4 commit dc27acd

File tree

2 files changed

+73
-33
lines changed

2 files changed

+73
-33
lines changed

ext/pcre/php_pcre.c

Lines changed: 43 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,14 @@ char *php_pcre_version;
4949

5050
struct _pcre_cache_entry {
5151
pcre2_code *re;
52-
/* Pointer is not NULL when there are named captures.
53-
* Length is equal to capture_count + 1 to account for capture group 0. */
52+
/* Pointer is not NULL (during request) when there are named captures.
53+
* Length is equal to capture_count + 1 to account for capture group 0.
54+
* This table cache is only valid during request.
55+
* Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
56+
* (see GH-17122 and GH-17132). */
5457
zend_string **subpats_table;
5558
uint32_t preg_options;
59+
uint32_t name_count;
5660
uint32_t capture_count;
5761
uint32_t compile_options;
5862
uint32_t refcount;
@@ -478,6 +482,14 @@ static PHP_RINIT_FUNCTION(pcre)
478482

479483
static PHP_RSHUTDOWN_FUNCTION(pcre)
480484
{
485+
pcre_cache_entry *pce;
486+
ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
487+
if (pce->subpats_table) {
488+
free_subpats_table(pce->subpats_table, pce->capture_count + 1);
489+
pce->subpats_table = NULL;
490+
}
491+
} ZEND_HASH_FOREACH_END();
492+
481493
pcre2_general_context_free(PCRE_G(gctx_zmm));
482494
PCRE_G(gctx_zmm) = NULL;
483495

@@ -509,10 +521,10 @@ static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats)
509521
uint32_t i;
510522
for (i = 0; i < num_subpats; i++) {
511523
if (subpat_names[i]) {
512-
zend_string_release_ex(subpat_names[i], true);
524+
zend_string_release_ex(subpat_names[i], false);
513525
}
514526
}
515-
pefree(subpat_names, true);
527+
efree(subpat_names);
516528
}
517529

518530
/* {{{ static make_subpats_table */
@@ -531,24 +543,25 @@ static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce
531543
return NULL;
532544
}
533545

534-
subpat_names = pecalloc(num_subpats, sizeof(zend_string *), true);
546+
subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
535547
while (ni++ < name_cnt) {
536548
unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
537549
const char *name = name_table + 2;
538-
/* Note: this makes a persistent string when the cache is not request-based because the string
539-
* has to outlive the request. In that case, they will only be used within this thread
540-
* and never be shared.
541-
* Although we will be storing them in user-exposed arrays, they cannot cause problems
542-
* because they only live in this thread and the last reference is deleted on shutdown
543-
* instead of by user code. */
544-
subpat_names[name_idx] = zend_string_init(name, strlen(name), true);
545-
GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
550+
subpat_names[name_idx] = zend_string_init(name, strlen(name), false);
546551
name_table += name_size;
547552
}
548553
return subpat_names;
549554
}
550555
/* }}} */
551556

557+
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
558+
{
559+
if (!pce->subpats_table) {
560+
pce->subpats_table = make_subpats_table(name_cnt, pce);
561+
}
562+
return pce->subpats_table;
563+
}
564+
552565
/* {{{ static calculate_unit_length */
553566
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
554567
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
@@ -820,6 +833,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
820833
new_entry.preg_options = poptions;
821834
new_entry.compile_options = coptions;
822835
new_entry.refcount = 0;
836+
new_entry.subpats_table = NULL;
823837

824838
rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
825839
if (rc < 0) {
@@ -831,8 +845,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
831845
return NULL;
832846
}
833847

834-
uint32_t name_count;
835-
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
848+
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
836849
if (rc < 0) {
837850
if (key != regex) {
838851
zend_string_release_ex(key, 0);
@@ -842,21 +855,6 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
842855
return NULL;
843856
}
844857

845-
/* Compute and cache the subpattern table to avoid computing it again over and over. */
846-
if (name_count > 0) {
847-
new_entry.subpats_table = make_subpats_table(name_count, &new_entry);
848-
if (!new_entry.subpats_table) {
849-
if (key != regex) {
850-
zend_string_release_ex(key, false);
851-
}
852-
/* Warning already emitted by make_subpats_table() */
853-
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
854-
return NULL;
855-
}
856-
} else {
857-
new_entry.subpats_table = NULL;
858-
}
859-
860858
/*
861859
* Interned strings are not duplicated when stored in HashTable,
862860
* but all the interned strings created during HTTP request are removed
@@ -971,6 +969,8 @@ static zend_always_inline void populate_match_value(
971969

972970
static inline void add_named(
973971
HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
972+
ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
973+
974974
/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
975975
* In this case we want to preserve the one that actually has a value. */
976976
if (!unmatched) {
@@ -1232,8 +1232,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12321232
* allocate the table only if there are any named subpatterns.
12331233
*/
12341234
subpat_names = NULL;
1235-
if (subpats) {
1236-
subpat_names = pce->subpats_table;
1235+
if (subpats && pce->name_count > 0) {
1236+
subpat_names = ensure_subpats_table(pce->name_count, pce);
1237+
if (UNEXPECTED(!subpat_names)) {
1238+
RETURN_FALSE;
1239+
}
12371240
}
12381241

12391242
matched = 0;
@@ -1869,7 +1872,14 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18691872

18701873
/* Calculate the size of the offsets array, and allocate memory for it. */
18711874
num_subpats = pce->capture_count + 1;
1872-
subpat_names = pce->subpats_table;
1875+
if (pce->name_count > 0) {
1876+
subpat_names = ensure_subpats_table(pce->name_count, pce);
1877+
if (UNEXPECTED(!subpat_names)) {
1878+
return NULL;
1879+
}
1880+
} else {
1881+
subpat_names = NULL;
1882+
}
18731883

18741884
alloc_len = 0;
18751885
result = NULL;

ext/pcre/tests/gh17122.phpt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--TEST--
2+
GH-17122 (memory leak in regex)
3+
--FILE--
4+
<?php
5+
preg_match('|(?P<name>)(\d+)|', 0xffffffff, $m1);
6+
var_dump($m1);
7+
\preg_match('|(?P<name2>)(\d+)|', 0, $m2);
8+
var_dump($m2);
9+
?>
10+
--EXPECT--
11+
array(4) {
12+
[0]=>
13+
string(10) "4294967295"
14+
["name"]=>
15+
string(0) ""
16+
[1]=>
17+
string(0) ""
18+
[2]=>
19+
string(10) "4294967295"
20+
}
21+
array(4) {
22+
[0]=>
23+
string(1) "0"
24+
["name2"]=>
25+
string(0) ""
26+
[1]=>
27+
string(0) ""
28+
[2]=>
29+
string(1) "0"
30+
}

0 commit comments

Comments
 (0)