Skip to content

Commit 569c291

Browse files
committed
Cache pcre subpattern table
Recreating the subpattern table over and over is pointless; cache it in the pcre cache entry as well. Fixes GH-14361.
1 parent 4fca8a6 commit 569c291

File tree

1 file changed

+41
-44
lines changed

1 file changed

+41
-44
lines changed

ext/pcre/php_pcre.c

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ char *php_pcre_version;
4949

5050
struct _pcre_cache_entry {
5151
pcre2_code *re;
52+
zend_string **subpats_table;
5253
uint32_t preg_options;
5354
uint32_t capture_count;
54-
uint32_t name_count;
5555
uint32_t compile_options;
5656
uint32_t refcount;
5757
};
@@ -90,6 +90,8 @@ static MUTEX_T pcre_mt = NULL;
9090

9191
ZEND_TLS HashTable char_tables;
9292

93+
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent);
94+
9395
static void php_pcre_free_char_table(zval *data)
9496
{/*{{{*/
9597
void *ptr = Z_PTR_P(data);
@@ -163,6 +165,9 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
163165
{
164166
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
165167
if (!pce) return;
168+
if (pce->subpats_table) {
169+
free_subpats_table(pce->subpats_table, pce->capture_count + 1, true);
170+
}
166171
pcre2_code_free(pce->re);
167172
free(pce);
168173
}
@@ -172,6 +177,9 @@ static void php_efree_pcre_cache(zval *data) /* {{{ */
172177
{
173178
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
174179
if (!pce) return;
180+
if (pce->subpats_table) {
181+
free_subpats_table(pce->subpats_table, pce->capture_count + 1, false);
182+
}
175183
pcre2_code_free(pce->re);
176184
efree(pce);
177185
}
@@ -520,20 +528,21 @@ static int pcre_clean_cache(zval *data, void *arg)
520528
}
521529
/* }}} */
522530

523-
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
531+
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent) {
524532
uint32_t i;
525533
for (i = 0; i < num_subpats; i++) {
526534
if (subpat_names[i]) {
527-
zend_string_release_ex(subpat_names[i], false);
535+
zend_string_release_ex(subpat_names[i], persistent);
528536
}
529537
}
530-
efree(subpat_names);
538+
pefree(subpat_names, persistent);
531539
}
532540

533541
/* {{{ static make_subpats_table */
534-
static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
542+
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce, bool persistent)
535543
{
536-
uint32_t name_cnt = pce->name_count, name_size, ni = 0;
544+
uint32_t num_subpats = pce->capture_count + 1;
545+
uint32_t name_size, ni = 0;
537546
char *name_table;
538547
zend_string **subpat_names;
539548
int rc1, rc2;
@@ -545,14 +554,21 @@ static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *
545554
return NULL;
546555
}
547556

548-
subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
557+
subpat_names = pecalloc(num_subpats, sizeof(zend_string *), persistent);
549558
while (ni++ < name_cnt) {
550559
unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
551560
const char *name = name_table + 2;
552-
subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
561+
/* Note: if we're making persistent strings, they will only be used within this thread.
562+
* Although we will be storing them in user-exposed arrays, they cannot cause problems
563+
* because they only live in this thread and the last reference is deleted on shutdown
564+
* instead of by user code. */
565+
subpat_names[name_idx] = zend_string_init(name, strlen(name), persistent);
566+
if (persistent) {
567+
GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
568+
}
553569
if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
554570
php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
555-
free_subpats_table(subpat_names, num_subpats);
571+
free_subpats_table(subpat_names, num_subpats, persistent);
556572
return NULL;
557573
}
558574
name_table += name_size;
@@ -843,7 +859,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
843859
return NULL;
844860
}
845861

846-
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
862+
uint32_t name_count;
863+
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
847864
if (rc < 0) {
848865
if (key != regex) {
849866
zend_string_release_ex(key, 0);
@@ -853,6 +870,17 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
853870
return NULL;
854871
}
855872

873+
/* Compute and cache the subpattern table to avoid computing it again over and over. */
874+
new_entry.subpats_table = make_subpats_table(name_count, &new_entry, !PCRE_G(per_request_cache));
875+
if (!new_entry.subpats_table) {
876+
if (key != regex) {
877+
zend_string_release_ex(key, 0);
878+
}
879+
/* Warning already emitted by make_subpats_table() */
880+
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
881+
return NULL;
882+
}
883+
856884
/*
857885
* Interned strings are not duplicated when stored in HashTable,
858886
* but all the interned strings created during HTTP request are removed
@@ -1209,11 +1237,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12091237
* allocate the table only if there are any named subpatterns.
12101238
*/
12111239
subpat_names = NULL;
1212-
if (subpats && pce->name_count > 0) {
1213-
subpat_names = make_subpats_table(num_subpats, pce);
1214-
if (!subpat_names) {
1215-
RETURN_FALSE;
1216-
}
1240+
if (subpats) {
1241+
subpat_names = pce->subpats_table;
12171242
}
12181243

12191244
matched = 0;
@@ -1225,9 +1250,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12251250
match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
12261251
if (!match_data) {
12271252
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1228-
if (subpat_names) {
1229-
free_subpats_table(subpat_names, num_subpats);
1230-
}
12311253
RETURN_FALSE;
12321254
}
12331255
}
@@ -1274,9 +1296,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12741296
if (subpats != NULL) {
12751297
/* Try to get the list of substrings and display a warning if failed. */
12761298
if (UNEXPECTED(offsets[1] < offsets[0])) {
1277-
if (subpat_names) {
1278-
free_subpats_table(subpat_names, num_subpats);
1279-
}
12801299
if (match_sets) efree(match_sets);
12811300
php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
12821301
RETURN_FALSE;
@@ -1440,10 +1459,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14401459
}
14411460
}
14421461

1443-
if (subpat_names) {
1444-
free_subpats_table(subpat_names, num_subpats);
1445-
}
1446-
14471462
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
14481463
/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
14491464
if ((pce->compile_options & PCRE2_UTF)
@@ -1857,18 +1872,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18571872

18581873
/* Calculate the size of the offsets array, and allocate memory for it. */
18591874
num_subpats = pce->capture_count + 1;
1860-
1861-
/*
1862-
* Build a mapping from subpattern numbers to their names. We will
1863-
* allocate the table only if there are any named subpatterns.
1864-
*/
1865-
subpat_names = NULL;
1866-
if (UNEXPECTED(pce->name_count > 0)) {
1867-
subpat_names = make_subpats_table(num_subpats, pce);
1868-
if (!subpat_names) {
1869-
return NULL;
1870-
}
1871-
}
1875+
subpat_names = pce->subpats_table;
18721876

18731877
alloc_len = 0;
18741878
result = NULL;
@@ -1888,9 +1892,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18881892
match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
18891893
if (!match_data) {
18901894
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1891-
if (subpat_names) {
1892-
free_subpats_table(subpat_names, num_subpats);
1893-
}
18941895
mdata_used = old_mdata_used;
18951896
return NULL;
18961897
}
@@ -2041,10 +2042,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
20412042
}
20422043
mdata_used = old_mdata_used;
20432044

2044-
if (UNEXPECTED(subpat_names)) {
2045-
free_subpats_table(subpat_names, num_subpats);
2046-
}
2047-
20482045
return result;
20492046
}
20502047
/* }}} */

0 commit comments

Comments
 (0)