Skip to content

Commit 2597441

Browse files
authored
Cache pcre subpattern table (php#14424)
Recreating this table over and over is pointless, so cache it as well. Fixes phpGH-14423.
1 parent face2ab commit 2597441

File tree

2 files changed

+51
-43
lines changed

2 files changed

+51
-43
lines changed

UPGRADING

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,9 @@ PHP 8.4 UPGRADE NOTES
767767
- MySQLnd:
768768
. Improved the performance of MySQLnd quoting.
769769

770+
- PCRE:
771+
. Improved the performance of named capture groups.
772+
770773
- SimpleXML:
771774
. Improved performance and reduced memory consumption of XML serialization.
772775

ext/pcre/php_pcre.c

Lines changed: 48 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,11 @@ char *php_pcre_version;
4949

5050
struct _pcre_cache_entry {
5151
pcre2_code *re;
52+
/* Pointer is not NULL when there are named captures.
53+
* Length is equal to capture_count + 1 to account for capture group 0. */
54+
zend_string **subpats_table;
5255
uint32_t preg_options;
5356
uint32_t capture_count;
54-
uint32_t name_count;
5557
uint32_t compile_options;
5658
uint32_t refcount;
5759
};
@@ -90,6 +92,8 @@ static MUTEX_T pcre_mt = NULL;
9092

9193
ZEND_TLS HashTable char_tables;
9294

95+
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent);
96+
9397
static void php_pcre_free_char_table(zval *data)
9498
{/*{{{*/
9599
void *ptr = Z_PTR_P(data);
@@ -163,6 +167,9 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
163167
{
164168
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
165169
if (!pce) return;
170+
if (pce->subpats_table) {
171+
free_subpats_table(pce->subpats_table, pce->capture_count + 1, true);
172+
}
166173
pcre2_code_free(pce->re);
167174
free(pce);
168175
}
@@ -172,6 +179,9 @@ static void php_efree_pcre_cache(zval *data) /* {{{ */
172179
{
173180
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
174181
if (!pce) return;
182+
if (pce->subpats_table) {
183+
free_subpats_table(pce->subpats_table, pce->capture_count + 1, false);
184+
}
175185
pcre2_code_free(pce->re);
176186
efree(pce);
177187
}
@@ -520,20 +530,21 @@ static int pcre_clean_cache(zval *data, void *arg)
520530
}
521531
/* }}} */
522532

523-
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
533+
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats, bool persistent) {
524534
uint32_t i;
525535
for (i = 0; i < num_subpats; i++) {
526536
if (subpat_names[i]) {
527-
zend_string_release_ex(subpat_names[i], false);
537+
zend_string_release_ex(subpat_names[i], persistent);
528538
}
529539
}
530-
efree(subpat_names);
540+
pefree(subpat_names, persistent);
531541
}
532542

533543
/* {{{ static make_subpats_table */
534-
static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
544+
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce, bool persistent)
535545
{
536-
uint32_t name_cnt = pce->name_count, name_size, ni = 0;
546+
uint32_t num_subpats = pce->capture_count + 1;
547+
uint32_t name_size, ni = 0;
537548
char *name_table;
538549
zend_string **subpat_names;
539550
int rc1, rc2;
@@ -545,11 +556,20 @@ static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *
545556
return NULL;
546557
}
547558

548-
subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
559+
subpat_names = pecalloc(num_subpats, sizeof(zend_string *), persistent);
549560
while (ni++ < name_cnt) {
550561
unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
551562
const char *name = name_table + 2;
552-
subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
563+
/* Note: this makes a persistent string when the cache is not request-based because the string
564+
* has to outlive the request. In that case, they will only be used within this thread
565+
* and never be shared.
566+
* Although we will be storing them in user-exposed arrays, they cannot cause problems
567+
* because they only live in this thread and the last reference is deleted on shutdown
568+
* instead of by user code. */
569+
subpat_names[name_idx] = zend_string_init(name, strlen(name), persistent);
570+
if (persistent) {
571+
GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
572+
}
553573
name_table += name_size;
554574
}
555575
return subpat_names;
@@ -838,7 +858,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
838858
return NULL;
839859
}
840860

841-
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
861+
uint32_t name_count;
862+
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
842863
if (rc < 0) {
843864
if (key != regex) {
844865
zend_string_release_ex(key, 0);
@@ -848,6 +869,21 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
848869
return NULL;
849870
}
850871

872+
/* Compute and cache the subpattern table to avoid computing it again over and over. */
873+
if (name_count > 0) {
874+
new_entry.subpats_table = make_subpats_table(name_count, &new_entry, !PCRE_G(per_request_cache));
875+
if (!new_entry.subpats_table) {
876+
if (key != regex) {
877+
zend_string_release_ex(key, false);
878+
}
879+
/* Warning already emitted by make_subpats_table() */
880+
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
881+
return NULL;
882+
}
883+
} else {
884+
new_entry.subpats_table = NULL;
885+
}
886+
851887
/*
852888
* Interned strings are not duplicated when stored in HashTable,
853889
* but all the interned strings created during HTTP request are removed
@@ -1204,11 +1240,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12041240
* allocate the table only if there are any named subpatterns.
12051241
*/
12061242
subpat_names = NULL;
1207-
if (subpats && pce->name_count > 0) {
1208-
subpat_names = make_subpats_table(num_subpats, pce);
1209-
if (!subpat_names) {
1210-
RETURN_FALSE;
1211-
}
1243+
if (subpats) {
1244+
subpat_names = pce->subpats_table;
12121245
}
12131246

12141247
matched = 0;
@@ -1220,9 +1253,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12201253
match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
12211254
if (!match_data) {
12221255
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1223-
if (subpat_names) {
1224-
free_subpats_table(subpat_names, num_subpats);
1225-
}
12261256
RETURN_FALSE;
12271257
}
12281258
}
@@ -1269,9 +1299,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12691299
if (subpats != NULL) {
12701300
/* Try to get the list of substrings and display a warning if failed. */
12711301
if (UNEXPECTED(offsets[1] < offsets[0])) {
1272-
if (subpat_names) {
1273-
free_subpats_table(subpat_names, num_subpats);
1274-
}
12751302
if (match_sets) efree(match_sets);
12761303
php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
12771304
RETURN_FALSE;
@@ -1435,10 +1462,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14351462
}
14361463
}
14371464

1438-
if (subpat_names) {
1439-
free_subpats_table(subpat_names, num_subpats);
1440-
}
1441-
14421465
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
14431466
/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
14441467
if ((pce->compile_options & PCRE2_UTF)
@@ -1852,18 +1875,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18521875

18531876
/* Calculate the size of the offsets array, and allocate memory for it. */
18541877
num_subpats = pce->capture_count + 1;
1855-
1856-
/*
1857-
* Build a mapping from subpattern numbers to their names. We will
1858-
* allocate the table only if there are any named subpatterns.
1859-
*/
1860-
subpat_names = NULL;
1861-
if (UNEXPECTED(pce->name_count > 0)) {
1862-
subpat_names = make_subpats_table(num_subpats, pce);
1863-
if (!subpat_names) {
1864-
return NULL;
1865-
}
1866-
}
1878+
subpat_names = pce->subpats_table;
18671879

18681880
alloc_len = 0;
18691881
result = NULL;
@@ -1883,9 +1895,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18831895
match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
18841896
if (!match_data) {
18851897
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1886-
if (subpat_names) {
1887-
free_subpats_table(subpat_names, num_subpats);
1888-
}
18891898
mdata_used = old_mdata_used;
18901899
return NULL;
18911900
}
@@ -2036,10 +2045,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
20362045
}
20372046
mdata_used = old_mdata_used;
20382047

2039-
if (UNEXPECTED(subpat_names)) {
2040-
free_subpats_table(subpat_names, num_subpats);
2041-
}
2042-
20432048
return result;
20442049
}
20452050
/* }}} */

0 commit comments

Comments
 (0)