From e4e0c0b82554be087bd467645eefc656494a025c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:16:44 +0200 Subject: [PATCH 01/11] Always inline populate_match_value and fix argument type The call overhead of this function is quite large. --- ext/pcre/php_pcre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 9465c0e53a95..f8870995819a 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -948,9 +948,9 @@ static zend_always_inline void populate_match_value_str( ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset); } -static inline void populate_match_value( +static zend_always_inline void populate_match_value( zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, - uint32_t unmatched_as_null) { + bool unmatched_as_null) { if (PCRE2_UNSET == start_offset) { if (unmatched_as_null) { ZVAL_NULL(val); From 407a4395822f5c16a7125f032ed1c1342ab026bd Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:17:44 +0200 Subject: [PATCH 02/11] Use _new variant of zend_hash in some places to avoid additional check --- ext/pcre/php_pcre.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index f8870995819a..0c053cec451d 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1006,7 +1006,7 @@ static inline void add_offset_pair( if (name) { add_named(result, name, &match_pair, start_offset == PCRE2_UNSET); } - zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair); + zend_hash_next_index_insert_new(Z_ARRVAL_P(result), &match_pair); } /* }}} */ @@ -1036,7 +1036,7 @@ static void populate_subpat_array( if (subpat_names[i]) { add_named(subpats, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); 
+ zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &val); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { @@ -1044,7 +1044,7 @@ static void populate_subpat_array( if (subpat_names[i]) { zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[i], &val); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &val); } } } @@ -1063,7 +1063,7 @@ static void populate_subpat_array( for (i = 0; i < count; i++) { populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &val); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { @@ -1334,7 +1334,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, &result_set, subject, offsets, subpat_names, num_subpats, count, mark, flags); /* And add it to the output array */ - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set); } } else { /* single pattern matching */ /* For each subpattern, insert it into the subpatterns array. 
*/ @@ -1415,11 +1415,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]); Z_ADDREF(match_sets[i]); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &match_sets[i]); } } else { for (i = 0; i < num_subpats; i++) { - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &match_sets[i]); } } efree(match_sets); From d000648ff65d11a660252299cf27f2f96fc5e0a4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:18:54 +0200 Subject: [PATCH 03/11] Move allocation of match_sets down to simplify and reduce code size --- ext/pcre/php_pcre.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 0c053cec451d..6afe8eb77e57 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1215,14 +1215,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } } - /* Allocate match sets array and initialize the values. */ - if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { - match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0); - for (i=0; icompile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset) From 83d396300a98414ed26a691ae47e07fa82dbceb4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:19:21 +0200 Subject: [PATCH 04/11] Move pcre2_get_ovector_pointer out of the loop The ovector is allocated together with the match data and stays loop invariant: the pointer is always the same (the values it points to, however, are not). 
--- ext/pcre/php_pcre.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 6afe8eb77e57..ca36073cbb27 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1239,6 +1239,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } } + offsets = pcre2_get_ovector_pointer(match_data); + orig_start_offset = start_offset2; options = (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset) @@ -1266,8 +1268,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, matched: matched++; - offsets = pcre2_get_ovector_pointer(match_data); - /* If subpatterns array has been passed, fill it in with values. */ if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. */ @@ -1606,6 +1606,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + offsets = pcre2_get_ovector_pointer(match_data); + /* Execute the regular expression. */ #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { @@ -1629,8 +1631,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } matched: - offsets = pcre2_get_ovector_pointer(match_data); - if (UNEXPECTED(offsets[1] < offsets[0])) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; if (result) { @@ -1862,6 +1862,8 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + offsets = pcre2_get_ovector_pointer(match_data); + /* Execute the regular expression. 
*/ #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { @@ -1883,8 +1885,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin } matched: - offsets = pcre2_get_ovector_pointer(match_data); - if (UNEXPECTED(offsets[1] < offsets[0])) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; if (result) { @@ -2533,6 +2533,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + offsets = pcre2_get_ovector_pointer(match_data); + #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, @@ -2552,8 +2554,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, } matched: - offsets = pcre2_get_ovector_pointer(match_data); - if (UNEXPECTED(offsets[1] < offsets[0])) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; break; From 35ea734d31ed8b939ee6a905983acda0a5372b88 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:19:53 +0200 Subject: [PATCH 05/11] Mark error condition as cold block --- ext/pcre/php_pcre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index ca36073cbb27..e7fec081bdfe 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1271,7 +1271,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, /* If subpatterns array has been passed, fill it in with values. */ if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. 
*/ - if (offsets[1] < offsets[0]) { + if (UNEXPECTED(offsets[1] < offsets[0])) { if (subpat_names) { free_subpats_table(subpat_names, num_subpats); } From 80d6e10b3dcc4c8b82c7342b5bc5e4e3e1b70ed4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:20:14 +0200 Subject: [PATCH 06/11] Simplify condition: subpats is already checked --- ext/pcre/php_pcre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e7fec081bdfe..9362ed121af9 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1281,7 +1281,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } if (global) { /* global pattern matching */ - if (subpats && subpats_order == PREG_PATTERN_ORDER) { + if (subpats_order == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. */ if (offset_capture) { for (i = 0; i < count; i++) { From 6d1c9d80d5b825568f19d9f953edc61de0849fea Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:20:38 +0200 Subject: [PATCH 07/11] Move array size preallocation so that it uses the up-to-date size --- ext/pcre/php_pcre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 9362ed121af9..e3877651a2a5 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1325,8 +1325,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } } else { /* Allocate and populate the result set array */ - array_init_size(&result_set, count + (mark ? 1 : 0)); mark = pcre2_get_mark(match_data); + array_init_size(&result_set, count + (mark ? 
1 : 0)); populate_subpat_array( &result_set, subject, offsets, subpat_names, num_subpats, count, mark, flags); From 3eb8cf95577ad4de1c44fb452c564cc4eba901e3 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 23:20:48 +0200 Subject: [PATCH 08/11] Simplify condition --- ext/pcre/php_pcre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e3877651a2a5..9c1ac5b034cf 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1405,7 +1405,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } /* Add the match sets to the output array and clean up */ - if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { + if (match_sets) { if (subpat_names) { for (i = 0; i < num_subpats; i++) { if (subpat_names[i]) { From 26d1d60c20702856d6e6b284a643b95cd2d38366 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 15 Oct 2023 19:14:09 +0200 Subject: [PATCH 09/11] Rework internal functions to avoid repeated unwrapping --- ext/pcre/php_pcre.c | 69 +++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 9c1ac5b034cf..ae91156c7213 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -963,13 +963,13 @@ static zend_always_inline void populate_match_value( } static inline void add_named( - zval *subpats, zend_string *name, zval *val, bool unmatched) { + HashTable *subpats, zend_string *name, zval *val, bool unmatched) { /* If the DUPNAMES option is used, multiple subpatterns might have the same name. * In this case we want to preserve the one that actually has a value. 
*/ if (!unmatched) { - zend_hash_update(Z_ARRVAL_P(subpats), name, val); + zend_hash_update(subpats, name, val); } else { - if (!zend_hash_add(Z_ARRVAL_P(subpats), name, val)) { + if (!zend_hash_add(subpats, name, val)) { return; } } @@ -978,7 +978,7 @@ static inline void add_named( /* {{{ add_offset_pair */ static inline void add_offset_pair( - zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + HashTable *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, zend_string *name, uint32_t unmatched_as_null) { zval match_pair; @@ -1006,7 +1006,7 @@ static inline void add_offset_pair( if (name) { add_named(result, name, &match_pair, start_offset == PCRE2_UNSET); } - zend_hash_next_index_insert_new(Z_ARRVAL_P(result), &match_pair); + zend_hash_next_index_insert_new(result, &match_pair); } /* }}} */ @@ -1017,16 +1017,17 @@ static void populate_subpat_array( bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0; zval val; int i; + HashTable *subpats_ht = Z_ARRVAL_P(subpats); if (subpat_names) { if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair( - subpats, subject, offsets[2*i], offsets[2*i+1], + subpats_ht, subject, offsets[2*i], offsets[2*i+1], subpat_names[i], unmatched_as_null); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { - add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); + add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); } } } else { @@ -1034,17 +1035,17 @@ static void populate_subpat_array( populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); if (subpat_names[i]) { - add_named(subpats, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET); + add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET); } - zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(subpats_ht, &val); } if (unmatched_as_null) { for (i = 
count; i < num_subpats; i++) { ZVAL_NULL(&val); if (subpat_names[i]) { - zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[i], &val); + zend_hash_add(subpats_ht, subpat_names[i], &val); } - zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(subpats_ht, &val); } } } @@ -1052,18 +1053,18 @@ static void populate_subpat_array( if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair( - subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); + subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { - add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); + add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); } } } else { for (i = 0; i < count; i++) { populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); - zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(subpats_ht, &val); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { @@ -1129,9 +1130,9 @@ static zend_always_inline bool is_known_valid_utf8( PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset) { - zval result_set, /* Holds a set of subpatterns after + zval result_set; /* Holds a set of subpatterns after a global match */ - *match_sets = NULL; /* An array of sets of matches for each + HashTable **match_sets = NULL; /* An array of sets of matches for each subpattern after a global match */ uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ @@ -1233,9 +1234,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, /* Allocate match sets array and initialize the values. 
*/ if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { - match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0); + match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0); for (i=0; i Date: Sun, 15 Oct 2023 19:27:35 +0200 Subject: [PATCH 10/11] Remember Z_ARRVAL_P(return_value) The lookup is loop invariant. --- ext/pcre/php_pcre.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index ae91156c7213..e73a054dc551 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2209,6 +2209,7 @@ static size_t preg_replace_func_impl(zval *return_value, ZEND_ASSERT(subject_ht != NULL); array_init_size(return_value, zend_hash_num_elements(subject_ht)); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array. */ @@ -2222,9 +2223,9 @@ static size_t preg_replace_func_impl(zval *return_value, /* Add to return array */ ZVAL_STR(&zv, result); if (string_key) { - zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); + zend_hash_add_new(return_value_ht, string_key, &zv); } else { - zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); + zend_hash_index_add_new(return_value_ht, num_key, &zv); } } zend_tmp_string_release(tmp_subject_entry_str); @@ -2289,6 +2290,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter) ZEND_ASSERT(subject_ht != NULL); array_init_size(return_value, zend_hash_num_elements(subject_ht)); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array. 
*/ @@ -2304,9 +2306,9 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter) /* Add to return array */ ZVAL_STR(&zv, result); if (string_key) { - zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); + zend_hash_add_new(return_value_ht, string_key, &zv); } else { - zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); + zend_hash_index_add_new(return_value_ht, num_key, &zv); } } else { zend_string_release_ex(result, 0); @@ -2512,6 +2514,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, /* Initialize return value */ array_init(return_value); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); /* Calculate the size of the offsets array, and allocate memory for it. */ num_subpats = pce->capture_count + 1; @@ -2572,12 +2575,12 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, if (offset_capture) { /* Add (match, offset) pair to the return value */ add_offset_pair( - Z_ARRVAL_P(return_value), subject, last_match_offset, offsets[0], + return_value_ht, subject, last_match_offset, offsets[0], NULL, 0); } else { /* Add the piece to the return value */ populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]); - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); + zend_hash_next_index_insert_new(return_value_ht, &tmp); } /* One less left to do */ @@ -2592,10 +2595,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, if (!no_empty || offsets[2*i] != offsets[2*i+1]) { if (offset_capture) { add_offset_pair( - Z_ARRVAL_P(return_value), subject, offsets[2*i], offsets[2*i+1], NULL, 0); + return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0); } else { populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]); - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); + zend_hash_next_index_insert_new(return_value_ht, &tmp); } } } @@ -2669,7 +2672,7 @@ PHPAPI void 
php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, if (!no_empty || start_offset < ZSTR_LEN(subject_str)) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(Z_ARRVAL_P(return_value), subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); + add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); } else { /* Add the last piece to the return value */ if (start_offset == 0) { @@ -2677,7 +2680,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, } else { populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str)); } - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); + zend_hash_next_index_insert_new(return_value_ht, &tmp); } } } @@ -2864,6 +2867,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Initialize return array */ array_init(return_value); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); PCRE_G(error_code) = PHP_PCRE_NO_ERROR; @@ -2905,9 +2909,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Add to return array */ if (string_key) { - zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry); + zend_hash_update(return_value_ht, string_key, entry); } else { - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry); + zend_hash_index_update(return_value_ht, num_key, entry); } } } else if (count == PCRE2_ERROR_NOMATCH) { @@ -2916,9 +2920,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Add to return array */ if (string_key) { - zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry); + zend_hash_update(return_value_ht, string_key, entry); } else { - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry); + zend_hash_index_update(return_value_ht, num_key, entry); } } } else { From 51c46f513886590ba68f141ecfac28953060ed59 Mon Sep 17 00:00:00 2001 From: 
Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 16 Oct 2023 19:37:49 +0200 Subject: [PATCH 11/11] Mark some pointers as const --- ext/pcre/php_pcre.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e73a054dc551..a0d4f8b26541 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -963,7 +963,7 @@ static zend_always_inline void populate_match_value( } static inline void add_named( - HashTable *subpats, zend_string *name, zval *val, bool unmatched) { + HashTable *const subpats, zend_string *name, zval *val, bool unmatched) { /* If the DUPNAMES option is used, multiple subpatterns might have the same name. * In this case we want to preserve the one that actually has a value. */ if (!unmatched) { @@ -978,7 +978,7 @@ static inline void add_named( /* {{{ add_offset_pair */ static inline void add_offset_pair( - HashTable *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, zend_string *name, uint32_t unmatched_as_null) { zval match_pair; @@ -1136,7 +1136,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, subpattern after a global match */ uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t num_subpats; /* Number of captured subpatterns */ int matched; /* Has anything matched */ zend_string **subpat_names; /* Array for named subpatterns */ @@ -1240,7 +1239,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } } - offsets = pcre2_get_ovector_pointer(match_data); + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); orig_start_offset = start_offset2; options = @@ -1573,7 +1573,6 @@ PHPAPI zend_string 
*php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t num_subpats; /* Number of captured subpatterns */ size_t new_len; /* Length of needed storage */ size_t alloc_len; /* Actual allocated length */ @@ -1615,7 +1614,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; - offsets = pcre2_get_ovector_pointer(match_data); + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); /* Execute the regular expression. */ #ifdef HAVE_PCRE_JIT_SUPPORT @@ -1813,7 +1813,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ zend_string **subpat_names; /* Array for named subpatterns */ uint32_t num_subpats; /* Number of captured subpatterns */ size_t new_len; /* Length of needed storage */ @@ -1871,7 +1870,8 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; - offsets = pcre2_get_ovector_pointer(match_data); + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); /* Execute the regular expression. 
*/ #ifdef HAVE_PCRE_JIT_SUPPORT @@ -2495,7 +2495,6 @@ PHP_FUNCTION(preg_split) PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zend_long limit_val, zend_long flags) { - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ PCRE2_SIZE start_offset; /* Where the new search starts */ @@ -2545,7 +2544,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; - offsets = pcre2_get_ovector_pointer(match_data); + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) {