diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 9465c0e53a956..a0d4f8b26541f 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -948,9 +948,9 @@ static zend_always_inline void populate_match_value_str( ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset); } -static inline void populate_match_value( +static zend_always_inline void populate_match_value( zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, - uint32_t unmatched_as_null) { + bool unmatched_as_null) { if (PCRE2_UNSET == start_offset) { if (unmatched_as_null) { ZVAL_NULL(val); @@ -963,13 +963,13 @@ static inline void populate_match_value( } static inline void add_named( - zval *subpats, zend_string *name, zval *val, bool unmatched) { + HashTable *const subpats, zend_string *name, zval *val, bool unmatched) { /* If the DUPNAMES option is used, multiple subpatterns might have the same name. * In this case we want to preserve the one that actually has a value. */ if (!unmatched) { - zend_hash_update(Z_ARRVAL_P(subpats), name, val); + zend_hash_update(subpats, name, val); } else { - if (!zend_hash_add(Z_ARRVAL_P(subpats), name, val)) { + if (!zend_hash_add(subpats, name, val)) { return; } } @@ -978,7 +978,7 @@ static inline void add_named( /* {{{ add_offset_pair */ static inline void add_offset_pair( - zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, zend_string *name, uint32_t unmatched_as_null) { zval match_pair; @@ -1006,7 +1006,7 @@ static inline void add_offset_pair( if (name) { add_named(result, name, &match_pair, start_offset == PCRE2_UNSET); } - zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair); + zend_hash_next_index_insert_new(result, &match_pair); } /* }}} */ @@ -1017,16 +1017,17 @@ static void populate_subpat_array( bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0; zval val; int i; + HashTable *subpats_ht = Z_ARRVAL_P(subpats); if (subpat_names) { if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair( - subpats, subject, offsets[2*i], offsets[2*i+1], + subpats_ht, subject, offsets[2*i], offsets[2*i+1], subpat_names[i], unmatched_as_null); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { - add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); + add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); } } } else { @@ -1034,17 +1035,17 @@ static void populate_subpat_array( populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); if (subpat_names[i]) { - add_named(subpats, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET); + add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(subpats_ht, &val); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { ZVAL_NULL(&val); if (subpat_names[i]) { - zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[i], &val); + zend_hash_add(subpats_ht, subpat_names[i], &val); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(subpats_ht, &val); } } } @@ -1052,18 +1053,18 @@ static void populate_subpat_array( if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair( - subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); + subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { - add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); + add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); } } } else { for (i = 0; i < count; i++) { populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + zend_hash_next_index_insert_new(subpats_ht, &val); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { @@ -1129,13 +1130,12 @@ static zend_always_inline bool is_known_valid_utf8( PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset) { - zval result_set, /* Holds a set of subpatterns after + zval result_set; /* Holds a set of subpatterns after a global match */ - *match_sets = NULL; /* An array of sets of matches for each + HashTable **match_sets = NULL; /* An array of sets of matches for each subpattern after a global match */ uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t num_subpats; /* Number of captured subpatterns */ int matched; /* Has anything matched */ zend_string **subpat_names; /* Array for named subpatterns */ @@ -1215,14 +1215,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } } - /* Allocate match sets array and initialize the values. */ - if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { - match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0); - for (i=0; icompile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset) @@ -1269,12 +1269,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, matched: matched++; - offsets = pcre2_get_ovector_pointer(match_data); - /* If subpatterns array has been passed, fill it in with values. */ if (subpats != NULL) { /* Try to get the list of substrings and display a warning if failed. */ - if (offsets[1] < offsets[0]) { + if (UNEXPECTED(offsets[1] < offsets[0])) { if (subpat_names) { free_subpats_table(subpat_names, num_subpats); } @@ -1284,12 +1282,12 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } if (global) { /* global pattern matching */ - if (subpats && subpats_order == PREG_PATTERN_ORDER) { + if (subpats_order == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. */ if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair( - &match_sets[i], subject, offsets[2*i], offsets[2*i+1], + match_sets[i], subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); } } else { @@ -1297,7 +1295,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval val; populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); - zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val); + zend_hash_next_index_insert_new(match_sets[i], &val); } } mark = pcre2_get_mark(match_data); @@ -1317,24 +1315,28 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, for (; i < num_subpats; i++) { if (offset_capture) { add_offset_pair( - &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET, + match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, unmatched_as_null); } else if (unmatched_as_null) { - add_next_index_null(&match_sets[i]); + zval tmp; + ZVAL_NULL(&tmp); + zend_hash_next_index_insert_new(match_sets[i], &tmp); } else { - add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC()); + zval tmp; + ZVAL_EMPTY_STRING(&tmp); + zend_hash_next_index_insert_new(match_sets[i], &tmp); } } } } else { /* Allocate and populate the result set array */ - array_init_size(&result_set, count + (mark ? 1 : 0)); mark = pcre2_get_mark(match_data); + array_init_size(&result_set, count + (mark ? 1 : 0)); populate_subpat_array( &result_set, subject, offsets, subpat_names, num_subpats, count, mark, flags); /* And add it to the output array */ - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set); } } else { /* single pattern matching */ /* For each subpattern, insert it into the subpatterns array. */ @@ -1408,18 +1410,22 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } /* Add the match sets to the output array and clean up */ - if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { + if (match_sets) { if (subpat_names) { for (i = 0; i < num_subpats; i++) { + zval wrapper; + ZVAL_ARR(&wrapper, match_sets[i]); if (subpat_names[i]) { - zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]); - Z_ADDREF(match_sets[i]); + zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper); + GC_ADDREF(match_sets[i]); } - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper); } } else { for (i = 0; i < num_subpats; i++) { - zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); + zval wrapper; + ZVAL_ARR(&wrapper, match_sets[i]); + zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper); } } efree(match_sets); @@ -1567,7 +1573,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t num_subpats; /* Number of captured subpatterns */ size_t new_len; /* Length of needed storage */ size_t alloc_len; /* Actual allocated length */ @@ -1609,6 +1614,9 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); + /* Execute the regular expression. */ #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { @@ -1632,8 +1640,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su } matched: - offsets = pcre2_get_ovector_pointer(match_data); - if (UNEXPECTED(offsets[1] < offsets[0])) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; if (result) { @@ -1807,7 +1813,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ zend_string **subpat_names; /* Array for named subpatterns */ uint32_t num_subpats; /* Number of captured subpatterns */ size_t new_len; /* Length of needed storage */ @@ -1865,6 +1870,9 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); + /* Execute the regular expression. */ #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { @@ -1886,8 +1894,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin } matched: - offsets = pcre2_get_ovector_pointer(match_data); - if (UNEXPECTED(offsets[1] < offsets[0])) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; if (result) { @@ -2203,6 +2209,7 @@ static size_t preg_replace_func_impl(zval *return_value, ZEND_ASSERT(subject_ht != NULL); array_init_size(return_value, zend_hash_num_elements(subject_ht)); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array. */ @@ -2216,9 +2223,9 @@ static size_t preg_replace_func_impl(zval *return_value, /* Add to return array */ ZVAL_STR(&zv, result); if (string_key) { - zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); + zend_hash_add_new(return_value_ht, string_key, &zv); } else { - zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); + zend_hash_index_add_new(return_value_ht, num_key, &zv); } } zend_tmp_string_release(tmp_subject_entry_str); @@ -2283,6 +2290,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter) ZEND_ASSERT(subject_ht != NULL); array_init_size(return_value, zend_hash_num_elements(subject_ht)); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array. */ @@ -2298,9 +2306,9 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter) /* Add to return array */ ZVAL_STR(&zv, result); if (string_key) { - zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv); + zend_hash_add_new(return_value_ht, string_key, &zv); } else { - zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv); + zend_hash_index_add_new(return_value_ht, num_key, &zv); } } else { zend_string_release_ex(result, 0); @@ -2487,7 +2495,6 @@ PHP_FUNCTION(preg_split) PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zend_long limit_val, zend_long flags) { - PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ PCRE2_SIZE start_offset; /* Where the new search starts */ @@ -2506,6 +2513,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, /* Initialize return value */ array_init(return_value); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); /* Calculate the size of the offsets array, and allocate memory for it. */ num_subpats = pce->capture_count + 1; @@ -2536,6 +2544,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + /* Array of subpattern offsets */ + PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); + #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, @@ -2555,8 +2566,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, } matched: - offsets = pcre2_get_ovector_pointer(match_data); - if (UNEXPECTED(offsets[1] < offsets[0])) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; break; @@ -2566,12 +2575,12 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, if (offset_capture) { /* Add (match, offset) pair to the return value */ add_offset_pair( - return_value, subject, last_match_offset, offsets[0], + return_value_ht, subject, last_match_offset, offsets[0], NULL, 0); } else { /* Add the piece to the return value */ populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]); - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); + zend_hash_next_index_insert_new(return_value_ht, &tmp); } /* One less left to do */ @@ -2586,10 +2595,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, if (!no_empty || offsets[2*i] != offsets[2*i+1]) { if (offset_capture) { add_offset_pair( - return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0); + return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0); } else { populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]); - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); + zend_hash_next_index_insert_new(return_value_ht, &tmp); } } } @@ -2663,7 +2672,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, if (!no_empty || start_offset < ZSTR_LEN(subject_str)) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); + add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); } else { /* Add the last piece to the return value */ if (start_offset == 0) { @@ -2671,7 +2680,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, } else { populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str)); } - zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); + zend_hash_next_index_insert_new(return_value_ht, &tmp); } } } @@ -2858,6 +2867,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Initialize return array */ array_init(return_value); + HashTable *return_value_ht = Z_ARRVAL_P(return_value); PCRE_G(error_code) = PHP_PCRE_NO_ERROR; @@ -2899,9 +2909,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Add to return array */ if (string_key) { - zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry); + zend_hash_update(return_value_ht, string_key, entry); } else { - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry); + zend_hash_index_update(return_value_ht, num_key, entry); } } } else if (count == PCRE2_ERROR_NOMATCH) { @@ -2910,9 +2920,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Add to return array */ if (string_key) { - zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry); + zend_hash_update(return_value_ht, string_key, entry); } else { - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry); + zend_hash_index_update(return_value_ht, num_key, entry); } } } else {