Skip to content

Commit 5d60651

Browse files
committed
Merge "no_utf_check" and "g_notempty" into single "options".
1 parent e8a04b3 commit 5d60651

File tree

1 file changed

+52
-65
lines changed

1 file changed

+52
-65
lines changed

ext/pcre/php_pcre.c

Lines changed: 52 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -974,12 +974,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
974974
a global match */
975975
*match_sets = NULL; /* An array of sets of matches for each
976976
subpattern after a global match */
977-
uint32_t no_utf_check = 0; /* Execution options */
977+
uint32_t options; /* Execution options */
978978
int count = 0; /* Count of matched subpatterns */
979979
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
980980
uint32_t num_subpats; /* Number of captured subpatterns */
981981
int matched; /* Has anything matched */
982-
uint32_t g_notempty = 0; /* If the match should not be empty */
983982
char **subpat_names; /* Array for named subpatterns */
984983
size_t i;
985984
uint32_t subpats_order; /* Order of subpattern matches */
@@ -1058,13 +1057,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
10581057
matched = 0;
10591058
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
10601059

1060+
options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
10611061

1062-
#ifdef HAVE_PCRE_JIT_SUPPORT
1063-
if (!(pce->compile_options & PCRE2_UTF)) {
1064-
no_utf_check = PCRE2_NO_UTF_CHECK;
1065-
}
1066-
1067-
#endif
10681062
if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
10691063
match_data = mdata;
10701064
} else {
@@ -1084,8 +1078,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
10841078
do {
10851079
/* Execute the regular expression. */
10861080
#ifdef HAVE_PCRE_JIT_SUPPORT
1087-
if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
1088-
&& no_utf_check && !g_notempty) {
1081+
if ((pce->preg_options & PREG_JIT) && options == PCRE2_NO_UTF_CHECK) {
10891082
if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
10901083
pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
10911084
break;
@@ -1095,10 +1088,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
10951088
} else
10961089
#endif
10971090
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1098-
no_utf_check|g_notempty, match_data, mctx);
1091+
options, match_data, mctx);
10991092

11001093
/* the string was already proved to be valid UTF-8 */
1101-
no_utf_check = PCRE2_NO_UTF_CHECK;
1094+
options |= PCRE2_NO_UTF_CHECK;
11021095

11031096
/* Check for too many substrings condition. */
11041097
if (count == 0) {
@@ -1307,18 +1300,21 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
13071300
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
13081301
the match again at the same point. If this fails (picked up above) we
13091302
advance to the next character. */
1310-
g_notempty = (start_offset2 == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
1311-
1303+
if (start_offset2 == offsets[0]) {
1304+
options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
1305+
} else {
1306+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
1307+
}
13121308
} else if (count == PCRE2_ERROR_NOMATCH) {
13131309
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
13141310
this is not necessarily the end. We need to advance
13151311
the start offset, and continue. Fudge the offset values
13161312
to achieve this, unless we're already at the end of the string. */
1317-
if (g_notempty != 0 && start_offset2 < subject_len) {
1313+
if ((options & PCRE2_NOTEMPTY_ATSTART) && start_offset2 < subject_len) {
13181314
size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
13191315

13201316
start_offset2 += unit_len;
1321-
g_notempty = 0;
1317+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
13221318
} else {
13231319
break;
13241320
}
@@ -1501,7 +1497,7 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
15011497
/* {{{ php_pcre_replace_impl() */
15021498
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
15031499
{
1504-
uint32_t no_utf_check = 0; /* Execution options */
1500+
uint32_t options; /* Execution options */
15051501
int count = 0; /* Count of matched subpatterns */
15061502
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
15071503
char **subpat_names; /* Array for named subpatterns */
@@ -1511,7 +1507,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
15111507
size_t match_len; /* Length of the current match */
15121508
int backref; /* Backreference number */
15131509
PCRE2_SIZE start_offset; /* Where the new search starts */
1514-
uint32_t g_notempty=0; /* If the match should not be empty */
15151510
char *walkbuf, /* Location of current replacement in the result */
15161511
*walk, /* Used to walk the replacement string */
15171512
*match, /* The current match */
@@ -1551,12 +1546,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
15511546
result_len = 0;
15521547
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
15531548

1554-
#ifdef HAVE_PCRE_JIT_SUPPORT
1555-
if (!(pce->compile_options & PCRE2_UTF)) {
1556-
no_utf_check = PCRE2_NO_UTF_CHECK;
1557-
}
1549+
options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
15581550

1559-
#endif
15601551
if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
15611552
match_data = mdata;
15621553
} else {
@@ -1573,17 +1564,16 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
15731564
while (1) {
15741565
/* Execute the regular expression. */
15751566
#ifdef HAVE_PCRE_JIT_SUPPORT
1576-
if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
1577-
&& no_utf_check && !g_notempty) {
1567+
if ((pce->preg_options & PREG_JIT) && options == PCRE2_NO_UTF_CHECK) {
15781568
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
15791569
PCRE2_NO_UTF_CHECK, match_data, mctx);
15801570
} else
15811571
#endif
15821572
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1583-
no_utf_check|g_notempty, match_data, mctx);
1573+
options, match_data, mctx);
15841574

15851575
/* the string was already proved to be valid UTF-8 */
1586-
no_utf_check = PCRE2_NO_UTF_CHECK;
1576+
options |= PCRE2_NO_UTF_CHECK;
15871577

15881578
/* Check for too many substrings condition. */
15891579
if (UNEXPECTED(count == 0)) {
@@ -1690,20 +1680,24 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
16901680
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
16911681
the match again at the same point. If this fails (picked up above) we
16921682
advance to the next character. */
1693-
g_notempty = (start_offset == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
1683+
if (start_offset == offsets[0]) {
1684+
options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
1685+
} else {
1686+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
1687+
}
16941688

16951689
} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
16961690
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
16971691
this is not necessarily the end. We need to advance
16981692
the start offset, and continue. Fudge the offset values
16991693
to achieve this, unless we're already at the end of the string. */
1700-
if (g_notempty != 0 && start_offset < subject_len) {
1694+
if ((options & PCRE2_NOTEMPTY_ATSTART) && start_offset < subject_len) {
17011695
size_t unit_len = calculate_unit_length(pce, piece);
17021696

17031697
start_offset += unit_len;
17041698
memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
17051699
result_len += unit_len;
1706-
g_notempty = 0;
1700+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
17071701
} else {
17081702
if (!result && subject_str) {
17091703
result = zend_string_copy(subject_str);
@@ -1749,15 +1743,14 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
17491743
/* {{{ php_pcre_replace_func_impl() */
17501744
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
17511745
{
1752-
uint32_t no_utf_check = 0; /* Execution options */
1746+
uint32_t options; /* Execution options */
17531747
int count = 0; /* Count of matched subpatterns */
17541748
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
17551749
char **subpat_names; /* Array for named subpatterns */
17561750
uint32_t num_subpats; /* Number of captured subpatterns */
17571751
size_t new_len; /* Length of needed storage */
17581752
size_t alloc_len; /* Actual allocated length */
17591753
PCRE2_SIZE start_offset; /* Where the new search starts */
1760-
uint32_t g_notempty=0; /* If the match should not be empty */
17611754
char *match, /* The current match */
17621755
*piece; /* The current piece of subject */
17631756
size_t result_len; /* Length of result */
@@ -1796,12 +1789,8 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
17961789
result_len = 0;
17971790
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
17981791

1799-
#ifdef HAVE_PCRE_JIT_SUPPORT
1800-
if (!(pce->compile_options & PCRE2_UTF)) {
1801-
no_utf_check = PCRE2_NO_UTF_CHECK;
1802-
}
1792+
options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
18031793

1804-
#endif
18051794
old_mdata_used = mdata_used;
18061795
if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
18071796
mdata_used = 1;
@@ -1821,17 +1810,16 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18211810
while (1) {
18221811
/* Execute the regular expression. */
18231812
#ifdef HAVE_PCRE_JIT_SUPPORT
1824-
if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
1825-
&& no_utf_check && !g_notempty) {
1813+
if ((pce->preg_options & PREG_JIT) && options == PCRE2_NO_UTF_CHECK) {
18261814
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
18271815
PCRE2_NO_UTF_CHECK, match_data, mctx);
18281816
} else
18291817
#endif
18301818
count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1831-
no_utf_check|g_notempty, match_data, mctx);
1819+
options, match_data, mctx);
18321820

18331821
/* the string was already proved to be valid UTF-8 */
1834-
no_utf_check = PCRE2_NO_UTF_CHECK;
1822+
options |= PCRE2_NO_UTF_CHECK;
18351823

18361824
/* Check for too many substrings condition. */
18371825
if (count == 0) {
@@ -1890,20 +1878,24 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18901878
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
18911879
the match again at the same point. If this fails (picked up above) we
18921880
advance to the next character. */
1893-
g_notempty = (start_offset == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
1881+
if (start_offset == offsets[0]) {
1882+
options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
1883+
} else {
1884+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
1885+
}
18941886

18951887
} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
18961888
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
18971889
this is not necessarily the end. We need to advance
18981890
the start offset, and continue. Fudge the offset values
18991891
to achieve this, unless we're already at the end of the string. */
1900-
if (g_notempty != 0 && start_offset < subject_len) {
1892+
if ((options & PCRE2_NOTEMPTY_ATSTART) && start_offset < subject_len) {
19011893
size_t unit_len = calculate_unit_length(pce, piece);
19021894

19031895
start_offset += unit_len;
19041896
memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
19051897
result_len += unit_len;
1906-
g_notempty = 0;
1898+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
19071899
} else {
19081900
if (!result && subject_str) {
19091901
result = zend_string_copy(subject_str);
@@ -2422,11 +2414,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24222414
zend_long limit_val, zend_long flags)
24232415
{
24242416
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
2425-
uint32_t no_utf_check = 0; /* Execution options */
2417+
uint32_t options; /* Execution options */
24262418
int count = 0; /* Count of matched subpatterns */
24272419
PCRE2_SIZE start_offset; /* Where the new search starts */
24282420
PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
2429-
uint32_t g_notempty = 0; /* If the match should not be empty */
24302421
char *last_match; /* Location of last match */
24312422
uint32_t no_empty; /* If NO_EMPTY flag is set */
24322423
uint32_t delim_capture; /* If delimiters should be captured */
@@ -2455,12 +2446,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24552446
last_match = ZSTR_VAL(subject_str);
24562447
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
24572448

2458-
#ifdef HAVE_PCRE_JIT_SUPPORT
2459-
if (!(pce->compile_options & PCRE2_UTF)) {
2460-
no_utf_check = PCRE2_NO_UTF_CHECK;
2461-
}
2449+
options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
24622450

2463-
#endif
24642451
if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
24652452
match_data = mdata;
24662453
} else {
@@ -2474,17 +2461,16 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
24742461
/* Get next piece if no limit or limit not yet reached and something matched*/
24752462
while ((limit_val == -1 || limit_val > 1)) {
24762463
#ifdef HAVE_PCRE_JIT_SUPPORT
2477-
if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
2478-
&& no_utf_check && !g_notempty) {
2464+
if ((pce->preg_options & PREG_JIT) && options == PCRE2_NO_UTF_CHECK) {
24792465
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
24802466
PCRE2_NO_UTF_CHECK, match_data, mctx);
24812467
} else
24822468
#endif
24832469
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2484-
no_utf_check|g_notempty, match_data, mctx);
2470+
options, match_data, mctx);
24852471

24862472
/* the string was already proved to be valid UTF-8 */
2487-
no_utf_check = PCRE2_NO_UTF_CHECK;
2473+
options |= PCRE2_NO_UTF_CHECK;
24882474

24892475
/* Check for too many substrings condition. */
24902476
if (count == 0) {
@@ -2538,16 +2524,20 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
25382524
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
25392525
the match again at the same point. If this fails (picked up above) we
25402526
advance to the next character. */
2541-
g_notempty = (start_offset == offsets[0])? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
2527+
if (start_offset == offsets[0]) {
2528+
options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
2529+
} else {
2530+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
2531+
}
25422532

25432533
} else if (count == PCRE2_ERROR_NOMATCH) {
25442534
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
25452535
this is not necessarily the end. We need to advance
25462536
the start offset, and continue. Fudge the offset values
25472537
to achieve this, unless we're already at the end of the string. */
2548-
if (g_notempty != 0 && start_offset < ZSTR_LEN(subject_str)) {
2538+
if ((options & PCRE2_NOTEMPTY_ATSTART) && start_offset < ZSTR_LEN(subject_str)) {
25492539
start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
2550-
g_notempty = 0;
2540+
options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED);
25512541
} else {
25522542
break;
25532543
}
@@ -2749,7 +2739,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
27492739
zval *entry; /* An entry in the input array */
27502740
uint32_t num_subpats; /* Number of captured subpatterns */
27512741
int count = 0; /* Count of matched subpatterns */
2752-
uint32_t no_utf_check = 0; /* Execution options */
2742+
uint32_t options; /* Execution options */
27532743
zend_string *string_key;
27542744
zend_ulong num_key;
27552745
zend_bool invert; /* Whether to return non-matching
@@ -2775,24 +2765,21 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
27752765
}
27762766
}
27772767

2778-
#ifdef HAVE_PCRE_JIT_SUPPORT
2779-
no_utf_check = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2780-
#endif
2768+
options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
27812769

27822770
/* Go through the input array */
27832771
ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
27842772
zend_string *subject_str = zval_get_string(entry);
27852773

27862774
/* Perform the match */
27872775
#ifdef HAVE_PCRE_JIT_SUPPORT
2788-
if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
2789-
&& no_utf_check) {
2776+
if ((pce->preg_options & PREG_JIT) && options) {
27902777
count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
27912778
PCRE2_NO_UTF_CHECK, match_data, mctx);
27922779
} else
27932780
#endif
27942781
count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2795-
no_utf_check, match_data, mctx);
2782+
options, match_data, mctx);
27962783

27972784
/* Check for too many substrings condition. */
27982785
if (count == 0) {

0 commit comments

Comments
 (0)