@@ -974,12 +974,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
974
974
a global match */
975
975
* match_sets = NULL ; /* An array of sets of matches for each
976
976
subpattern after a global match */
977
- uint32_t no_utf_check = 0 ; /* Execution options */
977
+ uint32_t options ; /* Execution options */
978
978
int count = 0 ; /* Count of matched subpatterns */
979
979
PCRE2_SIZE * offsets ; /* Array of subpattern offsets */
980
980
uint32_t num_subpats ; /* Number of captured subpatterns */
981
981
int matched ; /* Has anything matched */
982
- uint32_t g_notempty = 0 ; /* If the match should not be empty */
983
982
char * * subpat_names ; /* Array for named subpatterns */
984
983
size_t i ;
985
984
uint32_t subpats_order ; /* Order of subpattern matches */
@@ -1058,13 +1057,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
1058
1057
matched = 0 ;
1059
1058
PCRE_G (error_code ) = PHP_PCRE_NO_ERROR ;
1060
1059
1060
+ options = (pce -> compile_options & PCRE2_UTF ) ? 0 : PCRE2_NO_UTF_CHECK ;
1061
1061
1062
- #ifdef HAVE_PCRE_JIT_SUPPORT
1063
- if (!(pce -> compile_options & PCRE2_UTF )) {
1064
- no_utf_check = PCRE2_NO_UTF_CHECK ;
1065
- }
1066
-
1067
- #endif
1068
1062
if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE ) {
1069
1063
match_data = mdata ;
1070
1064
} else {
@@ -1084,8 +1078,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
1084
1078
do {
1085
1079
/* Execute the regular expression. */
1086
1080
#ifdef HAVE_PCRE_JIT_SUPPORT
1087
- if (PCRE_G (jit ) && (pce -> preg_options & PREG_JIT )
1088
- && no_utf_check && !g_notempty ) {
1081
+ if ((pce -> preg_options & PREG_JIT ) && options == PCRE2_NO_UTF_CHECK ) {
1089
1082
if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len ) {
1090
1083
pcre_handle_exec_error (PCRE2_ERROR_BADOFFSET );
1091
1084
break ;
@@ -1095,10 +1088,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
1095
1088
} else
1096
1089
#endif
1097
1090
count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , subject_len , start_offset2 ,
1098
- no_utf_check | g_notempty , match_data , mctx );
1091
+ options , match_data , mctx );
1099
1092
1100
1093
/* the string was already proved to be valid UTF-8 */
1101
- no_utf_check = PCRE2_NO_UTF_CHECK ;
1094
+ options |= PCRE2_NO_UTF_CHECK ;
1102
1095
1103
1096
/* Check for too many substrings condition. */
1104
1097
if (count == 0 ) {
@@ -1307,18 +1300,21 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
1307
1300
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1308
1301
the match again at the same point. If this fails (picked up above) we
1309
1302
advance to the next character. */
1310
- g_notempty = (start_offset2 == offsets [0 ]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0 ;
1311
-
1303
+ if (start_offset2 == offsets [0 ]) {
1304
+ options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
1305
+ } else {
1306
+ options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
1307
+ }
1312
1308
} else if (count == PCRE2_ERROR_NOMATCH ) {
1313
1309
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1314
1310
this is not necessarily the end. We need to advance
1315
1311
the start offset, and continue. Fudge the offset values
1316
1312
to achieve this, unless we're already at the end of the string. */
1317
- if (g_notempty != 0 && start_offset2 < subject_len ) {
1313
+ if (( options & PCRE2_NOTEMPTY_ATSTART ) && start_offset2 < subject_len ) {
1318
1314
size_t unit_len = calculate_unit_length (pce , subject + start_offset2 );
1319
1315
1320
1316
start_offset2 += unit_len ;
1321
- g_notempty = 0 ;
1317
+ options &= ~( PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED ) ;
1322
1318
} else {
1323
1319
break ;
1324
1320
}
@@ -1501,7 +1497,7 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1501
1497
/* {{{ php_pcre_replace_impl() */
1502
1498
PHPAPI zend_string * php_pcre_replace_impl (pcre_cache_entry * pce , zend_string * subject_str , char * subject , size_t subject_len , zend_string * replace_str , size_t limit , size_t * replace_count )
1503
1499
{
1504
- uint32_t no_utf_check = 0 ; /* Execution options */
1500
+ uint32_t options ; /* Execution options */
1505
1501
int count = 0 ; /* Count of matched subpatterns */
1506
1502
PCRE2_SIZE * offsets ; /* Array of subpattern offsets */
1507
1503
char * * subpat_names ; /* Array for named subpatterns */
@@ -1511,7 +1507,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
1511
1507
size_t match_len ; /* Length of the current match */
1512
1508
int backref ; /* Backreference number */
1513
1509
PCRE2_SIZE start_offset ; /* Where the new search starts */
1514
- uint32_t g_notempty = 0 ; /* If the match should not be empty */
1515
1510
char * walkbuf , /* Location of current replacement in the result */
1516
1511
* walk , /* Used to walk the replacement string */
1517
1512
* match , /* The current match */
@@ -1551,12 +1546,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
1551
1546
result_len = 0 ;
1552
1547
PCRE_G (error_code ) = PHP_PCRE_NO_ERROR ;
1553
1548
1554
- #ifdef HAVE_PCRE_JIT_SUPPORT
1555
- if (!(pce -> compile_options & PCRE2_UTF )) {
1556
- no_utf_check = PCRE2_NO_UTF_CHECK ;
1557
- }
1549
+ options = (pce -> compile_options & PCRE2_UTF ) ? 0 : PCRE2_NO_UTF_CHECK ;
1558
1550
1559
- #endif
1560
1551
if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE ) {
1561
1552
match_data = mdata ;
1562
1553
} else {
@@ -1573,17 +1564,16 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
1573
1564
while (1 ) {
1574
1565
/* Execute the regular expression. */
1575
1566
#ifdef HAVE_PCRE_JIT_SUPPORT
1576
- if (PCRE_G (jit ) && (pce -> preg_options & PREG_JIT )
1577
- && no_utf_check && !g_notempty ) {
1567
+ if ((pce -> preg_options & PREG_JIT ) && options == PCRE2_NO_UTF_CHECK ) {
1578
1568
count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )subject , subject_len , start_offset ,
1579
1569
PCRE2_NO_UTF_CHECK , match_data , mctx );
1580
1570
} else
1581
1571
#endif
1582
1572
count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , subject_len , start_offset ,
1583
- no_utf_check | g_notempty , match_data , mctx );
1573
+ options , match_data , mctx );
1584
1574
1585
1575
/* the string was already proved to be valid UTF-8 */
1586
- no_utf_check = PCRE2_NO_UTF_CHECK ;
1576
+ options |= PCRE2_NO_UTF_CHECK ;
1587
1577
1588
1578
/* Check for too many substrings condition. */
1589
1579
if (UNEXPECTED (count == 0 )) {
@@ -1690,20 +1680,24 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
1690
1680
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1691
1681
the match again at the same point. If this fails (picked up above) we
1692
1682
advance to the next character. */
1693
- g_notempty = (start_offset == offsets [0 ]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0 ;
1683
+ if (start_offset == offsets [0 ]) {
1684
+ options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
1685
+ } else {
1686
+ options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
1687
+ }
1694
1688
1695
1689
} else if (count == PCRE2_ERROR_NOMATCH || limit == 0 ) {
1696
1690
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1697
1691
this is not necessarily the end. We need to advance
1698
1692
the start offset, and continue. Fudge the offset values
1699
1693
to achieve this, unless we're already at the end of the string. */
1700
- if (g_notempty != 0 && start_offset < subject_len ) {
1694
+ if (( options & PCRE2_NOTEMPTY_ATSTART ) && start_offset < subject_len ) {
1701
1695
size_t unit_len = calculate_unit_length (pce , piece );
1702
1696
1703
1697
start_offset += unit_len ;
1704
1698
memcpy (ZSTR_VAL (result ) + result_len , piece , unit_len );
1705
1699
result_len += unit_len ;
1706
- g_notempty = 0 ;
1700
+ options &= ~( PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED ) ;
1707
1701
} else {
1708
1702
if (!result && subject_str ) {
1709
1703
result = zend_string_copy (subject_str );
@@ -1749,15 +1743,14 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
1749
1743
/* {{{ php_pcre_replace_func_impl() */
1750
1744
static zend_string * php_pcre_replace_func_impl (pcre_cache_entry * pce , zend_string * subject_str , char * subject , size_t subject_len , zend_fcall_info * fci , zend_fcall_info_cache * fcc , size_t limit , size_t * replace_count )
1751
1745
{
1752
- uint32_t no_utf_check = 0 ; /* Execution options */
1746
+ uint32_t options ; /* Execution options */
1753
1747
int count = 0 ; /* Count of matched subpatterns */
1754
1748
PCRE2_SIZE * offsets ; /* Array of subpattern offsets */
1755
1749
char * * subpat_names ; /* Array for named subpatterns */
1756
1750
uint32_t num_subpats ; /* Number of captured subpatterns */
1757
1751
size_t new_len ; /* Length of needed storage */
1758
1752
size_t alloc_len ; /* Actual allocated length */
1759
1753
PCRE2_SIZE start_offset ; /* Where the new search starts */
1760
- uint32_t g_notempty = 0 ; /* If the match should not be empty */
1761
1754
char * match , /* The current match */
1762
1755
* piece ; /* The current piece of subject */
1763
1756
size_t result_len ; /* Length of result */
@@ -1796,12 +1789,8 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
1796
1789
result_len = 0 ;
1797
1790
PCRE_G (error_code ) = PHP_PCRE_NO_ERROR ;
1798
1791
1799
- #ifdef HAVE_PCRE_JIT_SUPPORT
1800
- if (!(pce -> compile_options & PCRE2_UTF )) {
1801
- no_utf_check = PCRE2_NO_UTF_CHECK ;
1802
- }
1792
+ options = (pce -> compile_options & PCRE2_UTF ) ? 0 : PCRE2_NO_UTF_CHECK ;
1803
1793
1804
- #endif
1805
1794
old_mdata_used = mdata_used ;
1806
1795
if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE ) {
1807
1796
mdata_used = 1 ;
@@ -1821,17 +1810,16 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
1821
1810
while (1 ) {
1822
1811
/* Execute the regular expression. */
1823
1812
#ifdef HAVE_PCRE_JIT_SUPPORT
1824
- if (PCRE_G (jit ) && (pce -> preg_options & PREG_JIT )
1825
- && no_utf_check && !g_notempty ) {
1813
+ if ((pce -> preg_options & PREG_JIT ) && options == PCRE2_NO_UTF_CHECK ) {
1826
1814
count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )subject , subject_len , start_offset ,
1827
1815
PCRE2_NO_UTF_CHECK , match_data , mctx );
1828
1816
} else
1829
1817
#endif
1830
1818
count = pcre2_match (pce -> re , (PCRE2_SPTR )subject , subject_len , start_offset ,
1831
- no_utf_check | g_notempty , match_data , mctx );
1819
+ options , match_data , mctx );
1832
1820
1833
1821
/* the string was already proved to be valid UTF-8 */
1834
- no_utf_check = PCRE2_NO_UTF_CHECK ;
1822
+ options |= PCRE2_NO_UTF_CHECK ;
1835
1823
1836
1824
/* Check for too many substrings condition. */
1837
1825
if (count == 0 ) {
@@ -1890,20 +1878,24 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
1890
1878
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1891
1879
the match again at the same point. If this fails (picked up above) we
1892
1880
advance to the next character. */
1893
- g_notempty = (start_offset == offsets [0 ]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0 ;
1881
+ if (start_offset == offsets [0 ]) {
1882
+ options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
1883
+ } else {
1884
+ options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
1885
+ }
1894
1886
1895
1887
} else if (count == PCRE2_ERROR_NOMATCH || limit == 0 ) {
1896
1888
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1897
1889
this is not necessarily the end. We need to advance
1898
1890
the start offset, and continue. Fudge the offset values
1899
1891
to achieve this, unless we're already at the end of the string. */
1900
- if (g_notempty != 0 && start_offset < subject_len ) {
1892
+ if (( options & PCRE2_NOTEMPTY_ATSTART ) && start_offset < subject_len ) {
1901
1893
size_t unit_len = calculate_unit_length (pce , piece );
1902
1894
1903
1895
start_offset += unit_len ;
1904
1896
memcpy (ZSTR_VAL (result ) + result_len , piece , unit_len );
1905
1897
result_len += unit_len ;
1906
- g_notempty = 0 ;
1898
+ options &= ~( PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED ) ;
1907
1899
} else {
1908
1900
if (!result && subject_str ) {
1909
1901
result = zend_string_copy (subject_str );
@@ -2422,11 +2414,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2422
2414
zend_long limit_val , zend_long flags )
2423
2415
{
2424
2416
PCRE2_SIZE * offsets ; /* Array of subpattern offsets */
2425
- uint32_t no_utf_check = 0 ; /* Execution options */
2417
+ uint32_t options ; /* Execution options */
2426
2418
int count = 0 ; /* Count of matched subpatterns */
2427
2419
PCRE2_SIZE start_offset ; /* Where the new search starts */
2428
2420
PCRE2_SIZE next_offset ; /* End of the last delimiter match + 1 */
2429
- uint32_t g_notempty = 0 ; /* If the match should not be empty */
2430
2421
char * last_match ; /* Location of last match */
2431
2422
uint32_t no_empty ; /* If NO_EMPTY flag is set */
2432
2423
uint32_t delim_capture ; /* If delimiters should be captured */
@@ -2455,12 +2446,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2455
2446
last_match = ZSTR_VAL (subject_str );
2456
2447
PCRE_G (error_code ) = PHP_PCRE_NO_ERROR ;
2457
2448
2458
- #ifdef HAVE_PCRE_JIT_SUPPORT
2459
- if (!(pce -> compile_options & PCRE2_UTF )) {
2460
- no_utf_check = PCRE2_NO_UTF_CHECK ;
2461
- }
2449
+ options = (pce -> compile_options & PCRE2_UTF ) ? 0 : PCRE2_NO_UTF_CHECK ;
2462
2450
2463
- #endif
2464
2451
if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE ) {
2465
2452
match_data = mdata ;
2466
2453
} else {
@@ -2474,17 +2461,16 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2474
2461
/* Get next piece if no limit or limit not yet reached and something matched*/
2475
2462
while ((limit_val == -1 || limit_val > 1 )) {
2476
2463
#ifdef HAVE_PCRE_JIT_SUPPORT
2477
- if (PCRE_G (jit ) && (pce -> preg_options & PREG_JIT )
2478
- && no_utf_check && !g_notempty ) {
2464
+ if ((pce -> preg_options & PREG_JIT ) && options == PCRE2_NO_UTF_CHECK ) {
2479
2465
count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL (subject_str ), ZSTR_LEN (subject_str ), start_offset ,
2480
2466
PCRE2_NO_UTF_CHECK , match_data , mctx );
2481
2467
} else
2482
2468
#endif
2483
2469
count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL (subject_str ), ZSTR_LEN (subject_str ), start_offset ,
2484
- no_utf_check | g_notempty , match_data , mctx );
2470
+ options , match_data , mctx );
2485
2471
2486
2472
/* the string was already proved to be valid UTF-8 */
2487
- no_utf_check = PCRE2_NO_UTF_CHECK ;
2473
+ options |= PCRE2_NO_UTF_CHECK ;
2488
2474
2489
2475
/* Check for too many substrings condition. */
2490
2476
if (count == 0 ) {
@@ -2538,16 +2524,20 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
2538
2524
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2539
2525
the match again at the same point. If this fails (picked up above) we
2540
2526
advance to the next character. */
2541
- g_notempty = (start_offset == offsets [0 ])? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0 ;
2527
+ if (start_offset == offsets [0 ]) {
2528
+ options |= (PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
2529
+ } else {
2530
+ options &= ~(PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED );
2531
+ }
2542
2532
2543
2533
} else if (count == PCRE2_ERROR_NOMATCH ) {
2544
2534
/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2545
2535
this is not necessarily the end. We need to advance
2546
2536
the start offset, and continue. Fudge the offset values
2547
2537
to achieve this, unless we're already at the end of the string. */
2548
- if (g_notempty != 0 && start_offset < ZSTR_LEN (subject_str )) {
2538
+ if (( options & PCRE2_NOTEMPTY_ATSTART ) && start_offset < ZSTR_LEN (subject_str )) {
2549
2539
start_offset += calculate_unit_length (pce , ZSTR_VAL (subject_str ) + start_offset );
2550
- g_notempty = 0 ;
2540
+ options &= ~( PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED ) ;
2551
2541
} else {
2552
2542
break ;
2553
2543
}
@@ -2749,7 +2739,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
2749
2739
zval * entry ; /* An entry in the input array */
2750
2740
uint32_t num_subpats ; /* Number of captured subpatterns */
2751
2741
int count = 0 ; /* Count of matched subpatterns */
2752
- uint32_t no_utf_check = 0 ; /* Execution options */
2742
+ uint32_t options ; /* Execution options */
2753
2743
zend_string * string_key ;
2754
2744
zend_ulong num_key ;
2755
2745
zend_bool invert ; /* Whether to return non-matching
@@ -2775,24 +2765,21 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
2775
2765
}
2776
2766
}
2777
2767
2778
- #ifdef HAVE_PCRE_JIT_SUPPORT
2779
- no_utf_check = (pce -> compile_options & PCRE2_UTF ) ? 0 : PCRE2_NO_UTF_CHECK ;
2780
- #endif
2768
+ options = (pce -> compile_options & PCRE2_UTF ) ? 0 : PCRE2_NO_UTF_CHECK ;
2781
2769
2782
2770
/* Go through the input array */
2783
2771
ZEND_HASH_FOREACH_KEY_VAL (Z_ARRVAL_P (input ), num_key , string_key , entry ) {
2784
2772
zend_string * subject_str = zval_get_string (entry );
2785
2773
2786
2774
/* Perform the match */
2787
2775
#ifdef HAVE_PCRE_JIT_SUPPORT
2788
- if (PCRE_G (jit ) && (pce -> preg_options & PREG_JIT )
2789
- && no_utf_check ) {
2776
+ if ((pce -> preg_options & PREG_JIT ) && options ) {
2790
2777
count = pcre2_jit_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL (subject_str ), ZSTR_LEN (subject_str ), 0 ,
2791
2778
PCRE2_NO_UTF_CHECK , match_data , mctx );
2792
2779
} else
2793
2780
#endif
2794
2781
count = pcre2_match (pce -> re , (PCRE2_SPTR )ZSTR_VAL (subject_str ), ZSTR_LEN (subject_str ), 0 ,
2795
- no_utf_check , match_data , mctx );
2782
+ options , match_data , mctx );
2796
2783
2797
2784
/* Check for too many substrings condition. */
2798
2785
if (count == 0 ) {
0 commit comments