Skip to content

Commit 9f2d039

Browse files
committed
Update to PCRE2 10.35
We also backport the fix for bug #79846, and add a test case for the related bug #79363.
1 parent c3c76db commit 9f2d039

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4636
-3271
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ PHP NEWS
1010
. Fixed bug #80083 (Optimizer pass 6 removes variables used for ibm_db2 data
1111
binding). (Nikita)
1212

13+
- PCRE:
14+
. Updated to PCRE2 10.35. (cmb)
15+
1316
01 Oct 2020, PHP 7.4.11
1417

1518
- Core:

ext/pcre/pcre2lib/pcre2.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
/* This is the public header file for the PCRE library, second API, to be
66
#included by applications that call PCRE2 functions.
77
8-
Copyright (c) 2016-2019 University of Cambridge
8+
Copyright (c) 2016-2020 University of Cambridge
99
1010
-----------------------------------------------------------------------------
1111
Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
4242
/* The current PCRE version information. */
4343

4444
#define PCRE2_MAJOR 10
45-
#define PCRE2_MINOR 34
45+
#define PCRE2_MINOR 35
4646
#define PCRE2_PRERELEASE
47-
#define PCRE2_DATE 2019-11-21
47+
#define PCRE2_DATE 2020-05-09
4848

4949
/* When an application links to a PCRE DLL in Windows, the symbols that are
5050
imported have to be identified as such. When building PCRE2, the appropriate
@@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
181181
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
182182
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
183183
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
184+
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
185+
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
186+
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
184187

185188
/* Options for pcre2_pattern_convert(). */
186189

@@ -445,6 +448,7 @@ released, the numbers must not be changed. */
445448
#define PCRE2_CONFIG_HEAPLIMIT 12
446449
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
447450
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
451+
#define PCRE2_CONFIG_TABLES_LENGTH 15
448452

449453

450454
/* Types for code units in patterns and subject strings. */

ext/pcre/pcre2lib/pcre2_auto_possess.c

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
77
88
Written by Philip Hazel
99
Original API code Copyright (c) 1997-2012 University of Cambridge
10-
New API code Copyright (c) 2016-2019 University of Cambridge
10+
New API code Copyright (c) 2016-2020 University of Cambridge
1111
1212
-----------------------------------------------------------------------------
1313
Redistribution and use in source and binary forms, with or without
@@ -292,6 +292,7 @@ possessification, and if so, fills a list with its properties.
292292
Arguments:
293293
code points to start of expression
294294
utf TRUE if in UTF mode
295+
ucp TRUE if in UCP mode
295296
fcc points to the case-flipping table
296297
list points to output list
297298
list[0] will be filled with the opcode
@@ -304,7 +305,7 @@ Returns: points to the start of the next opcode if *code is accepted
304305
*/
305306

306307
static PCRE2_SPTR
307-
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
308+
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
308309
uint32_t *list)
309310
{
310311
PCRE2_UCHAR c = *code;
@@ -316,7 +317,8 @@ uint32_t chr;
316317
uint32_t *clist_dest;
317318
const uint32_t *clist_src;
318319
#else
319-
(void)utf; /* Suppress "unused parameter" compiler warning */
320+
(void)utf; /* Suppress "unused parameter" compiler warnings */
321+
(void)ucp;
320322
#endif
321323

322324
list[0] = c;
@@ -396,7 +398,7 @@ switch(c)
396398
list[2] = chr;
397399

398400
#ifdef SUPPORT_UNICODE
399-
if (chr < 128 || (chr < 256 && !utf))
401+
if (chr < 128 || (chr < 256 && !utf && !ucp))
400402
list[3] = fcc[chr];
401403
else
402404
list[3] = UCD_OTHERCASE(chr);
@@ -503,6 +505,7 @@ which case the base cannot be possessified.
503505
Arguments:
504506
code points to the byte code
505507
utf TRUE in UTF mode
508+
ucp TRUE in UCP mode
506509
cb compile data block
507510
base_list the data list of the base opcode
508511
base_end the end of the base opcode
@@ -512,7 +515,7 @@ Returns: TRUE if the auto-possessification is possible
512515
*/
513516

514517
static BOOL
515-
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
518+
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
516519
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
517520
{
518521
PCRE2_UCHAR c;
@@ -651,7 +654,7 @@ for(;;)
651654

652655
while (*next_code == OP_ALT)
653656
{
654-
if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
657+
if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
655658
return FALSE;
656659
code = next_code + 1 + LINK_SIZE;
657660
next_code += GET(next_code, 1);
@@ -672,7 +675,8 @@ for(;;)
672675
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
673676

674677
next_code += 1 + LINK_SIZE;
675-
if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
678+
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
679+
rec_limit))
676680
return FALSE;
677681

678682
code += PRIV(OP_lengths)[c];
@@ -688,7 +692,7 @@ for(;;)
688692
/* We now have the next appropriate opcode to compare with the base. Check
689693
for a supported opcode, and load its properties. */
690694

691-
code = get_chr_property_list(code, utf, cb->fcc, list);
695+
code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
692696
if (code == NULL) return FALSE; /* Unsupported */
693697

694698
/* If either opcode is a small character list, set pointers for comparing
@@ -1100,21 +1104,22 @@ leaving the remainder of the pattern unpossessified.
11001104
11011105
Arguments:
11021106
code points to start of the byte code
1103-
utf TRUE in UTF mode
11041107
cb compile data block
11051108
11061109
Returns: 0 for success
11071110
-1 if a non-existent opcode is encountered
11081111
*/
11091112

11101113
int
1111-
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
1114+
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
11121115
{
11131116
PCRE2_UCHAR c;
11141117
PCRE2_SPTR end;
11151118
PCRE2_UCHAR *repeat_opcode;
11161119
uint32_t list[8];
11171120
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
1121+
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1122+
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
11181123

11191124
for (;;)
11201125
{
@@ -1126,10 +1131,11 @@ for (;;)
11261131
{
11271132
c -= get_repeat_base(c) - OP_STAR;
11281133
end = (c <= OP_MINUPTO) ?
1129-
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
1134+
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
11301135
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
11311136

1132-
if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
1137+
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1138+
&rec_limit))
11331139
{
11341140
switch(c)
11351141
{
@@ -1181,11 +1187,11 @@ for (;;)
11811187
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
11821188
{
11831189
/* end must not be NULL. */
1184-
end = get_chr_property_list(code, utf, cb->fcc, list);
1190+
end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
11851191

11861192
list[1] = (c & 1) == 0;
11871193

1188-
if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
1194+
if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
11891195
{
11901196
switch (c)
11911197
{

ext/pcre/pcre2lib/pcre2_chartables.c

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,21 @@
22
* Perl-Compatible Regular Expressions *
33
*************************************************/
44

5-
/* This file was automatically written by the dftables auxiliary
5+
/* This file was automatically written by the pcre2_dftables auxiliary
66
program. It contains character tables that are used when no external
77
tables are passed to PCRE2 by the application that calls it. The tables
88
are used only for characters whose code values are less than 256. */
99

10-
/*The dftables program (which is distributed with PCRE2) can be used to
11-
build alternative versions of this file. This is necessary if you are
10+
/* This set of tables was written in the C locale. */
11+
12+
/* The pcre2_dftables program (which is distributed with PCRE2) can be used
13+
to build alternative versions of this file. This is necessary if you are
1214
running in an EBCDIC environment, or if you want to default to a different
13-
encoding, for example ISO-8859-1. When dftables is run, it creates these
14-
tables in the current locale. This happens automatically if PCRE2 is
15-
configured with --enable-rebuild-chartables. */
15+
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
16+
these tables in the "C" locale by default. This happens automatically if
17+
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
18+
pcre2_dftables manually with the -L option to build tables using the LC_ALL
19+
locale. */
1620

1721
/* The following #include is present because without it gcc 4.x may remove
1822
the array definition from the final binary if PCRE2 is built into a static
@@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
102106
/* This table contains bit maps for various character classes. Each map is 32
103107
bytes long and the bits run from the least significant end of each byte. The
104108
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
105-
graph print, punct, and cntrl. Other classes are built from combinations. */
109+
graph, print, punct, and cntrl. Other classes are built from combinations. */
106110

107-
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
111+
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
108112
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
109113
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
110114
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
111115

112-
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
116+
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
113117
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
114118
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
115119
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
116120

117-
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
121+
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
118122
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
119123
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
120124
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
121125

122-
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
126+
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
123127
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
124128
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
125129
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
126130

127-
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
131+
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
128132
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
129133
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
130134
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
131135

132-
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
136+
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
133137
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
134138
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
135139
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
136140

137-
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
141+
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
138142
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
139143
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
140144
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
141145

142-
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
146+
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
143147
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
144148
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
145149
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
146150

147-
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
151+
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
148152
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
149153
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
150154
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
151155

152-
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
156+
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
153157
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
154158
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
155159
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0 commit comments

Comments
 (0)