Skip to content

ext/pcre: update to PCRE2 v10.44 #14498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ PHP NEWS
- PCRE:
. Upgrade bundled pcre2lib to version 10.43. (nielsdos)
. Add "/r" modifier. (Ayesh)
. Upgrade bundled pcre2lib to version 10.44. (Ayesh)

- PDO:
. Fixed setAttribute and getAttribute. (SakiTakamachi)
Expand Down
6 changes: 4 additions & 2 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ PHP 8.4 UPGRADE NOTES
of JIT startup initialization issues.

- PCRE:
. The bundled pcre2lib has been updated to version 10.43.
. The bundled pcre2lib has been updated to version 10.44.
As a consequence, this means {,3} is now recognized as a quantifier instead
of as text. Furthermore, the meaning of some character classes in UCP mode
has changed. Consult https://github.com/PCRE2Project/pcre2/blob/master/NEWS
Expand Down Expand Up @@ -243,10 +243,12 @@ PHP 8.4 UPGRADE NOTES
. Added support for the unix timestamp extension for zip archives.

- PCRE:
. The bundled pcre2lib has been updated to version 10.43.
. The bundled pcre2lib has been updated to version 10.44.
As a consequence, LoongArch JIT support has been added, spaces
are now allowed between braces in Perl-compatible items, and
variable-length lookbehind assertions are now supported.
. With pcre2lib version 10.44, the maximum length of a capture group name
has increased from 32 to 128.
. Added support for the "r" (PCRE2_EXTRA_CASELESS_RESTRICT) modifier, as well
as the (?r) mode modifier. When enabled along with the case-insensitive
modifier ("i"), the expression locks out mixing of ASCII and non-ASCII
Expand Down
7 changes: 5 additions & 2 deletions ext/pcre/pcre2lib/pcre2.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */

#define PCRE2_MAJOR 10
#define PCRE2_MINOR 43
#define PCRE2_MINOR 44
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2024-02-16
#define PCRE2_DATE 2024-06-07

/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
Expand Down Expand Up @@ -603,6 +603,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
Expand Down Expand Up @@ -901,6 +903,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
Expand Down
27 changes: 18 additions & 9 deletions ext/pcre/pcre2lib/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -808,7 +808,8 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100 };
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
ERR101 };

/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
Expand Down Expand Up @@ -7549,7 +7550,8 @@ for (;; pptr++)
if (lengthptr != NULL)
{
PCRE2_SIZE delta;
if (PRIV(ckd_smul)(&delta, repeat_min - 1, length_prevgroup) ||
if (PRIV(ckd_smul)(&delta, repeat_min - 1,
(int)length_prevgroup) ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
Expand Down Expand Up @@ -7599,7 +7601,7 @@ for (;; pptr++)
{
PCRE2_SIZE delta;
if (PRIV(ckd_smul)(&delta, repeat_max,
length_prevgroup + 1 + 2 + 2*LINK_SIZE) ||
(int)length_prevgroup + 1 + 2 + 2*LINK_SIZE) ||
OFLOW_MAX + (2 + 2*LINK_SIZE) - *lengthptr < delta)
{
*errorcodeptr = ERR20;
Expand Down Expand Up @@ -9908,7 +9910,7 @@ do
*bptr |= branchlength; /* branchlength never more than 65535 */
bptr = *pptrptr;
}
while (*bptr == META_ALT);
while (META_CODE(*bptr) == META_ALT);

/* If any branch is of variable length, the whole lookbehind is of variable
length. If the maximum length of any branch exceeds the maximum for variable
Expand Down Expand Up @@ -10601,14 +10603,21 @@ if (length > MAX_PATTERN_SIZE)
goto HAD_CB_ERROR;
}

/* Compute the size of, and then get and initialize, the data block for storing
the compiled pattern and names table. Integer overflow should no longer be
possible because nowadays we limit the maximum value of cb.names_found and
cb.name_entry_size. */
/* Compute the size of, then, if not too large, get and initialize the data
block for storing the compiled pattern and names table. Integer overflow should
no longer be possible because nowadays we limit the maximum value of
cb.names_found and cb.name_entry_size. */

re_blocksize = sizeof(pcre2_real_code) +
CU2BYTES(length +
(PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size);

if (re_blocksize > ccontext->max_pattern_compiled_length)
{
errorcode = ERR101;
goto HAD_CB_ERROR;
}

re = (pcre2_real_code *)
ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data);
if (re == NULL)
Expand Down
10 changes: 9 additions & 1 deletion ext/pcre/pcre2lib/pcre2_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -136,6 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = {
NULL, /* Stack guard data */
PRIV(default_tables), /* Character tables */
PCRE2_UNSET, /* Max pattern length */
PCRE2_UNSET, /* Max pattern compiled length */
BSR_DEFAULT, /* Backslash R default */
NEWLINE_DEFAULT, /* Newline convention */
PARENS_NEST_LIMIT, /* As it says */
Expand Down Expand Up @@ -352,6 +353,13 @@ ccontext->max_pattern_length = length;
return 0;
}

/* Record an application-supplied limit on the size of a compiled pattern.
The value is stored in the compile context exactly as given; no range check or
other validation is performed here.

Arguments:
  ccontext   the compile context to update
  length     the limit to store

Returns:     0 always
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext,
  PCRE2_SIZE length)
{
ccontext->max_pattern_compiled_length = length;
return 0;
}

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
{
Expand Down
3 changes: 2 additions & 1 deletion ext/pcre/pcre2lib/pcre2_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -189,6 +189,7 @@ static const unsigned char compile_error_texts[] =
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
/* 100 */
"branch too long in variable-length lookbehind assertion\0"
"compiled pattern would be longer than the limit set by the application\0"
;

/* Match-time and UTF error texts are in the same format. */
Expand Down
28 changes: 21 additions & 7 deletions ext/pcre/pcre2lib/pcre2_extuni.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -75,7 +75,11 @@ return NULL;
* Match an extended grapheme sequence *
*************************************************/

/*
/* NOTE: The logic contained in this function is replicated in three special-
purpose functions in the pcre2_jit_compile.c module. If the logic below is
changed, they must be kept in step so that the interpreter and the JIT have the
same behaviour.

Arguments:
c the first character
eptr pointer to next character
Expand All @@ -92,6 +96,7 @@ PCRE2_SPTR
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
{
BOOL was_ep_ZWJ = FALSE;
int lgb = UCD_GRAPHBREAK(c);

while (eptr < end_subject)
Expand All @@ -102,6 +107,12 @@ while (eptr < end_subject)
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;

/* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
preceded by Extended Pictographic. */

if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
break;

/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */

Expand Down Expand Up @@ -129,12 +140,15 @@ while (eptr < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}

/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
allows any number of them before a following Extended_Pictographic. */
/* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
between; see next statement). */

was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);

/* If Extend follows Extended_Pictographic, do not update lgb; this allows
any number of them before a following ZWJ. */

if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;

eptr += len;
if (xcount != NULL) *xcount += 1;
Expand Down
3 changes: 2 additions & 1 deletion ext/pcre/pcre2lib/pcre2_intmodedep.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -568,6 +568,7 @@ typedef struct pcre2_real_compile_context {
void *stack_guard_data;
const uint8_t *tables;
PCRE2_SIZE max_pattern_length;
PCRE2_SIZE max_pattern_compiled_length;
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t parens_nest_limit;
Expand Down
Loading
Loading