From 0d81b636be804afcfa3cd949122eb524d70c9290 Mon Sep 17 00:00:00 2001 From: wfjsw Date: Thu, 25 Aug 2022 02:38:05 +0800 Subject: [PATCH 1/5] feat: PCRE2 JIT --- src/operators/verify_cc.cc | 15 ++++++++++++- src/operators/verify_cc.h | 3 +++ src/utils/regex.cc | 43 +++++++++++++++++++++++++++++++------- src/utils/regex.h | 3 +++ 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/src/operators/verify_cc.cc b/src/operators/verify_cc.cc index bdb63f3452..ed466872ea 100644 --- a/src/operators/verify_cc.cc +++ b/src/operators/verify_cc.cc @@ -36,6 +36,8 @@ namespace operators { VerifyCC::~VerifyCC() { #if WITH_PCRE2 pcre2_code_free(m_pc); + pcre2_match_context_free(m_pmc); + pcre2_jit_stack_free(m_pcjs); #else if (m_pc != NULL) { pcre_free(m_pc); @@ -105,6 +107,11 @@ bool VerifyCC::init(const std::string &param2, std::string *error) { if (m_pc == NULL) { return false; } + + m_pcje = pcre2_jit_compile(m_pc, PCRE2_JIT_COMPLETE); + m_pmc = pcre2_match_context_create(NULL); + m_pcjs = pcre2_jit_stack_create(32*1024, 512*1024, NULL); + pcre2_jit_stack_assign(m_pmc, NULL, m_pcjs); #else const char *errptr = NULL; int erroffset = 0; @@ -142,8 +149,14 @@ bool VerifyCC::evaluate(Transaction *t, RuleWithActions *rule, PCRE2_SPTR pcre2_i = reinterpret_cast<PCRE2_SPTR>(i.c_str()); pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL); + int ret; for (offset = 0; offset < target_length; offset++) { - int ret = pcre2_match(m_pc, pcre2_i, target_length, offset, 0, match_data, NULL); + + if (m_pcje == 0) { + ret = pcre2_jit_match(m_pc, pcre2_i, target_length, offset, 0, match_data, m_pmc); + } else { + ret = pcre2_match(m_pc, pcre2_i, target_length, offset, 0, match_data, m_pmc); + } /* If there was no match, then we are done. 
*/ if (ret < 0) { diff --git a/src/operators/verify_cc.h b/src/operators/verify_cc.h index 91195378a4..3b026f88a6 100644 --- a/src/operators/verify_cc.h +++ b/src/operators/verify_cc.h @@ -53,6 +53,9 @@ class VerifyCC : public Operator { private: #if WITH_PCRE2 pcre2_code *m_pc; + pcre2_match_context *m_pmc; + int m_pcje; + pcre2_jit_stack *m_pcjs; #else pcre *m_pc; pcre_extra *m_pce; diff --git a/src/utils/regex.cc b/src/utils/regex.cc index 45878ed09f..d66efcc8a6 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -73,6 +73,11 @@ Regex::Regex(const std::string& pattern_, bool ignoreCase) PCRE2_SIZE erroroffset = 0; m_pc = pcre2_compile(pcre2_pattern, PCRE2_ZERO_TERMINATED, pcre2_options, &errornumber, &erroroffset, NULL); + + m_pcje = pcre2_jit_compile(m_pc, PCRE2_JIT_COMPLETE); + m_pmc = pcre2_match_context_create(NULL); + m_pcjs = pcre2_jit_stack_create(32*1024, 512*1024, NULL); + pcre2_jit_stack_assign(m_pmc, NULL, m_pcjs); #else const char *errptr = NULL; int erroffset; @@ -92,6 +97,8 @@ Regex::Regex(const std::string& pattern_, bool ignoreCase) Regex::~Regex() { #if WITH_PCRE2 pcre2_code_free(m_pc); + pcre2_match_context_free(m_pmc); + pcre2_jit_stack_free(m_pcjs); #else if (m_pc != NULL) { pcre_free(m_pc); @@ -118,8 +125,13 @@ std::list Regex::searchAll(const std::string& s) const { pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL); do { - rc = pcre2_match(m_pc, pcre2_s, s.length(), - offset, 0, match_data, NULL); + if (m_pcje == 0) { + rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), + offset, 0, match_data, m_pmc); + } else { + rc = pcre2_match(m_pc, pcre2_s, s.length(), + offset, 0, match_data, m_pmc); + } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else const char *subject = s.c_str(); @@ -159,7 +171,12 @@ bool Regex::searchOneMatch(const std::string& s, std::vector& cap #ifdef WITH_PCRE2 PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str()); pcre2_match_data *match_data = 
pcre2_match_data_create_from_pattern(m_pc, NULL); - int rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); + int rc; + if (m_pcje == 0) { + rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + } else { + rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else const char *subject = s.c_str(); @@ -198,7 +215,7 @@ bool Regex::searchGlobal(const std::string& s, std::vector& captu pcre2_options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; } int rc = pcre2_match(m_pc, pcre2_s, s.length(), - startOffset, pcre2_options, match_data, NULL); + startOffset, pcre2_options, match_data, m_pmc); PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -270,9 +287,14 @@ int Regex::search(const std::string& s, SMatch *match) const { #ifdef WITH_PCRE2 PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str()); pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL); - int ret = pcre2_match(m_pc, pcre2_s, s.length(), - 0, 0, match_data, NULL) > 0; - + int ret; + if (m_pcje == 0) { + ret = pcre2_match(m_pc, pcre2_s, s.length(), + 0, 0, match_data, m_pmc) > 0; + } else { + ret = pcre2_match(m_pc, pcre2_s, s.length(), + 0, 0, match_data, m_pmc) > 0; + } if (ret > 0) { // match PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -297,7 +319,12 @@ int Regex::search(const std::string& s) const { #ifdef WITH_PCRE2 PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str()); pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL); - int rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); + int rc; + if (m_pcje == 0) { + rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + } else { + rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + } pcre2_match_data_free(match_data); if (rc > 0) { return 1; // match diff --git a/src/utils/regex.h b/src/utils/regex.h 
index 6bd8ce927d..4cbbe880bf 100644 --- a/src/utils/regex.h +++ b/src/utils/regex.h @@ -85,6 +85,9 @@ class Regex { private: #if WITH_PCRE2 pcre2_code *m_pc; + pcre2_match_context *m_pmc; + int m_pcje; + pcre2_jit_stack *m_pcjs; #else pcre *m_pc = NULL; pcre_extra *m_pce = NULL; From 6518973464d715466a9dc85a1a59f8195685e600 Mon Sep 17 00:00:00 2001 From: Jabasukuriputo Wang Date: Mon, 5 Sep 2022 17:00:14 +0800 Subject: [PATCH 2/5] remove jit stack --- src/operators/verify_cc.cc | 10 ++-------- src/operators/verify_cc.h | 2 -- src/utils/regex.cc | 24 +++++++++--------------- src/utils/regex.h | 2 -- 4 files changed, 11 insertions(+), 27 deletions(-) diff --git a/src/operators/verify_cc.cc b/src/operators/verify_cc.cc index ed466872ea..76a140912d 100644 --- a/src/operators/verify_cc.cc +++ b/src/operators/verify_cc.cc @@ -36,8 +36,6 @@ namespace operators { VerifyCC::~VerifyCC() { #if WITH_PCRE2 pcre2_code_free(m_pc); - pcre2_match_context_free(m_pmc); - pcre2_jit_stack_free(m_pcjs); #else if (m_pc != NULL) { pcre_free(m_pc); @@ -107,11 +105,7 @@ bool VerifyCC::init(const std::string &param2, std::string *error) { if (m_pc == NULL) { return false; } - m_pcje = pcre2_jit_compile(m_pc, PCRE2_JIT_COMPLETE); - m_pmc = pcre2_match_context_create(NULL); - m_pcjs = pcre2_jit_stack_create(32*1024, 512*1024, NULL); - pcre2_jit_stack_assign(m_pmc, NULL, m_pcjs); #else const char *errptr = NULL; int erroffset = 0; @@ -153,9 +147,9 @@ bool VerifyCC::evaluate(Transaction *t, RuleWithActions *rule, for (offset = 0; offset < target_length; offset++) { if (m_pcje == 0) { - ret = pcre2_jit_match(m_pc, pcre2_i, target_length, offset, 0, match_data, m_pmc); + ret = pcre2_jit_match(m_pc, pcre2_i, target_length, offset, 0, match_data, NULL); } else { - ret = pcre2_match(m_pc, pcre2_i, target_length, offset, 0, match_data, m_pmc); + ret = pcre2_match(m_pc, pcre2_i, target_length, offset, 0, match_data, NULL); } /* If there was no match, then we are done. 
*/ diff --git a/src/operators/verify_cc.h b/src/operators/verify_cc.h index 3b026f88a6..3f0a0421e4 100644 --- a/src/operators/verify_cc.h +++ b/src/operators/verify_cc.h @@ -53,9 +53,7 @@ class VerifyCC : public Operator { private: #if WITH_PCRE2 pcre2_code *m_pc; - pcre2_match_context *m_pmc; int m_pcje; - pcre2_jit_stack *m_pcjs; #else pcre *m_pc; pcre_extra *m_pce; diff --git a/src/utils/regex.cc b/src/utils/regex.cc index d66efcc8a6..c699f6421f 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -73,11 +73,7 @@ Regex::Regex(const std::string& pattern_, bool ignoreCase) PCRE2_SIZE erroroffset = 0; m_pc = pcre2_compile(pcre2_pattern, PCRE2_ZERO_TERMINATED, pcre2_options, &errornumber, &erroroffset, NULL); - m_pcje = pcre2_jit_compile(m_pc, PCRE2_JIT_COMPLETE); - m_pmc = pcre2_match_context_create(NULL); - m_pcjs = pcre2_jit_stack_create(32*1024, 512*1024, NULL); - pcre2_jit_stack_assign(m_pmc, NULL, m_pcjs); #else const char *errptr = NULL; int erroffset; @@ -97,8 +93,6 @@ Regex::Regex(const std::string& pattern_, bool ignoreCase) Regex::~Regex() { #if WITH_PCRE2 pcre2_code_free(m_pc); - pcre2_match_context_free(m_pmc); - pcre2_jit_stack_free(m_pcjs); #else if (m_pc != NULL) { pcre_free(m_pc); @@ -127,10 +121,10 @@ std::list Regex::searchAll(const std::string& s) const { do { if (m_pcje == 0) { rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), - offset, 0, match_data, m_pmc); + offset, 0, match_data, NULL); } else { rc = pcre2_match(m_pc, pcre2_s, s.length(), - offset, 0, match_data, m_pmc); + offset, 0, match_data, NULL); } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -173,9 +167,9 @@ bool Regex::searchOneMatch(const std::string& s, std::vector& cap pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL); int rc; if (m_pcje == 0) { - rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); } else { - rc = 
pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -215,7 +209,7 @@ bool Regex::searchGlobal(const std::string& s, std::vector& captu pcre2_options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; } int rc = pcre2_match(m_pc, pcre2_s, s.length(), - startOffset, pcre2_options, match_data, m_pmc); + startOffset, pcre2_options, match_data, NULL); PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -290,10 +284,10 @@ int Regex::search(const std::string& s, SMatch *match) const { int ret; if (m_pcje == 0) { ret = pcre2_match(m_pc, pcre2_s, s.length(), - 0, 0, match_data, m_pmc) > 0; + 0, 0, match_data, NULL) > 0; } else { ret = pcre2_match(m_pc, pcre2_s, s.length(), - 0, 0, match_data, m_pmc) > 0; + 0, 0, match_data, NULL) > 0; } if (ret > 0) { // match PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); @@ -321,9 +315,9 @@ int Regex::search(const std::string& s) const { pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL); int rc; if (m_pcje == 0) { - rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); } else { - rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, m_pmc); + rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); } pcre2_match_data_free(match_data); if (rc > 0) { diff --git a/src/utils/regex.h b/src/utils/regex.h index 4cbbe880bf..41755d4418 100644 --- a/src/utils/regex.h +++ b/src/utils/regex.h @@ -85,9 +85,7 @@ class Regex { private: #if WITH_PCRE2 pcre2_code *m_pc; - pcre2_match_context *m_pmc; int m_pcje; - pcre2_jit_stack *m_pcjs; #else pcre *m_pc = NULL; pcre_extra *m_pce = NULL; From 1550e3017e6a33337cf4eb1578cb90e2e5cd479c Mon Sep 17 00:00:00 2001 From: Jabasukuriputo Wang Date: Wed, 30 Nov 2022 23:13:29 +0800 Subject: [PATCH 
3/5] add fallback for JIT_STACKLIMIT --- src/operators/verify_cc.cc | 6 ++++-- src/utils/regex.cc | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/operators/verify_cc.cc b/src/operators/verify_cc.cc index 76a140912d..ee39902d81 100644 --- a/src/operators/verify_cc.cc +++ b/src/operators/verify_cc.cc @@ -148,8 +148,10 @@ bool VerifyCC::evaluate(Transaction *t, RuleWithActions *rule, if (m_pcje == 0) { ret = pcre2_jit_match(m_pc, pcre2_i, target_length, offset, 0, match_data, NULL); - } else { - ret = pcre2_match(m_pc, pcre2_i, target_length, offset, 0, match_data, NULL); + } + + if (m_pcje != 0 || ret == PCRE2_ERROR_JIT_STACKLIMIT) { + ret = pcre2_match(m_pc, pcre2_i, target_length, offset, PCRE2_NO_JIT, match_data, NULL); } /* If there was no match, then we are done. */ diff --git a/src/utils/regex.cc b/src/utils/regex.cc index c699f6421f..a29b81cf98 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -122,9 +122,11 @@ std::list Regex::searchAll(const std::string& s) const { if (m_pcje == 0) { rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), offset, 0, match_data, NULL); - } else { + } + + if (m_pcje != 0 || rc == PCRE2_ERROR_JIT_STACKLIMIT) { rc = pcre2_match(m_pc, pcre2_s, s.length(), - offset, 0, match_data, NULL); + offset, PCRE2_NO_JIT, match_data, NULL); } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -168,8 +170,10 @@ bool Regex::searchOneMatch(const std::string& s, std::vector& cap int rc; if (m_pcje == 0) { rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); - } else { - rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); + } + + if (m_pcje != 0 || rc == PCRE2_ERROR_JIT_STACKLIMIT) { + rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, PCRE2_NO_JIT, match_data, NULL); } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); #else @@ -285,9 +289,11 @@ int Regex::search(const std::string& s, SMatch *match) const { if (m_pcje == 0) { ret = 
pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL) > 0; - } else { + } + + if (m_pcje != 0 || rc == PCRE2_ERROR_JIT_STACKLIMIT) { ret = pcre2_match(m_pc, pcre2_s, s.length(), - 0, 0, match_data, NULL) > 0; + 0, PCRE2_NO_JIT, match_data, NULL) > 0; } if (ret > 0) { // match PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); @@ -316,8 +322,10 @@ int Regex::search(const std::string& s) const { int rc; if (m_pcje == 0) { rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); - } else { - rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL); + } + + if (m_pcje != 0 || rc == PCRE2_ERROR_JIT_STACKLIMIT) { + rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, PCRE2_NO_JIT, match_data, NULL); } pcre2_match_data_free(match_data); if (rc > 0) { From 37d3a20da8e55a2670eadd52d2320e3e0a69996b Mon Sep 17 00:00:00 2001 From: Jabasukuriputo Wang Date: Thu, 8 Dec 2022 08:35:33 +0800 Subject: [PATCH 4/5] fix --- src/utils/regex.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/regex.cc b/src/utils/regex.cc index a29b81cf98..0143a0dcca 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -291,7 +291,7 @@ int Regex::search(const std::string& s, SMatch *match) const { 0, 0, match_data, NULL) > 0; } - if (m_pcje != 0 || rc == PCRE2_ERROR_JIT_STACKLIMIT) { + if (m_pcje != 0 || ret == PCRE2_ERROR_JIT_STACKLIMIT) { ret = pcre2_match(m_pc, pcre2_s, s.length(), 0, PCRE2_NO_JIT, match_data, NULL) > 0; } From 54ff1ea53088826b59743438134a4ccfd2e900bd Mon Sep 17 00:00:00 2001 From: wfjsw Date: Sat, 10 Dec 2022 11:42:51 +0800 Subject: [PATCH 5/5] init m_pcje in the constructor of verify_cc.cc --- src/operators/verify_cc.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/operators/verify_cc.h b/src/operators/verify_cc.h index 3f0a0421e4..e2a5e24280 100644 --- a/src/operators/verify_cc.h +++ b/src/operators/verify_cc.h @@ -39,7 +39,12 @@ class VerifyCC : public Operator { explicit 
VerifyCC(std::unique_ptr param) : Operator("VerifyCC", std::move(param)), #if WITH_PCRE2 - m_pc(NULL) { } + m_pc(NULL) + { +#if WITH_PCRE2 + m_pcje = PCRE2_ERROR_JIT_BADOPTION; +#endif + } #else m_pc(NULL), m_pce(NULL) { }