diff --git a/src/modsecurity.cc b/src/modsecurity.cc index ac8e52563a..106d9968ca 100644 --- a/src/modsecurity.cc +++ b/src/modsecurity.cc @@ -229,9 +229,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len, const unsigned char *buf; size_t jsonSize; - std::list vars = variables.searchAll(matchString); - std::list ops = operators.searchAll(matchString); - std::list trans = transformations.searchAll(matchString); + auto vars = variables.searchAllMatches(matchString); + auto ops = operators.searchAllMatches(matchString); + auto trans = transformations.searchAllMatches(matchString); g = yajl_gen_alloc(NULL); if (g == NULL) { @@ -256,14 +256,11 @@ int ModSecurity::processContentOffset(const char *content, size_t len, strlen("highlight")); yajl_gen_array_open(g); - while (vars.size() > 0) { + for (const auto &m : vars) { std::string value; yajl_gen_map_open(g); - vars.pop_back(); - const std::string &startingAt = vars.back().str(); - vars.pop_back(); - const std::string &size = vars.back().str(); - vars.pop_back(); + auto startingAt = m[1].to_string(matchString); + auto size = m[2].to_string(matchString); yajl_gen_string(g, reinterpret_cast("startingAt"), strlen("startingAt")); @@ -303,7 +300,7 @@ int ModSecurity::processContentOffset(const char *content, size_t len, varValue.size()); yajl_gen_map_close(g); - while (trans.size() > 0) { + for (const auto &m : trans) { modsecurity::actions::transformations::Transformation *t; std::string varValueRes; yajl_gen_map_open(g); @@ -311,15 +308,15 @@ int ModSecurity::processContentOffset(const char *content, size_t len, reinterpret_cast("transformation"), strlen("transformation")); + auto transformation_name = m[0].to_string(matchString); yajl_gen_string(g, - reinterpret_cast(trans.back().str().c_str()), - trans.back().str().size()); + reinterpret_cast(transformation_name.c_str()), + transformation_name.size()); t = modsecurity::actions::transformations::Transformation::instantiate( - 
trans.back().str().c_str()); + transformation_name.c_str()); varValueRes = t->evaluate(varValue, NULL); varValue.assign(varValueRes); - trans.pop_back(); yajl_gen_string(g, reinterpret_cast("value"), strlen("value")); @@ -338,16 +335,13 @@ int ModSecurity::processContentOffset(const char *content, size_t len, yajl_gen_map_open(g); - while (ops.size() > 0) { + for (const auto &m : ops) { std::string value; yajl_gen_string(g, reinterpret_cast("highlight"), strlen("highlight")); yajl_gen_map_open(g); - ops.pop_back(); - std::string startingAt = ops.back().str(); - ops.pop_back(); - std::string size = ops.back().str(); - ops.pop_back(); + auto startingAt = m[1].to_string(matchString); + auto size = m[2].to_string(matchString); yajl_gen_string(g, reinterpret_cast("startingAt"), strlen("startingAt")); diff --git a/src/operators/rx.cc b/src/operators/rx.cc index b4fc6ff4d7..0dc5314144 100644 --- a/src/operators/rx.cc +++ b/src/operators/rx.cc @@ -51,7 +51,7 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule, re = m_re; } - std::vector captures; + Regex::match_type captures; re->searchOneMatch(input, captures); if (rule && rule->hasCaptureAction() && transaction) { diff --git a/src/operators/verify_cpf.cc b/src/operators/verify_cpf.cc index 0ec49ac479..570e1e2c06 100644 --- a/src/operators/verify_cpf.cc +++ b/src/operators/verify_cpf.cc @@ -110,7 +110,6 @@ bool VerifyCPF::verify(const char *cpfnumber, int len) { bool VerifyCPF::evaluate(Transaction *t, RuleWithActions *rule, const std::string& input, std::shared_ptr ruleMessage) { - std::list matches; bool is_cpf = false; int i; @@ -119,16 +118,19 @@ bool VerifyCPF::evaluate(Transaction *t, RuleWithActions *rule, } for (i = 0; i < input.size() - 1 && is_cpf == false; i++) { - matches = m_re->searchAll(input.substr(i, input.size())); + std::string val = input.substr(i); + auto matches = m_re->searchAllMatches(val); for (const auto & m : matches) { - is_cpf = verify(m.str().c_str(), m.str().size()); + const 
auto &g = m[0]; + is_cpf = verify(&val[g.m_offset], g.m_length); if (is_cpf) { - logOffset(ruleMessage, m.offset(), m.str().size()); + logOffset(ruleMessage, g.m_offset, g.m_length); if (rule && t && rule->hasCaptureAction()) { + std::string str = g.to_string(val); t->m_collections.m_tx_collection->storeOrUpdateFirst( - "0", m.str()); + "0", str); ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \ - m.str()); + str); } goto out; diff --git a/src/operators/verify_ssn.cc b/src/operators/verify_ssn.cc index 00b0c5c201..f59886d837 100644 --- a/src/operators/verify_ssn.cc +++ b/src/operators/verify_ssn.cc @@ -121,16 +121,19 @@ bool VerifySSN::evaluate(Transaction *t, RuleWithActions *rule, } for (i = 0; i < input.size() - 1 && is_ssn == false; i++) { - matches = m_re->searchAll(input.substr(i, input.size())); - for (const auto & j : matches) { - is_ssn = verify(j.str().c_str(), j.str().size()); + std::string val = input.substr(i); + auto matches = m_re->searchAllMatches(val); + for (const auto & m : matches) { + const auto &g = m[0]; + is_ssn = verify(&val[g.m_offset], g.m_length); if (is_ssn) { - logOffset(ruleMessage, j.offset(), j.str().size()); + logOffset(ruleMessage, g.m_offset, g.m_length); if (rule && t && rule->hasCaptureAction()) { + std::string str = g.to_string(val); t->m_collections.m_tx_collection->storeOrUpdateFirst( - "0", j.str()); + "0", str); ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \ - j.str()); + str); } goto out; diff --git a/src/operators/verify_svnr.cc b/src/operators/verify_svnr.cc index 248e6b4ec1..2d276978b3 100644 --- a/src/operators/verify_svnr.cc +++ b/src/operators/verify_svnr.cc @@ -88,17 +88,20 @@ bool VerifySVNR::evaluate(Transaction *t, RuleWithActions *rule, } for (i = 0; i < input.size() - 1 && is_svnr == false; i++) { - matches = m_re->searchAll(input.substr(i, input.size())); + std::string val = input.substr(i); + auto matches = m_re->searchAllMatches(val); - for (const auto & j : matches) { - is_svnr = 
verify(j.str().c_str(), j.str().size()); + for (const auto & m : matches) { + const auto &g = m[0]; + is_svnr = verify(&val[g.m_offset], g.m_length); if (is_svnr) { - logOffset(ruleMessage, j.offset(), j.str().size()); + logOffset(ruleMessage, g.m_offset, g.m_length); if (rule && t && rule->hasCaptureAction()) { + std::string str = g.to_string(val); t->m_collections.m_tx_collection->storeOrUpdateFirst( - "0", j.str()); + "0", str); ms_dbg_a(t, 7, "Added VerifySVNR match TX.0: " + \ - j.str()); + str); } goto out; diff --git a/src/utils/regex.cc b/src/utils/regex.cc index 0feb256cca..933411e29f 100644 --- a/src/utils/regex.cc +++ b/src/utils/regex.cc @@ -61,41 +61,7 @@ Regex::~Regex() { } } - -std::list Regex::searchAll(const std::string& s) const { - const char *subject = s.c_str(); - const std::string tmpString = std::string(s.c_str(), s.size()); - int ovector[OVECCOUNT]; - int rc, i, offset = 0; - std::list retList; - - do { - rc = pcre_exec(m_pc, m_pce, subject, - s.size(), offset, 0, ovector, OVECCOUNT); - - for (i = 0; i < rc; i++) { - size_t start = ovector[2*i]; - size_t end = ovector[2*i+1]; - size_t len = end - start; - if (end > s.size()) { - rc = 0; - break; - } - std::string match = std::string(tmpString, start, len); - offset = start + len; - retList.push_front(SMatch(match, start)); - - if (len == 0) { - rc = 0; - break; - } - } - } while (rc > 0); - - return retList; -} - -bool Regex::searchOneMatch(const std::string& s, std::vector& captures) const { +bool Regex::searchOneMatch(const std::string& s, match_type& captures) const { const char *subject = s.c_str(); int ovector[OVECCOUNT]; @@ -115,6 +81,44 @@ bool Regex::searchOneMatch(const std::string& s, std::vector& cap return (rc > 0); } +// Returns every non-overlapping match of the pattern in s, in scan order. +// Each match lists its participating groups (group 0, the full match, first); +// offsets are relative to the beginning of s. +std::vector<Regex::match_type> Regex::searchAllMatches(const std::string& s) const { + int ovector[OVECCOUNT]; + int offset = 0; + int rc = 0; + std::vector<Regex::match_type> matches; + + // Parenthesize the assignment so rc keeps pcre_exec's return value (one more + // than the highest-numbered group that was set), not the boolean of "> 0". + while ((rc = pcre_exec(m_pc, m_pce, s.data(), s.size(), offset, 0, ovector, OVECCOUNT)) > 0) { + Regex::match_type match; + 
for (int i = 0; i < rc; i++) { + int start = ovector[2*i]; + int end = ovector[2*i+1]; + + // see man pcreapi for details when offsets are set to -1 + if (start >= 0 && end >= 0) { + int len = end - start; + match.emplace_back(i, start, len); + } + } + matches.push_back(std::move(match)); + + // offsets for full match (group 0) + int start = ovector[0]; + int end = ovector[1]; + offset = end; + if (start == end) { + // skip zero-length match (otherwise, the loop won't terminate) + offset++; + } + } + return matches; +} + int Regex::search(const std::string& s, SMatch *match) const { int ovector[OVECCOUNT]; int ret = pcre_exec(m_pc, m_pce, s.c_str(), diff --git a/src/utils/regex.h b/src/utils/regex.h index 46dab6b83e..db10bff29d 100644 --- a/src/utils/regex.h +++ b/src/utils/regex.h @@ -57,10 +57,19 @@ struct SMatchCapture { size_t m_group; // E.g. 0 = full match; 6 = capture group 6 size_t m_offset; // offset of match within the analyzed string size_t m_length; + + // to_string is convenience method for returning string for the match. + // You must supply the same string that was used to obtain the match, + // as offset would be invalid otherwise. 
+ std::string to_string(const std::string &matched_string) const { + return matched_string.substr(m_offset, m_length); + } }; class Regex { public: + typedef std::vector match_type; + explicit Regex(const std::string& pattern_); ~Regex(); @@ -68,8 +77,8 @@ class Regex { Regex(const Regex&) = delete; Regex& operator=(const Regex&) = delete; - std::list searchAll(const std::string& s) const; - bool searchOneMatch(const std::string& s, std::vector& captures) const; + bool searchOneMatch(const std::string& s, match_type& captures) const; + std::vector searchAllMatches(const std::string &s) const; int search(const std::string &s, SMatch *match) const; int search(const std::string &s) const; diff --git a/src/variables/variable.h b/src/variables/variable.h index 09dff6b8ce..6ebe991af2 100644 --- a/src/variables/variable.h +++ b/src/variables/variable.h @@ -123,7 +123,7 @@ class KeyExclusionRegex : public KeyExclusion { ~KeyExclusionRegex() override { } bool match(const std::string &a) override { - return m_re.searchAll(a).size() > 0; + return m_re.search(a); } Utils::Regex m_re; @@ -615,7 +615,7 @@ class Variables : public std::vector { [v](Variable *m) -> bool { VariableRegex *r = dynamic_cast(m); if (r) { - return r->m_r.searchAll(v->getKey()).size() > 0; + return r->m_r.search(v->getKey()); } return v->getKeyWithCollection() == *m->m_fullName.get(); }) != end();