From 3e9d8107a86bea0c4329798a4a8d385db36aaa72 Mon Sep 17 00:00:00 2001 From: Eduardo Arias Date: Fri, 9 Aug 2024 13:01:37 -0700 Subject: [PATCH] Removed multiple heap-allocated copies in parse_pm_content - The previous version of this function was doing three strdup copies to parse the pm content. The updated version only copies the value once (in order not to modify the Operator's m_param member variable), and then performs the updates inline. - Binary parsing was broken because digits were not compared as characters. - Fail parsing when an invalid hex character is found. - Error message in parse_pm_content would reference freed memory if accessed by caller. Removed anyway because it was unused. --- src/Makefile.am | 1 - src/operators/pm.cc | 103 +++++++++++++++++++++++---------- src/operators/pm.h | 1 - src/operators/pm_f.cc | 27 --------- src/utils/acmp.cc | 129 +----------------------------------------- src/utils/acmp.h | 3 +- 6 files changed, 75 insertions(+), 189 deletions(-) delete mode 100644 src/operators/pm_f.cc diff --git a/src/Makefile.am b/src/Makefile.am index 6f358655a0..75f2ee4a86 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -219,7 +219,6 @@ OPERATORS = \ operators/no_match.cc \ operators/operator.cc \ operators/pm.cc \ - operators/pm_f.cc \ operators/pm_from_file.cc \ operators/rbl.cc \ operators/rsub.cc \ diff --git a/src/operators/pm.cc b/src/operators/pm.cc index ba7039bff4..754746e8fb 100644 --- a/src/operators/pm.cc +++ b/src/operators/pm.cc @@ -15,20 +15,80 @@ #include "src/operators/pm.h" -#include - -#include #include #include #include -#include -#include -#include -#include "src/operators/operator.h" #include "src/utils/acmp.h" #include "src/utils/string.h" + +static inline std::string parse_pm_content(const std::string &op_parm) { + auto offset = op_parm.find_first_not_of(" \t"); + if (offset == std::string::npos) { + return op_parm; + } + + auto size = op_parm.size() - offset; + if (size >= 2 && + op_parm.at(offset) == '\"' && op_parm.back() == '\"') { + offset++; + size -= 2; + } + + if (size == 0) { + return op_parm; + } + + std::string parsed_parm(op_parm.c_str() + offset, size); + + unsigned char bin_offset = 0; + unsigned char bin_parm[3] = { 0 }; + bool bin = false, esc = false; + + char *d = parsed_parm.data(); + for(const char *s = d, *e = d + size; s != e; ++s) { + if (*s == '|') { + bin = !bin; + } else if(!esc && *s == '\\') { + esc = true; + } else { + if (bin) { + if (VALID_HEX(*s)) { + bin_parm[bin_offset] = (char)*s; + bin_offset++; + if (bin_offset == 2) { + unsigned char c = strtol((char *)bin_parm, (char **) nullptr, 16) & 0xFF; + bin_offset = 0; + *d++ = c; + } + } else { + // Invalid hex character + return op_parm; + } + } else if (esc) { + if (*s == ':' || + *s == ';' || + *s == '\\' || + *s == '\"') + { + *d++ = *s; + } else { + // Unsupported escape sequence + return op_parm; + } + esc = false; + } else { + *d++ = *s; + } + } + } + + parsed_parm.resize(d - parsed_parm.c_str()); + return parsed_parm; +} + + namespace modsecurity { namespace operators { @@ -105,36 +165,19 @@ bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule, bool Pm::init(const std::string &file, std::string *error) { - std::vector vec; - std::istringstream *iss; - const char *err = NULL; - - char *content = parse_pm_content(m_param.c_str(), m_param.length(), &err); - if (content == NULL) { - iss = new std::istringstream(m_param); - } else { - iss = new std::istringstream(content); - } + const auto op_parm = parse_pm_content(m_param); - std::copy(std::istream_iterator(*iss), + std::istringstream iss{op_parm}; + std::for_each(std::istream_iterator(iss), std::istream_iterator(), - back_inserter(vec)); - - for (auto &a : vec) { - acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length()); - } + [this](const auto &a) { + acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length()); + }); while (m_p->is_failtree_done == 0) { acmp_prepare(m_p); } - if (content) { - free(content); - content = NULL; - } - - delete iss; - return true; } diff --git a/src/operators/pm.h b/src/operators/pm.h index d07ba46c59..72d3a54611 100644 --- a/src/operators/pm.h +++ b/src/operators/pm.h @@ -17,7 +17,6 @@ #define SRC_OPERATORS_PM_H_ #include -#include #include #include #include diff --git a/src/operators/pm_f.cc b/src/operators/pm_f.cc deleted file mode 100644 index bc83a33b4d..0000000000 --- a/src/operators/pm_f.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * ModSecurity, http://www.modsecurity.org/ - * Copyright (c) 2015 - 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/) - * - * You may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * If any of the files related to licensing are missing or if you have any - * other questions related to licensing please contact Trustwave Holdings, Inc. - * directly using the email address security@modsecurity.org. - * - */ - -#include "src/operators/pm_f.h" - -#include - -#include "src/operators/pm_from_file.h" - -namespace modsecurity { -namespace operators { - - -} // namespace operators -} // namespace modsecurity diff --git a/src/utils/acmp.cc b/src/utils/acmp.cc index 708ce3a2b9..fb61f31d99 100644 --- a/src/utils/acmp.cc +++ b/src/utils/acmp.cc @@ -33,135 +33,8 @@ * that should be mitigated. This ACMP parser should be re-written to * consume less memory. */ -extern "C" { - -char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) { - char *parm = NULL; - char *content; - unsigned short int offset = 0; -// char converted = 0; - int i, x; - unsigned char bin = 0, esc = 0, bin_offset = 0; - unsigned char c = 0; - unsigned char bin_parm[3] = { 0 }; - char *processed = NULL; - - content = strdup(op_parm); - - if (content == NULL) { - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - return NULL; - } - - while (offset < op_len && (content[offset] == ' ' || content[offset] == '\t')) { - offset++; - }; - - op_len = strlen(content); - - if (content[offset] == '\"' && content[op_len-1] == '\"') { - parm = strdup(content + offset + 1); - if (parm == NULL) { - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - free(content); - content = NULL; - return NULL; - } - parm[op_len - offset - 2] = '\0'; - } else { - parm = strdup(content + offset); - if (parm == NULL) { - free(content); - content = NULL; - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - return NULL; - } - } - - free(content); - content = NULL; - op_len = strlen(parm); - - if (op_len == 0) { - *error_msg = "Content length is 0."; - free(parm); - return NULL; - } - - for (i = 0, x = 0; i < op_len; i++) { - if (parm[i] == '|') { - if (bin) { - bin = 0; - } else { - bin = 1; - } - } else if(!esc && parm[i] == '\\') { - esc = 1; - } else { - if (bin) { - if (parm[i] == 0 || parm[i] == 1 || parm[i] == 2 || - parm[i] == 3 || parm[i] == 4 || parm[i] == 5 || - parm[i] == 6 || parm[i] == 7 || parm[i] == 8 || - parm[i] == 9 || - parm[i] == 'A' || parm[i] == 'a' || - parm[i] == 'B' || parm[i] == 'b' || - parm[i] == 'C' || parm[i] == 'c' || - parm[i] == 'D' || parm[i] == 'd' || - parm[i] == 'E' || parm[i] == 'e' || - parm[i] == 'F' || parm[i] == 'f') - { - bin_parm[bin_offset] = (char)parm[i]; - bin_offset++; - if (bin_offset == 2) { - c = strtol((char *)bin_parm, (char **) NULL, 16) & 0xFF; - bin_offset = 0; - parm[x] = c; - x++; - //converted = 1; - } - } else if (parm[i] == ' ') { - } - } else if (esc) { - if (parm[i] == ':' || - parm[i] == ';' || - parm[i] == '\\' || - parm[i] == '\"') - { - parm[x] = parm[i]; - x++; - } else { - *error_msg = std::string("Unsupported escape sequence.").c_str(); - free(parm); - return NULL; - } - esc = 0; - //converted = 1; - } else { - parm[x] = parm[i]; - x++; - } - } - } - -#if 0 - if (converted) { - op_len = x; - } -#endif - - //processed = memcpy(processed, parm, op_len); - processed = strdup(parm); - free(parm); - parm = NULL; - - if (processed == NULL) { - *error_msg = std::string("Error allocating memory for pattern matching content.").c_str(); - return NULL; - } - - return processed; -} +extern "C" { /* ******************************************************************************* diff --git a/src/utils/acmp.h b/src/utils/acmp.h index 1af454e1df..a93165fe5d 100644 --- a/src/utils/acmp.h +++ b/src/utils/acmp.h @@ -23,6 +23,7 @@ #endif #include +#include extern "C" { @@ -189,8 +190,6 @@ int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_ */ int acmp_prepare(ACMP *parser); -char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg); - } #endif /*ACMP_H_*/