diff --git a/apache2/apache2_config.c b/apache2/apache2_config.c index bfbcb83468..aa10d34aef 100644 --- a/apache2/apache2_config.c +++ b/apache2/apache2_config.c @@ -73,6 +73,9 @@ void *create_directory_config(apr_pool_t *mp, char *path) /* audit log variables */ dcfg->auditlog_flag = NOT_SET; dcfg->auditlog_type = NOT_SET; + #ifdef WITH_YAJL + dcfg->auditlog_format = NOT_SET; + #endif dcfg->max_rule_time = NOT_SET; dcfg->auditlog_dirperms = NOT_SET; dcfg->auditlog_fileperms = NOT_SET; @@ -503,6 +506,10 @@ void *merge_directory_configs(apr_pool_t *mp, void *_parent, void *_child) merged->auditlog2_fd = parent->auditlog2_fd; merged->auditlog2_name = parent->auditlog2_name; } + #ifdef WITH_YAJL + merged->auditlog_format = (child->auditlog_format == NOT_SET + ? parent->auditlog_format : child->auditlog_format); + #endif merged->auditlog_storage_dir = (child->auditlog_storage_dir == NOT_SET_P ? parent->auditlog_storage_dir : child->auditlog_storage_dir); merged->auditlog_parts = (child->auditlog_parts == NOT_SET_P @@ -667,6 +674,9 @@ void init_directory_config(directory_config *dcfg) /* audit log variables */ if (dcfg->auditlog_flag == NOT_SET) dcfg->auditlog_flag = 0; if (dcfg->auditlog_type == NOT_SET) dcfg->auditlog_type = AUDITLOG_SERIAL; + #ifdef WITH_YAJL + if (dcfg->auditlog_format == NOT_SET) dcfg->auditlog_format = AUDITLOGFORMAT_NATIVE; + #endif if (dcfg->max_rule_time == NOT_SET) dcfg->max_rule_time = 0; if (dcfg->auditlog_dirperms == NOT_SET) dcfg->auditlog_dirperms = CREATEMODE_DIR; if (dcfg->auditlog_fileperms == NOT_SET) dcfg->auditlog_fileperms = CREATEMODE; @@ -1282,6 +1292,23 @@ static const char *cmd_audit_log_type(cmd_parms *cmd, void *_dcfg, return NULL; } +#ifdef WITH_YAJL +static const char *cmd_audit_log_mode(cmd_parms *cmd, void *_dcfg, + const char *p1) +{ + directory_config *dcfg = _dcfg; + + if (strcasecmp(p1, "JSON") == 0) dcfg->auditlog_format = AUDITLOGFORMAT_JSON; + else + if (strcasecmp(p1, "Native") == 0) dcfg->auditlog_format = AUDITLOGFORMAT_NATIVE; + else + return (const char *)apr_psprintf(cmd->pool, + "ModSecurity: Unrecognised parameter value for SecAuditLogFormat: %s", p1); + + return NULL; +} +#endif + static const char *cmd_audit_log_dirmode(cmd_parms *cmd, void *_dcfg, const char *p1) { @@ -3223,6 +3250,16 @@ const command_rec module_directives[] = { "whether to use the old audit log format (Serial) or new (Concurrent)" ), +#ifdef WITH_YAJL + AP_INIT_TAKE1 ( + "SecAuditLogFormat", + cmd_audit_log_mode, + NULL, + CMD_SCOPE_ANY, + "whether to emit audit log data in native format or JSON" + ), +#endif + AP_INIT_TAKE1 ( "SecAuditLogStorageDir", cmd_audit_log_storage_dir, diff --git a/apache2/modsecurity.h b/apache2/modsecurity.h index bbec3faa4d..5fb6f6e5a2 100644 --- a/apache2/modsecurity.h +++ b/apache2/modsecurity.h @@ -518,6 +518,11 @@ struct directory_config { /* AUDITLOG_SERIAL (single file) or AUDITLOG_CONCURRENT (multiple files) */ int auditlog_type; +#ifdef WITH_YAJL + /* AUDITLOGFORMAT_NATIVE or AUDITLOGFORMAT_JSON */ + int auditlog_format; +#endif + /* Mode for audit log directories and files */ apr_fileperms_t auditlog_dirperms; apr_fileperms_t auditlog_fileperms; diff --git a/apache2/msc_logging.c b/apache2/msc_logging.c index 3323fac925..ee7415da8d 100644 --- a/apache2/msc_logging.c +++ b/apache2/msc_logging.c @@ -25,6 +25,11 @@ #include "apr_version.h" #include +#ifdef WITH_YAJL +#include +#include "msc_logging_json.h" +#endif + /** * Write the supplied data to the audit log (if the FD is ready), update * the size counters, update the hash context. @@ -381,6 +386,40 @@ static void sec_auditlog_write_producer_header(modsec_rec *msr) { sec_auditlog_write(msr, ".\n", 2); } +#ifdef WITH_YAJL +/** + * Ouput the Producer header into a JSON generator + */ +static void sec_auditlog_write_producer_header_json(modsec_rec *msr, yajl_gen g) { + char **signatures = NULL; + int i; + + // this is written no matter what + yajl_string(g, "producer"); + + /* Try to write verything in one go. */ + if (msr->txcfg->component_signatures->nelts == 0) { + yajl_string(g, MODSEC_MODULE_NAME_FULL); + + return; + } + + // we'll need an array if there are component signatures + yajl_gen_array_open(g); + + /* Start with the ModSecurity signature. */ + yajl_string(g, MODSEC_MODULE_NAME_FULL); + + /* Then loop through the components and output individual signatures. */ + signatures = (char **)msr->txcfg->component_signatures->elts; + for(i = 0; i < msr->txcfg->component_signatures->nelts; i++) { + yajl_string(g, (char *)signatures[i]); + } + + yajl_gen_array_close(g); // array for producers is finished +} +#endif + /* * \brief This function will returns the next chain node * @@ -480,10 +519,144 @@ static int chained_is_matched(modsec_rec *msr, const msre_rule *next_rule) { return 0; } +#ifdef WITH_YAJL /** - * Produce an audit log entry. + * Write detailed information about performance metrics into a JSON generator */ -void sec_audit_logger(modsec_rec *msr) { +static void format_performance_variables_json(modsec_rec *msr, yajl_gen g) { + yajl_string(g, "stopwatch"); + yajl_gen_map_open(g); + + yajl_kv_int(g, "p1", msr->time_phase1); + yajl_kv_int(g, "p2", msr->time_phase2); + yajl_kv_int(g, "p3", msr->time_phase3); + yajl_kv_int(g, "p4", msr->time_phase4); + yajl_kv_int(g, "p5", msr->time_phase5); + yajl_kv_int(g, "sr", msr->time_storage_read); + yajl_kv_int(g, "sw", msr->time_storage_write); + yajl_kv_int(g, "l", msr->time_logging); + yajl_kv_int(g, "gc", msr->time_gc); + + yajl_gen_map_close(g); +} + +/** + * Write detailed information about a rule and its actionset into a JSON generator + */ +static void write_rule_json(modsec_rec *msr, const msre_rule *rule, yajl_gen g) { + const apr_array_header_t *tarr; + const apr_table_entry_t *telts; + int been_opened = 0; + int k; + + yajl_gen_map_open(g); + + yajl_string(g, "actionset"); + yajl_gen_map_open(g); + if (rule->actionset->id) { + yajl_kv_string(g, "id", log_escape(msr->mp, rule->actionset->id)); + } + if (rule->actionset->rev) { + yajl_kv_string(g, "rev", log_escape(msr->mp, rule->actionset->rev)); + } + if (rule->actionset->msg) { + msc_string *var = (msc_string *)apr_palloc(msr->mp, sizeof(msc_string)); + var->value = (char *)rule->actionset->msg; + var->value_len = strlen(rule->actionset->msg); + expand_macros(msr, var, NULL, msr->mp); + + yajl_kv_string(g, "msg", log_escape_ex(msr->mp, var->value, var->value_len)); + } + if (rule->actionset->version) { + yajl_kv_string(g, "version", log_escape(msr->mp, rule->actionset->version)); + } + if (rule->actionset->logdata) { + msc_string *var = (msc_string *)apr_pcalloc(msr->mp, sizeof(msc_string)); + var->value = (char *)rule->actionset->logdata; + var->value_len = strlen(rule->actionset->logdata); + expand_macros(msr, var, NULL, msr->mp); + + char *logdata = apr_pstrdup(msr->mp, log_escape_hex(msr->mp, (unsigned char *)var->value, var->value_len)); + + // if it is > 512 bytes, then truncate at 512 with ellipsis. + if (strlen(logdata) > 515) { + logdata[512] = '.'; + logdata[513] = '.'; + logdata[514] = '.'; + logdata[515] = '\0'; + } + + yajl_kv_string(g, "logdata", logdata); + } + if (rule->actionset->severity != NOT_SET) { + yajl_kv_int(g, "severity", rule->actionset->severity); + } + if (rule->actionset->accuracy != NOT_SET) { + yajl_kv_int(g, "accuracy", rule->actionset->accuracy); + } + if (rule->actionset->maturity != NOT_SET) { + yajl_kv_int(g, "maturity", rule->actionset->maturity); + } + if (rule->actionset->phase != NOT_SET) { + yajl_kv_int(g, "phase", rule->actionset->phase); + } + yajl_kv_bool(g, "is_chained", rule->actionset->is_chained || (rule->chain_starter != NULL)); + if (rule->actionset->is_chained && (rule->chain_starter == NULL)) { + yajl_kv_bool(g, "chain_starter", 1); + } + + // tags, lazily opened + tarr = apr_table_elts(rule->actionset->actions); + telts = (const apr_table_entry_t*)tarr->elts; + for (k = 0; k < tarr->nelts; k++) { + msre_action *action = (msre_action *)telts[k].val; + if (strcmp(telts[k].key, "tag") == 0) { + if (been_opened == 0) { + yajl_string(g, "tags"); + yajl_gen_array_open(g); + been_opened = 1; + } + + // expand variables in the tag + msc_string *var = (msc_string *)apr_pcalloc(msr->mp, sizeof(msc_string)); + var->value = (char *)action->param; + var->value_len = strlen(action->param); + expand_macros(msr, var, NULL, msr->mp); + + yajl_string(g, log_escape(msr->mp, var->value)); + } + } + + if (been_opened == 1) { + yajl_gen_array_close(g); + } + + yajl_gen_map_close(g); + + yajl_string(g, "operator"); + yajl_gen_map_open(g); + yajl_kv_string(g, "operator", rule->op_name); + yajl_kv_string(g, "operator_param", rule->op_param); + yajl_kv_string(g, "target", rule->p1); + yajl_kv_bool(g, "negated", rule->op_negated); + yajl_gen_map_close(g); + + yajl_string(g, "config"); + yajl_gen_map_open(g); + yajl_kv_string(g, "filename", rule->filename); + yajl_kv_int(g, "line_num", rule->line_num); + yajl_gen_map_close(g); + + yajl_kv_string(g, "unparsed", rule->unparsed); + yajl_kv_bool(g, "is_matched", chained_is_matched(msr, rule)); + + yajl_gen_map_close(g); +} + +/* + * Produce an audit log entry in JSON format. + */ +void sec_audit_logger_json(modsec_rec *msr) { const apr_array_header_t *arr = NULL; apr_table_entry_t *te = NULL; const apr_array_header_t *tarr_pattern = NULL; @@ -501,9 +674,8 @@ void sec_audit_logger(modsec_rec *msr) { char *buf = NULL, *pat = NULL; msc_parm *mparm = NULL; int arg_min, arg_max, sanitize_matched; - - /* the boundary is used by both audit log types */ - msr->new_auditlog_boundary = create_auditlog_boundary(msr->r); + yajl_gen g; + int been_opened = 0; // helper flag for conditionally opening maps /* Return silently if we don't have a request line. This * means we will not be logging request timeouts. @@ -591,29 +763,45 @@ void sec_audit_logger(modsec_rec *msr) { } } - /* AUDITLOG_PART_HEADER */ + /** + * allocate the buffer for the JSON generator + * passing null will force yajl to use malloc/realloc/free + * need to perf test using APR routines + */ + g = yajl_gen_alloc(NULL); - text = apr_psprintf(msr->mp, "--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_HEADER); - sec_auditlog_write(msr, text, strlen(text)); + /** + * don't pretty print JSON + * this is harder to eyeball but much easier to parse programmatically + */ + yajl_gen_config(g, yajl_gen_beautify, 0); - /* Format: time transaction_id remote_addr remote_port local_addr local_port */ + yajl_gen_map_open(g); // IT BEGINS - text = apr_psprintf(msr->mp, "[%s] %s %s %u %s %u", - current_logtime(msr->mp), msr->txid, msr->remote_addr, msr->remote_port, - msr->local_addr, msr->local_port); - sec_auditlog_write(msr, text, strlen(text)); + /* AUDITLOG_PART_HEADER */ + yajl_string(g, "transaction"); + yajl_gen_map_open(g); // transaction top-level key + + yajl_kv_string(g, "time", current_logtime(msr->mp)); + yajl_kv_string(g, "transaction_id", msr->txid); + yajl_kv_string(g, "remote_address", msr->remote_addr); + yajl_kv_int(g, "remote_port", (int)msr->remote_port); // msr->remote_port is unsigned, yajl wants signed + yajl_kv_string(g, "local_address", msr->local_addr); + yajl_kv_int(g, "local_port", (int)msr->local_port); + + yajl_gen_map_close(g); // transaction top-level key is finished + yajl_string(g, "request"); + yajl_gen_map_open(g); // request top-level key /* AUDITLOG_PART_REQUEST_HEADERS */ if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_REQUEST_HEADERS) != NULL) { - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_REQUEST_HEADERS); - sec_auditlog_write(msr, text, strlen(text)); - sanitize_request_line(msr); + yajl_kv_string(g, "request_line", msr->request_line); - sec_auditlog_write(msr, msr->request_line, strlen(msr->request_line)); - sec_auditlog_write(msr, "\n", 1); + yajl_string(g, "headers"); + yajl_gen_map_open(g); // separate map for request headers arr = apr_table_elts(msr->request_headers); te = (apr_table_entry_t *)arr->elts; @@ -625,9 +813,11 @@ void sec_audit_logger(modsec_rec *msr) { sanitized_partial = 0; sanitize_matched = 0; text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, te[i].val); + // write the key no matter what + // since sanitization only occurs on the value + yajl_string(g, te[i].key); if (apr_table_get(msr->request_headers_to_sanitize, te[i].key) != NULL) { buf = apr_psprintf(msr->mp, "%s",text+strlen(te[i].key)+2); - for ( k = 0; k < tarr_pattern->nelts; k++) { if(strncmp(telts_pattern[k].key,te[i].key,strlen(te[i].key)) ==0 ) { mparm = (msc_parm *)telts_pattern[k].val; @@ -656,13 +846,18 @@ void sec_audit_logger(modsec_rec *msr) { } if(sanitized_partial == 1 && sanitize_matched == 0) { - text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, buf); + yajl_string(g, buf); } else { - memset(text + strlen(te[i].key) + 2, '*', strlen(te[i].val)); + memset(buf, '*', strlen(buf)); // strlen also includes the appended newline on the header + yajl_string(g, buf); } + } else { + // we diverge from the original logic a bit because we always print the key + // at this no point sanitization had occured, so we just print the value + yajl_string(g, te[i].val); } - sec_auditlog_write(msr, text, strlen(text)); } + yajl_gen_map_close(g); // request headers map is finished } /* AUDITLOG_PART_REQUEST_BODY */ @@ -749,9 +944,8 @@ void sec_audit_logger(modsec_rec *msr) { unsigned int chunk_offset = 0; unsigned int sanitize_offset = 0; unsigned int sanitize_length = 0; - - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_REQUEST_BODY); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, "body"); + yajl_gen_array_open(g); // use an array here because we're writing in chunks for(;;) { rc = modsecurity_request_body_retrieve(msr, &chunk, -1, &my_error_msg); @@ -810,7 +1004,7 @@ void sec_audit_logger(modsec_rec *msr) { /* Write the sanitized chunk to the log * and advance to the next chunk. */ - sec_auditlog_write(msr, chunk->data, chunk->length); + yajl_string(g, chunk->data); chunk_offset += chunk->length; } @@ -819,6 +1013,8 @@ void sec_audit_logger(modsec_rec *msr) { } } + yajl_gen_array_close(g); // request body chunks array is finished + if (rc < 0) { msr_log(msr, 1, "Audit log: %s", my_error_msg); } @@ -838,29 +1034,27 @@ void sec_audit_logger(modsec_rec *msr) { if (buffer == NULL) { msr_log(msr, 1, "Audit log: Failed to reconstruct request body."); } else { - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_FAKE_REQUEST_BODY); - sec_auditlog_write(msr, text, strlen(text)); - sec_auditlog_write(msr, buffer, strlen(buffer)); + yajl_kv_string(g, "fake_body", buffer); } } } + yajl_gen_map_close(g); // request top-level key is finished + + yajl_string(g, "response"); + yajl_gen_map_open(g); // response top-level key + /* AUDITLOG_PART_A_RESPONSE_HEADERS */ if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_A_RESPONSE_HEADERS) != NULL) { - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_A_RESPONSE_HEADERS); - sec_auditlog_write(msr, text, strlen(text)); /* There are no response headers (or the status line) in HTTP 0.9 */ if (msr->response_headers_sent) { - if (msr->status_line != NULL) { - text = apr_psprintf(msr->mp, "%s %s\n", msr->response_protocol, - msr->status_line); - } else { - text = apr_psprintf(msr->mp, "%s %u\n", msr->response_protocol, - msr->response_status); - } - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_string(g, "protocol", msr->response_protocol); + // as an integer, response status is easier to parse than status_line + yajl_kv_int(g, "status", (int)msr->response_status); + yajl_string(g, "headers"); + yajl_gen_map_open(g); // separate map for response headers /* Output headers */ @@ -874,6 +1068,9 @@ void sec_audit_logger(modsec_rec *msr) { sanitized_partial = 0; sanitize_matched = 0; text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, te[i].val); + // write the key no matter what + // since sanitization only occurs on the value + yajl_string(g, te[i].key); if (apr_table_get(msr->response_headers_to_sanitize, te[i].key) != NULL) { buf = apr_psprintf(msr->mp, "%s",text+strlen(te[i].key)+2); @@ -905,13 +1102,18 @@ void sec_audit_logger(modsec_rec *msr) { } if(sanitized_partial == 1 && sanitize_matched == 0) { - text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, buf); + yajl_string(g, buf); } else { - memset(text + strlen(te[i].key) + 2, '*', strlen(te[i].val)); + memset(buf, '*', strlen(buf)); + yajl_string(g, buf); } + } else { + // we diverge from the original logic a bit because we always print the key + // at this point no sanitization had occured, so we just print the value + yajl_string(g, te[i].val); } - sec_auditlog_write(msr, text, strlen(text)); } + yajl_gen_map_close(g); // response headers map is finised } } @@ -921,78 +1123,84 @@ void sec_audit_logger(modsec_rec *msr) { if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_RESPONSE_BODY) != NULL) { if (msr->resbody_data != NULL) { - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_RESPONSE_BODY); - sec_auditlog_write(msr, text, strlen(text)); - sec_auditlog_write(msr, msr->resbody_data, msr->resbody_length); + yajl_kv_string(g, "body", msr->resbody_data); wrote_response_body = 1; } } + yajl_gen_map_close(g); // response top-level key is finished + + yajl_string(g, "audit_data"); + yajl_gen_map_open(g); // audit_data top-level key + /* AUDITLOG_PART_TRAILER */ if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_TRAILER) != NULL) { apr_time_t now = apr_time_now(); - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_TRAILER); - sec_auditlog_write(msr, text, strlen(text)); - /* Messages */ + been_opened = 0; + if (msr->alerts->nelts > 0) { + yajl_string(g, "messages"); + yajl_gen_array_open(g); + been_opened = 1; + } for(i = 0; i < msr->alerts->nelts; i++) { - text = apr_psprintf(msr->mp, "Message: %s\n", ((char **)msr->alerts->elts)[i]); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, ((char **)msr->alerts->elts)[i]); + } + if (been_opened == 1) { + yajl_gen_array_close(g); } /* Apache error messages */ + been_opened = 0; + if (msr->error_messages->nelts > 0) { + yajl_string(g, "error_messages"); + yajl_gen_array_open(g); + been_opened = 1; + } for(i = 0; i < msr->error_messages->nelts; i++) { error_message_t *em = (((error_message_t **)msr->error_messages->elts)[i]); - text = apr_psprintf(msr->mp, "Apache-Error: %s\n", - format_error_log_message(msr->mp, em)); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, format_error_log_message(msr->mp, em)); + } + if (been_opened == 1) { + yajl_gen_array_close(g); } /* Action */ if (msr->was_intercepted) { - text = apr_psprintf(msr->mp, "Action: Intercepted (phase %d)\n", msr->intercept_phase); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, "action"); + yajl_gen_map_open(g); + yajl_kv_bool(g, "intercepted", 1); + yajl_kv_int(g, "phase", msr->intercept_phase); + yajl_kv_string(g, "message", msr->intercept_message); + yajl_gen_map_close(g); } /* Apache-Handler */ if (msr->r->handler != NULL) { - text = apr_psprintf(msr->mp, "Apache-Handler: %s\n", msr->r->handler); - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_string(g, "handler", msr->r->handler); } - /* Stopwatch; left in for compatibility reasons */ - text = apr_psprintf(msr->mp, "Stopwatch: %" APR_TIME_T_FMT " %" APR_TIME_T_FMT " (- - -)\n", - msr->request_time, (now - msr->request_time)); - sec_auditlog_write(msr, text, strlen(text)); /* Stopwatch2 */ - { - char *perf_all = format_all_performance_variables(msr, msr->mp); - - text = apr_psprintf(msr->mp, "Stopwatch2: %" APR_TIME_T_FMT " %" APR_TIME_T_FMT - "; %s\n", msr->request_time, (now - msr->request_time), perf_all); - - sec_auditlog_write(msr, text, strlen(text)); - } + format_performance_variables_json(msr, g); /* Our response body does not contain chunks */ /* ENH Only write this when the output was chunked. */ /* ENH Add info when request body was decompressed, dechunked too. */ if (wrote_response_body) { - text = apr_psprintf(msr->mp, "Response-Body-Transformed: Dechunked\n"); - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_bool(g, "response_body_dechunked", 1); } - sec_auditlog_write_producer_header(msr); + sec_auditlog_write_producer_header_json(msr, g); /* Server */ if (msr->server_software != NULL) { - text = apr_psprintf(msr->mp, "Server: %s\n", msr->server_software); - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_string(g, "server", msr->server_software); } + been_opened = 0; /* Sanitised arguments */ { const apr_array_header_t *tarr; @@ -1002,15 +1210,23 @@ void sec_audit_logger(modsec_rec *msr) { telts = (const apr_table_entry_t*)tarr->elts; if (tarr->nelts > 0) { - text = apr_psprintf(msr->mp, "Sanitised-Args: "); - sec_auditlog_write(msr, text, strlen(text)); + if (been_opened == 0) { + yajl_string(g, "sanitized"); + yajl_gen_map_open(g); + been_opened = 1; + } + + yajl_string(g, "args"); + yajl_gen_array_open(g); } for(i = 0; i < tarr->nelts; i++) { msc_arg *arg = (msc_arg *)telts[i].val; - text = apr_psprintf(msr->mp, "%s\"%s\"%s", ((i == 0) ? "" : ", "), - log_escape(msr->mp, arg->name), ((i == (tarr->nelts - 1)) ? ".\n" : "")); - sec_auditlog_write(msr, text, strlen(text)); + // yay arrays actually make it easier here + yajl_string(g, log_escape(msr->mp, arg->name)); + } + if (tarr->nelts > 0) { + yajl_gen_array_close(g); } } @@ -1023,14 +1239,21 @@ void sec_audit_logger(modsec_rec *msr) { telts = (const apr_table_entry_t*)tarr->elts; if (tarr->nelts > 0) { - text = apr_psprintf(msr->mp, "Sanitised-Request-Headers: "); - sec_auditlog_write(msr, text, strlen(text)); + if (been_opened == 0) { + yajl_string(g, "sanitized"); + yajl_gen_map_open(g); + been_opened = 1; + } + + yajl_string(g, "request_headers"); + yajl_gen_array_open(g); } for(i = 0; i < tarr->nelts; i++) { - text = apr_psprintf(msr->mp, "%s\"%s\"%s", ((i == 0) ? "" : ", "), - log_escape(msr->mp, telts[i].key), ((i == (tarr->nelts - 1)) ? ".\n" : "")); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, log_escape(msr->mp, telts[i].key)); + } + if (tarr->nelts > 0) { + yajl_gen_array_close(g); } } @@ -1043,40 +1266,58 @@ void sec_audit_logger(modsec_rec *msr) { telts = (const apr_table_entry_t*)tarr->elts; if (tarr->nelts > 0) { - text = apr_psprintf(msr->mp, "Sanitised-Response-Headers: "); - sec_auditlog_write(msr, text, strlen(text)); + if (been_opened == 0) { + yajl_string(g, "sanitized"); + yajl_gen_map_open(g); + been_opened = 1; + } + + yajl_string(g, "response_headers"); + yajl_gen_array_open(g); } for(i = 0; i < tarr->nelts; i++) { - text = apr_psprintf(msr->mp, "%s\"%s\"%s", ((i == 0) ? "" : ", "), - log_escape(msr->mp, telts[i].key), ((i == (tarr->nelts - 1)) ? ".\n" : "")); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, log_escape(msr->mp, telts[i].key)); + } + if (tarr->nelts > 0) { + yajl_gen_array_close(g); } } + if (been_opened == 1) { + yajl_gen_map_close(g); // sanitized args map is finished + } + /* Web application info. */ if ( ((msr->txcfg->webappid != NULL)&&(strcmp(msr->txcfg->webappid, "default") != 0)) || (msr->sessionid != NULL) || (msr->userid != NULL)) { - text = apr_psprintf(msr->mp, "WebApp-Info: \"%s\" \"%s\" \"%s\"\n", - msr->txcfg->webappid == NULL ? "-" : log_escape(msr->mp, msr->txcfg->webappid), - msr->sessionid == NULL ? "-" : log_escape(msr->mp, msr->sessionid), - msr->userid == NULL ? "-" : log_escape(msr->mp, msr->userid)); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, "webapp_info"); + yajl_gen_map_open(g); + + if (msr->txcfg->webappid != NULL) { + yajl_kv_string(g, "id", log_escape(msr->mp, msr->txcfg->webappid)); + } + if (msr->sessionid != NULL) { + yajl_kv_string(g, "session", log_escape(msr->mp, msr->sessionid)); + } + if (msr->userid != NULL) { + yajl_kv_string(g, "user_id", log_escape(msr->mp, msr->userid)); + } + + yajl_gen_map_close(g); } if ( ((msr->txcfg->sensor_id != NULL)&&(strcmp(msr->txcfg->sensor_id, "default") != 0))) { - text = apr_psprintf(msr->mp, "Sensor-Id: \"%s\"\n", - msr->txcfg->sensor_id == NULL ? "-" : log_escape(msr->mp, msr->txcfg->sensor_id)), - sec_auditlog_write(msr, text, strlen(text)); + if(msr->txcfg->sensor_id != NULL) { + yajl_kv_string(g, "sensor_id", log_escape(msr->mp, msr->txcfg->sensor_id)); + } } if (msr->txcfg->is_enabled > 0) { - text = apr_psprintf(msr->mp, "Engine-Mode: \"%s\"\n", - msr->txcfg->is_enabled == 1 ? "DETECTION_ONLY" : "ENABLED"), - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_string(g, "engine_mode", (msr->txcfg->is_enabled == 1 ? "DETECTION_ONLY" : "ENABLED")); } /* Rule performance time */ @@ -1088,54 +1329,70 @@ void sec_audit_logger(modsec_rec *msr) { telts = (const apr_table_entry_t*)tarr->elts; if (tarr->nelts > 0) { - text = apr_psprintf(msr->mp, "Rules-Performance-Info: "); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, "rules_performance_info"); + yajl_gen_map_open(g); // separate map for rule perf info } for(i = 0; i < tarr->nelts; i++) { - text = apr_psprintf(msr->mp, "%s\"%s=%s\"%s", ((i == 0) ? "" : ", "), - log_escape(msr->mp, telts[i].key), log_escape(msr->mp, telts[i].val), ((i == (tarr->nelts - 1)) ? ".\n" : "")); - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_string(g, log_escape(msr->mp, telts[i].key), log_escape(msr->mp, telts[i].val)); + } + if (tarr->nelts > 0) { + yajl_gen_map_close(g); // map for rule perf info is finished } } - } + yajl_gen_map_close(g); // audit_data top-level key is finished + /* AUDITLOG_PART_UPLOADS */ if ((strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_UPLOADS) != NULL) && (msr->mpd != NULL)) { multipart_part **parts = NULL; unsigned int total_size = 0; int cfiles = 0; - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_UPLOADS); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, "uploads"); + yajl_gen_map_open(g); parts = (multipart_part **)msr->mpd->parts->elts; + yajl_string(g, "info"); + yajl_gen_array_open(g); // separate array for upload info for(cfiles = 0; cfiles < msr->mpd->parts->nelts; cfiles++) { if (parts[cfiles]->type == MULTIPART_FILE) { if(parts[cfiles]->filename != NULL) { - text = apr_psprintf(msr->mp, "%d,%u,\"%s\",\"%s\"\n", cfiles+1, parts[cfiles]->tmp_file_size, log_escape(msr->mp, parts[cfiles]->filename), log_escape(msr->mp, parts[cfiles]->content_type ? parts[cfiles]->content_type : "")); - sec_auditlog_write(msr, text, strlen(text)); + yajl_kv_int(g, "file_size", parts[cfiles]->tmp_file_size); + yajl_kv_string(g, "file_name", log_escape(msr->mp, parts[cfiles]->filename)); + yajl_kv_string(g, "content_type", parts[cfiles]->content_type ? parts[cfiles]->content_type : ""); total_size += parts[cfiles]->tmp_file_size; } } } - text = apr_psprintf(msr->mp, "Total,%u\n", total_size); - sec_auditlog_write(msr, text, strlen(text)); + yajl_gen_array_close(g); // array for upload info is finished + yajl_kv_int(g, "total", total_size); + + yajl_gen_map_close(g); // uploads top-level key is finished } /* AUDITLOG_PART_MATCHEDRULES */ if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_MATCHEDRULES) != NULL) { - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_MATCHEDRULES); - sec_auditlog_write(msr, text, strlen(text)); + yajl_string(g, "matched_rules"); + yajl_gen_array_open(g); // matched_rules top-level key /* Matched Rules */ + for(i = 0; i < msr->matched_rules->nelts; i++) { rule = ((msre_rule **)msr->matched_rules->elts)[i]; if ((rule != NULL) && (rule->actionset != NULL) && rule->actionset->is_chained && (rule->chain_starter == NULL)) { - text = apr_psprintf(msr->mp, "%s\n", rule->unparsed); - sec_auditlog_write(msr, text, strlen(text)); + /* + * create a separate map for each rule chain + * this makes it a lot easier to search for partial chains + */ + yajl_gen_map_open(g); // map for this chain + yajl_kv_bool(g, "chain", 1); + yajl_string(g, "rules"); + yajl_gen_array_open(g); // array for the rules + + write_rule_json(msr, rule, g); do { if (rule->ruleset != NULL) { @@ -1145,33 +1402,48 @@ void sec_audit_logger(modsec_rec *msr) { present = chained_is_matched(msr,next_rule); - if (present == 0) { - text = apr_psprintf(msr->mp, "#%s\n",next_rule->unparsed); - } else { - text = apr_psprintf(msr->mp, "%s\n",next_rule->unparsed); + if (present == 1) { i++; } - - sec_auditlog_write(msr, text, strlen(text)); + write_rule_json(msr, next_rule, g); } } rule = next_rule; } while (rule != NULL && rule->actionset != NULL && rule->actionset->is_chained); - text = apr_psprintf(msr->mp, "\n"); - sec_auditlog_write(msr, text, strlen(text)); + yajl_gen_array_close(g); + + yajl_kv_bool(g, "full_chain_match", present); // if one of the rules didnt match, present is set to 0 + yajl_gen_map_close(g); // close the map for this chain } else { + yajl_gen_map_open(g); + + yajl_kv_bool(g, "chain", 0); + yajl_string(g, "rules"); // this really should be 'rule', but we're keeping in line with other chain maps + + yajl_gen_array_open(g); if ((rule != NULL) && (rule->actionset != NULL) && !rule->actionset->is_chained && (rule->chain_starter == NULL)) { - text = apr_psprintf(msr->mp, "%s\n\n", rule->unparsed); - sec_auditlog_write(msr, text, strlen(text)); + write_rule_json(msr, rule, g); } + yajl_gen_array_close(g); + + yajl_gen_map_close(g); } } - } + yajl_gen_array_close(g); // matched_rules top-level key is finished + } /* AUDITLOG_PART_ENDMARKER */ - text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_ENDMARKER); - sec_auditlog_write(msr, text, strlen(text)); + /* finished building JSON */ + yajl_gen_map_close(g); // box it up! + + const unsigned char *final_buf; + size_t len; + yajl_gen_get_buf(g, &final_buf, &len); + sec_auditlog_write(msr, final_buf, len); + + yajl_gen_clear(g); + yajl_gen_free(g); /* Return here if we were writing to a serial log * as it does not need an index file. @@ -1249,3 +1521,780 @@ void sec_audit_logger(modsec_rec *msr) { apr_file_write_full(msr->txcfg->auditlog2_fd, text, nbytes, &nbytes_written); } } +#endif + +/* + * Produce an audit log entry in native format. + */ +void sec_audit_logger_native(modsec_rec *msr) { + const apr_array_header_t *arr = NULL; + apr_table_entry_t *te = NULL; + const apr_array_header_t *tarr_pattern = NULL; + const apr_table_entry_t *telts_pattern = NULL; + char *str1 = NULL, *str2 = NULL, *text = NULL; + const msre_rule *rule = NULL, *next_rule = NULL; + apr_size_t nbytes, nbytes_written; + unsigned char md5hash[APR_MD5_DIGESTSIZE]; + int was_limited = 0; + int present = 0; + int wrote_response_body = 0; + char *entry_filename, *entry_basename; + apr_status_t rc; + int i, limit, k, sanitized_partial, j; + char *buf = NULL, *pat = NULL; + msc_parm *mparm = NULL; + int arg_min, arg_max, sanitize_matched; + + /* the boundary is used by both audit log types */ + msr->new_auditlog_boundary = create_auditlog_boundary(msr->r); + + /* Return silently if we don't have a request line. This + * means we will not be logging request timeouts. + */ + if (msr->request_line == NULL) { + msr_log(msr, 4, "Audit log: Skipping request whose request_line is null."); + return; + } + + /* Also return silently if we don't have a file descriptor. */ + if (msr->txcfg->auditlog_fd == NULL) { + msr_log(msr, 4, "Audit log: Skipping request since there is nowhere to write to."); + return; + } + + if (msr->txcfg->auditlog_type != AUDITLOG_CONCURRENT) { + /* Serial logging - we already have an open file + * descriptor to write to. + */ + msr->new_auditlog_fd = msr->txcfg->auditlog_fd; + } else { + /* Concurrent logging - we need to create a brand + * new file for this request. + */ + apr_md5_init(&msr->new_auditlog_md5ctx); + + msr->new_auditlog_filename = construct_auditlog_filename(msr->mp, msr->txid); + if (msr->new_auditlog_filename == NULL) return; + + /* The audit log storage directory should be explicitly + * defined. But if it isn't try to write to the same + * directory where the index file is placed. Of course, + * it is *very* bad practice to allow the Apache user + * to write to the same directory where a root user is + * writing to but it's not us that's causing the problem + * and there isn't anything we can do about that. + * + * ENH Actually there is something we can do! We will make + * SecAuditStorageDir mandatory, ask the user to explicitly + * define the storage location *and* refuse to work if the + * index and the storage location are in the same folder. + */ + if (msr->txcfg->auditlog_storage_dir == NULL) { + entry_filename = file_dirname(msr->mp, msr->txcfg->auditlog_name); + } + else { + entry_filename = msr->txcfg->auditlog_storage_dir; + } + if (entry_filename == NULL) return; + + entry_filename = apr_psprintf(msr->mp, "%s%s", entry_filename, msr->new_auditlog_filename); + if (entry_filename == NULL) return; + entry_basename = file_dirname(msr->mp, entry_filename); + if (entry_basename == NULL) return; + + /* IMP1 Surely it would be more efficient to check the folders for + * the audit log repository base path in the configuration phase, to reduce + * the work we do on every request. Also, since our path depends on time, + * we could cache the time we last checked and don't check if we know + * the folder is there. + */ + rc = apr_dir_make_recursive(entry_basename, msr->txcfg->auditlog_dirperms, msr->mp); + if ((rc != APR_SUCCESS) && (rc != APR_EEXIST)) { + msr_log(msr, 1, "Audit log: Failed to create subdirectories: %s (%s)", + entry_basename, get_apr_error(msr->mp, rc)); + return; + } + + rc = apr_file_open(&msr->new_auditlog_fd, entry_filename, + APR_WRITE | APR_TRUNCATE | APR_CREATE | APR_BINARY | APR_FILE_NOCLEANUP, + msr->txcfg->auditlog_fileperms, msr->mp); + if (rc != APR_SUCCESS) { + msr_log(msr, 1, "Audit log: Failed to create file: %s (%s)", + entry_filename, get_apr_error(msr->mp, rc)); + return; + } + } + + /* Lock the mutex, but only if we are using serial format. */ + if (msr->txcfg->auditlog_type != AUDITLOG_CONCURRENT) { + rc = apr_global_mutex_lock(msr->modsecurity->auditlog_lock); + if (rc != APR_SUCCESS) { + msr_log(msr, 1, "Audit log: Failed to lock global mutex: %s", + get_apr_error(msr->mp, rc)); + } + } + + + /* AUDITLOG_PART_HEADER */ + text = apr_psprintf(msr->mp, "--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_HEADER); + sec_auditlog_write(msr, text, strlen(text)); + /* Format: time transaction_id remote_addr remote_port local_addr local_port */ + + text = apr_psprintf(msr->mp, "[%s] %s %s %u %s %u", + current_logtime(msr->mp), msr->txid, msr->remote_addr, msr->remote_port, + msr->local_addr, msr->local_port); + sec_auditlog_write(msr, text, strlen(text)); + + /* AUDITLOG_PART_REQUEST_HEADERS */ + + if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_REQUEST_HEADERS) != NULL) { + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_REQUEST_HEADERS); + sec_auditlog_write(msr, text, strlen(text)); + + sanitize_request_line(msr); + sec_auditlog_write(msr, msr->request_line, strlen(msr->request_line)); + sec_auditlog_write(msr, "\n", 1); + + + arr = apr_table_elts(msr->request_headers); + te = (apr_table_entry_t *)arr->elts; + + tarr_pattern = apr_table_elts(msr->pattern_to_sanitize); + telts_pattern = (const apr_table_entry_t*)tarr_pattern->elts; + + for (i = 0; i < arr->nelts; i++) { + sanitized_partial = 0; + sanitize_matched = 0; + text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, te[i].val); + if (apr_table_get(msr->request_headers_to_sanitize, te[i].key) != NULL) { + buf = apr_psprintf(msr->mp, "%s",text+strlen(te[i].key)+2); + for ( k = 0; k < tarr_pattern->nelts; k++) { + if(strncmp(telts_pattern[k].key,te[i].key,strlen(te[i].key)) ==0 ) { + mparm = (msc_parm *)telts_pattern[k].val; + if(mparm->pad_1 == -1) + sanitize_matched = 1; + pat = strstr(buf,mparm->value); + if (pat != NULL) { + j = strlen(mparm->value); + arg_min = j; + arg_max = 1; + while((*pat != '\0')&&(j--)) { + if(arg_max > mparm->pad_2) { + int off = strlen(mparm->value) - arg_max; + int pos = mparm->pad_1-1; + if(off > pos) { + *pat = '*'; + } + } + arg_max++; + arg_min--; + pat++; + } + sanitized_partial = 1; + } + } + } + + if(sanitized_partial == 1 && sanitize_matched == 0) { + text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, buf); + } else { + memset(text + strlen(te[i].key) + 2, '*', strlen(te[i].val)); + } + } + sec_auditlog_write(msr, text, strlen(text)); + } + } + + /* AUDITLOG_PART_REQUEST_BODY */ + + /* Output this part of it was explicitly requested (C) or if it was the faked + * request body that was requested (I) but we have no reason to fake it (it's + * already in the correct format). + */ + if ( (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_REQUEST_BODY) != NULL) + || ( (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_FAKE_REQUEST_BODY) != NULL) + && (msr->mpd == NULL) ) ) + { + if (msr->msc_reqbody_read) { + const apr_array_header_t *tarr; + const apr_table_entry_t *telts; + apr_array_header_t *sorted_args; + unsigned int offset = 0, last_offset = 0; + msc_arg *nextarg = NULL; + int sanitize = 0; /* IMP1 Use constants for "sanitize" values. */ + char *my_error_msg = NULL; + + sorted_args = apr_array_make(msr->mp, 25, sizeof(const msc_arg *)); + + /* First we need to sort the arguments that need to be + * sanitized in descending order (we are using a stack structure + * to store then so the order will be ascending when we start + * popping them out). This is because we will + * be reading the request body sequentially and must + * sanitize it as we go. + */ + + for(;;) { + nextarg = NULL; + + /* Find the next largest offset (excluding + * the ones we've used up already). + */ + tarr = apr_table_elts(msr->arguments_to_sanitize); + telts = (const apr_table_entry_t*)tarr->elts; + for(i = 0; i < tarr->nelts; i++) { + msc_arg *arg = (msc_arg *)telts[i].val; + if (arg->origin != NULL && + strcmp(arg->origin, "BODY") != 0) + continue; + + if (last_offset == 0) { /* The first time we're here. */ + if (arg->value_origin_offset > offset) { + offset = arg->value_origin_offset; + nextarg = arg; + } + } else { /* Not the first time. */ + if ((arg->value_origin_offset > offset) + &&(arg->value_origin_offset < last_offset)) + { + offset = arg->value_origin_offset; + nextarg = arg; + } + } + } + + /* If we don't have the next argument that means + * we're done here. + */ + if (nextarg == NULL) break; + + sanitize = 2; /* Means time to pop the next argument out. */ + last_offset = offset; + offset = 0; + { /* IMP1 Fix this ugly bit here. */ + msc_arg **x = apr_array_push(sorted_args); + *x = nextarg; + } + } + + /* Now start retrieving the body chunk by chunk and + * sanitize data in pieces. + */ + + rc = modsecurity_request_body_retrieve_start(msr, &my_error_msg); + if (rc < 0) { + msr_log(msr, 1, "Audit log: %s", my_error_msg); + } else { + msc_data_chunk *chunk = NULL; + unsigned int chunk_offset = 0; + unsigned int sanitize_offset = 0; + unsigned int sanitize_length = 0; + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_REQUEST_BODY); + sec_auditlog_write(msr, text, strlen(text)); + + for(;;) { + rc = modsecurity_request_body_retrieve(msr, &chunk, -1, &my_error_msg); + if (chunk != NULL) { + /* Anything greater than 1 means we have more data to sanitize. */ + while (sanitize > 1) { + msc_arg **arg = NULL; + + if (sanitize == 2) { + /* Get the next argument from the stack. */ + arg = (msc_arg **)apr_array_pop(sorted_args); + if (arg == NULL) sanitize = 0; /* We're done sanitising. */ + else { + /* Continue with sanitation to process the + * retrieved argument. + */ + sanitize = 1; + sanitize_offset = (*arg)->value_origin_offset; + sanitize_length = (*arg)->value_origin_len; + } + } + + if (sanitize) { + /* Check if the data we want to sanitize is + * stored in the current chunk. + */ + if (chunk_offset + chunk->length > sanitize_offset) { + unsigned int soff; /* data offset within chunk */ + unsigned int len; /* amount in this chunk to sanitize */ + + soff = sanitize_offset - chunk_offset; + + if (soff + sanitize_length <= chunk->length) { + /* The entire argument resides in the current chunk. */ + len = sanitize_length; + sanitize = 2; /* Get another parameter to sanitize. */ + } else { + /* Some work to do here but we'll need to seek + * another chunk. + */ + len = chunk->length - soff; + sanitize_offset += len; + sanitize_length -= len; + sanitize = 1; /* It's OK to go to the next chunk. */ + } + + /* Yes, we actually write over the original data. + * We shouldn't be needing it any more. + */ + if (soff + len <= chunk->length) { /* double check */ + memset((char *)chunk->data + soff, '*', len); + } + } + } + } + + /* Write the sanitized chunk to the log + * and advance to the next chunk. */ + sec_auditlog_write(msr, chunk->data, chunk->length); + chunk_offset += chunk->length; + } + + if (rc <= 0) { + break; + } + } + + if (rc < 0) { + msr_log(msr, 1, "Audit log: %s", my_error_msg); + } + + modsecurity_request_body_retrieve_end(msr); + } + } + } + + /* AUDITLOG_PART_FAKE_REQUEST_BODY */ + + if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_FAKE_REQUEST_BODY) != NULL) { + if ((msr->msc_reqbody_read)&&(msr->mpd != NULL)) { + char *buffer = NULL; + + buffer = multipart_reconstruct_urlencoded_body_sanitize(msr); + if (buffer == NULL) { + msr_log(msr, 1, "Audit log: Failed to reconstruct request body."); + } else { + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_FAKE_REQUEST_BODY); + sec_auditlog_write(msr, text, strlen(text)); + sec_auditlog_write(msr, buffer, strlen(buffer)); + } + } + } + + /* AUDITLOG_PART_A_RESPONSE_HEADERS */ + + if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_A_RESPONSE_HEADERS) != NULL) { + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_A_RESPONSE_HEADERS); + sec_auditlog_write(msr, text, strlen(text)); + + /* There are no response headers (or the status line) in HTTP 0.9 */ + if (msr->response_headers_sent) { + if (msr->status_line != NULL) { + text = apr_psprintf(msr->mp, "%s %s\n", msr->response_protocol, + msr->status_line); + } else { + text = apr_psprintf(msr->mp, "%s %u\n", msr->response_protocol, + msr->response_status); + } + sec_auditlog_write(msr, text, strlen(text)); + + /* Output headers */ + + arr = apr_table_elts(msr->response_headers); + te = (apr_table_entry_t *)arr->elts; + + tarr_pattern = apr_table_elts(msr->pattern_to_sanitize); + telts_pattern = (const apr_table_entry_t*)tarr_pattern->elts; + + for (i = 0; i < arr->nelts; i++) { + sanitized_partial = 0; + sanitize_matched = 0; + text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, te[i].val); + if (apr_table_get(msr->response_headers_to_sanitize, te[i].key) != NULL) { + buf = apr_psprintf(msr->mp, "%s",text+strlen(te[i].key)+2); + + for ( k = 0; k < tarr_pattern->nelts; k++) { + if(strncmp(telts_pattern[k].key,te[i].key,strlen(te[i].key)) ==0 ) { + mparm = (msc_parm *)telts_pattern[k].val; + if(mparm->pad_1 == -1) + sanitize_matched = 1; + pat = strstr(buf,mparm->value); + if (pat != NULL) { + j = strlen(mparm->value); + arg_min = j; + arg_max = 1; + while((*pat != '\0')&&(j--)) { + if(arg_max > mparm->pad_2) { + int off = strlen(mparm->value) - arg_max; + int pos = mparm->pad_1-1; + if(off > pos) { + *pat = '*'; + } + } + arg_max++; + arg_min--; + pat++; + } + sanitized_partial = 1; + } + } + } + + if(sanitized_partial == 1 && sanitize_matched == 0) { + text = apr_psprintf(msr->mp, "%s: %s\n", te[i].key, buf); + } else { + memset(text + strlen(te[i].key) + 2, '*', strlen(te[i].val)); + } + } + sec_auditlog_write(msr, text, strlen(text)); + } + } + } + + apr_table_clear(msr->pattern_to_sanitize); + + /* AUDITLOG_PART_RESPONSE_BODY */ + + if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_RESPONSE_BODY) != NULL) { + if (msr->resbody_data != NULL) { + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_RESPONSE_BODY); + sec_auditlog_write(msr, text, strlen(text)); + sec_auditlog_write(msr, msr->resbody_data, msr->resbody_length); + wrote_response_body = 1; + } + } + + /* AUDITLOG_PART_TRAILER */ + + if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_TRAILER) != NULL) { + apr_time_t now = apr_time_now(); + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_TRAILER); + sec_auditlog_write(msr, text, strlen(text)); + + /* Messages */ + for(i = 0; i < msr->alerts->nelts; i++) { + text = apr_psprintf(msr->mp, "Message: %s\n", ((char **)msr->alerts->elts)[i]); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Apache error messages */ + for(i = 0; i < msr->error_messages->nelts; i++) { + error_message_t *em = (((error_message_t **)msr->error_messages->elts)[i]); + text = apr_psprintf(msr->mp, "Apache-Error: %s\n", + format_error_log_message(msr->mp, em)); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Action */ + if (msr->was_intercepted) { + text = apr_psprintf(msr->mp, "Action: Intercepted (phase %d)\n", msr->intercept_phase); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Apache-Handler */ + if (msr->r->handler != NULL) { + text = apr_psprintf(msr->mp, "Apache-Handler: %s\n", msr->r->handler); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Stopwatch; left in for compatibility reasons */ + text = apr_psprintf(msr->mp, "Stopwatch: %" APR_TIME_T_FMT " %" APR_TIME_T_FMT " (- - -)\n", + msr->request_time, (now - msr->request_time)); + sec_auditlog_write(msr, text, strlen(text)); + + /* Stopwatch2 */ + { + char *perf_all = format_all_performance_variables(msr, msr->mp); + + text = apr_psprintf(msr->mp, "Stopwatch2: %" APR_TIME_T_FMT " %" APR_TIME_T_FMT + "; %s\n", msr->request_time, (now - msr->request_time), perf_all); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Our response body does not contain chunks */ + /* ENH Only write this when the output was chunked. */ + /* ENH Add info when request body was decompressed, dechunked too. */ + if (wrote_response_body) { + text = apr_psprintf(msr->mp, "Response-Body-Transformed: Dechunked\n"); + sec_auditlog_write(msr, text, strlen(text)); + } + + sec_auditlog_write_producer_header(msr); + + /* Server */ + if (msr->server_software != NULL) { + text = apr_psprintf(msr->mp, "Server: %s\n", msr->server_software); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Sanitised arguments */ + { + const apr_array_header_t *tarr; + const apr_table_entry_t *telts; + + tarr = apr_table_elts(msr->arguments_to_sanitize); + telts = (const apr_table_entry_t*)tarr->elts; + + if (tarr->nelts > 0) { + text = apr_psprintf(msr->mp, "Sanitised-Args: "); + sec_auditlog_write(msr, text, strlen(text)); + } + + for(i = 0; i < tarr->nelts; i++) { + msc_arg *arg = (msc_arg *)telts[i].val; + text = apr_psprintf(msr->mp, "%s\"%s\"%s", ((i == 0) ? "" : ", "), + log_escape(msr->mp, arg->name), ((i == (tarr->nelts - 1)) ? ".\n" : "")); + sec_auditlog_write(msr, text, strlen(text)); + } + } + + /* Sanitised request headers */ + { + const apr_array_header_t *tarr; + const apr_table_entry_t *telts; + + tarr = apr_table_elts(msr->request_headers_to_sanitize); + telts = (const apr_table_entry_t*)tarr->elts; + + if (tarr->nelts > 0) { + text = apr_psprintf(msr->mp, "Sanitised-Request-Headers: "); + sec_auditlog_write(msr, text, strlen(text)); + } + + for(i = 0; i < tarr->nelts; i++) { + text = apr_psprintf(msr->mp, "%s\"%s\"%s", ((i == 0) ? "" : ", "), + log_escape(msr->mp, telts[i].key), ((i == (tarr->nelts - 1)) ? ".\n" : "")); + sec_auditlog_write(msr, text, strlen(text)); + } + } + + /* Sanitised response headers */ + { + const apr_array_header_t *tarr; + const apr_table_entry_t *telts; + + tarr = apr_table_elts(msr->response_headers_to_sanitize); + telts = (const apr_table_entry_t*)tarr->elts; + + if (tarr->nelts > 0) { + text = apr_psprintf(msr->mp, "Sanitised-Response-Headers: "); + sec_auditlog_write(msr, text, strlen(text)); + } + + for(i = 0; i < tarr->nelts; i++) { + text = apr_psprintf(msr->mp, "%s\"%s\"%s", ((i == 0) ? "" : ", "), + log_escape(msr->mp, telts[i].key), ((i == (tarr->nelts - 1)) ? ".\n" : "")); + sec_auditlog_write(msr, text, strlen(text)); + } + } + + /* Web application info. */ + if ( ((msr->txcfg->webappid != NULL)&&(strcmp(msr->txcfg->webappid, "default") != 0)) + || (msr->sessionid != NULL) || (msr->userid != NULL)) + { + text = apr_psprintf(msr->mp, "WebApp-Info: \"%s\" \"%s\" \"%s\"\n", + msr->txcfg->webappid == NULL ? "-" : log_escape(msr->mp, msr->txcfg->webappid), + msr->sessionid == NULL ? "-" : log_escape(msr->mp, msr->sessionid), + msr->userid == NULL ? "-" : log_escape(msr->mp, msr->userid)); + sec_auditlog_write(msr, text, strlen(text)); + } + + if ( ((msr->txcfg->sensor_id != NULL)&&(strcmp(msr->txcfg->sensor_id, "default") != 0))) + { + text = apr_psprintf(msr->mp, "Sensor-Id: \"%s\"\n", + msr->txcfg->sensor_id == NULL ? "-" : log_escape(msr->mp, msr->txcfg->sensor_id)), + sec_auditlog_write(msr, text, strlen(text)); + } + + + if (msr->txcfg->is_enabled > 0) { + text = apr_psprintf(msr->mp, "Engine-Mode: \"%s\"\n", + msr->txcfg->is_enabled == 1 ? "DETECTION_ONLY" : "ENABLED"), + sec_auditlog_write(msr, text, strlen(text)); + } + + /* Rule performance time */ + if(msr->txcfg->max_rule_time > 0) { + const apr_array_header_t *tarr; + const apr_table_entry_t *telts; + + tarr = apr_table_elts(msr->perf_rules); + telts = (const apr_table_entry_t*)tarr->elts; + + if (tarr->nelts > 0) { + text = apr_psprintf(msr->mp, "Rules-Performance-Info: "); + sec_auditlog_write(msr, text, strlen(text)); + } + + for(i = 0; i < tarr->nelts; i++) { + text = apr_psprintf(msr->mp, "%s\"%s=%s\"%s", ((i == 0) ? "" : ", "), + log_escape(msr->mp, telts[i].key), log_escape(msr->mp, telts[i].val), ((i == (tarr->nelts - 1)) ? ".\n" : "")); + sec_auditlog_write(msr, text, strlen(text)); + } + } + } + + /* AUDITLOG_PART_UPLOADS */ + if ((strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_UPLOADS) != NULL) && (msr->mpd != NULL)) { + multipart_part **parts = NULL; + unsigned int total_size = 0; + int cfiles = 0; + + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_UPLOADS); + sec_auditlog_write(msr, text, strlen(text)); + + parts = (multipart_part **)msr->mpd->parts->elts; + for(cfiles = 0; cfiles < msr->mpd->parts->nelts; cfiles++) { + if (parts[cfiles]->type == MULTIPART_FILE) { + if(parts[cfiles]->filename != NULL) { + text = apr_psprintf(msr->mp, "%d,%u,\"%s\",\"%s\"\n", cfiles+1, parts[cfiles]->tmp_file_size, log_escape(msr->mp, parts[cfiles]->filename), log_escape(msr->mp, parts[cfiles]->content_type ? parts[cfiles]->content_type : "")); + sec_auditlog_write(msr, text, strlen(text)); + total_size += parts[cfiles]->tmp_file_size; + } + } + } + text = apr_psprintf(msr->mp, "Total,%u\n", total_size); + sec_auditlog_write(msr, text, strlen(text)); + } + + /* AUDITLOG_PART_MATCHEDRULES */ + + if (strchr(msr->txcfg->auditlog_parts, AUDITLOG_PART_MATCHEDRULES) != NULL) { + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_MATCHEDRULES); + sec_auditlog_write(msr, text, strlen(text)); + + /* Matched Rules */ + + for(i = 0; i < msr->matched_rules->nelts; i++) { + rule = ((msre_rule **)msr->matched_rules->elts)[i]; + if ((rule != NULL) && (rule->actionset != NULL) && rule->actionset->is_chained && (rule->chain_starter == NULL)) { + text = apr_psprintf(msr->mp, "%s\n", rule->unparsed); + sec_auditlog_write(msr, text, strlen(text)); + do { + if (rule->ruleset != NULL) { + + next_rule = return_chained_rule(rule,msr); + + if (next_rule != NULL) { + + present = chained_is_matched(msr,next_rule); + + if (present == 0) { + text = apr_psprintf(msr->mp, "#%s\n",next_rule->unparsed); + } else { + text = apr_psprintf(msr->mp, "%s\n",next_rule->unparsed); + i++; + } + sec_auditlog_write(msr, text, strlen(text)); + } + } + rule = next_rule; + } while (rule != NULL && rule->actionset != NULL && rule->actionset->is_chained); + text = apr_psprintf(msr->mp, "\n"); + sec_auditlog_write(msr, text, strlen(text)); + } else { + if ((rule != NULL) && (rule->actionset != NULL) && !rule->actionset->is_chained && (rule->chain_starter == NULL)) { + text = apr_psprintf(msr->mp, "%s\n\n", rule->unparsed); + sec_auditlog_write(msr, text, strlen(text)); + } + } + } + } + /* AUDITLOG_PART_ENDMARKER */ + text = apr_psprintf(msr->mp, "\n--%s-%c--\n", msr->new_auditlog_boundary, AUDITLOG_PART_ENDMARKER); + sec_auditlog_write(msr, text, strlen(text)); + + /* Return here if we were writing to a serial log + * as it does not need an index file. + */ + if (msr->txcfg->auditlog_type != AUDITLOG_CONCURRENT) { + sec_auditlog_write(msr, "\n", 1); + + /* Unlock the mutex we used to serialise access to the audit log file. */ + rc = apr_global_mutex_unlock(msr->modsecurity->auditlog_lock); + if (rc != APR_SUCCESS) { + msr_log(msr, 1, "Audit log: Failed to unlock global mutex: %s", + get_apr_error(msr->mp, rc)); + } + + return; + } + + /* From here on only concurrent-style processing. */ + + /* File handle might already be closed after write failure. */ + if (msr->new_auditlog_fd) { + apr_file_close(msr->new_auditlog_fd); + } + + /* Write an entry to the index file */ + + /* Calculate hash of the entry. */ + apr_md5_final(md5hash, &msr->new_auditlog_md5ctx); + + str2 = apr_psprintf(msr->mp, "%s %d %d md5:%s", msr->new_auditlog_filename, 0, + msr->new_auditlog_size, bytes2hex(msr->mp, md5hash, 16)); + if (str2 == NULL) return; + + /* We do not want the index line to be longer than 3980 bytes. */ + limit = 3980; + was_limited = 0; + + /* If we are logging to a pipe we need to observe and + * obey the pipe atomic write limit - PIPE_BUF. For + * more details see the discussion in sec_guardian_logger code. + */ + if (msr->txcfg->auditlog_name[0] == '|') { + if (PIPE_BUF < limit) { + limit = PIPE_BUF; + } + } + + limit = limit - strlen(str2) - 5; + if (limit <= 0) { + msr_log(msr, 1, "Audit Log: Atomic PIPE write buffer too small: %d", PIPE_BUF); + return; + } + + str1 = construct_log_vcombinedus_limited(msr, limit, &was_limited); + if (str1 == NULL) return; + + if (was_limited == 0) { + text = apr_psprintf(msr->mp, "%s %s \n", str1, str2); + } else { + text = apr_psprintf(msr->mp, "%s %s L\n", str1, str2); + } + if (text == NULL) return; + + nbytes = strlen(text); + if (msr->txcfg->debuglog_level >= 9) { + msr_log(msr, 9, "Audit Log: Writing %" APR_SIZE_T_FMT " bytes to primary concurrent index", nbytes); + } + apr_file_write_full(msr->txcfg->auditlog_fd, text, nbytes, &nbytes_written); + + /* Write to the secondary audit log if we have one */ + if (msr->txcfg->auditlog2_fd != NULL) { + if (msr->txcfg->debuglog_level >= 9) { + msr_log(msr, 9, "Audit Log: Writing %" APR_SIZE_T_FMT " bytes to secondary concurrent index", nbytes); + } + apr_file_write_full(msr->txcfg->auditlog2_fd, text, nbytes, &nbytes_written); + } +} + +/* + * Handler for audit log writers. + */ +void sec_audit_logger(modsec_rec *msr) { + #ifdef WITH_YAJL + if (msr->txcfg->auditlog_format == AUDITLOGFORMAT_JSON) { + sec_audit_logger_json(msr); + } else { + #endif + sec_audit_logger_native(msr); + #ifdef WITH_YAJL + } + #endif +} diff --git a/apache2/msc_logging.h b/apache2/msc_logging.h index 75af9da7ae..5378ddc659 100644 --- a/apache2/msc_logging.h +++ b/apache2/msc_logging.h @@ -22,6 +22,11 @@ #define AUDITLOG_SERIAL 0 #define AUDITLOG_CONCURRENT 1 +#ifdef WITH_YAJL +#define AUDITLOGFORMAT_JSON 0 +#define AUDITLOGFORMAT_NATIVE 1 +#endif + #define AUDITLOG_PART_FIRST 'A' #define AUDITLOG_PART_HEADER 'A' #define AUDITLOG_PART_REQUEST_HEADERS 'B' diff --git a/apache2/msc_logging_json.h b/apache2/msc_logging_json.h new file mode 100644 index 0000000000..e49b98c29d --- /dev/null +++ b/apache2/msc_logging_json.h @@ -0,0 +1,13 @@ +#include + +#include + +#define yajl_string(g, s) yajl_gen_string(g, (const unsigned char *)s, strlen(s)) + +#define yajl_kv_null(g, k) yajl_string(g, k); yajl_gen_null(g) + +#define yajl_kv_int(g, k, v) yajl_string(g, k); yajl_gen_integer(g, v) + +#define yajl_kv_string(g, k, v) yajl_string(g, k); yajl_string(g, v) + +#define yajl_kv_bool(g, k, v) yajl_string(g, k); yajl_gen_bool(g, v) diff --git a/tools/parse_modsec.pl b/tools/parse_modsec.pl new file mode 100755 index 0000000000..8c3b043e95 --- /dev/null +++ b/tools/parse_modsec.pl @@ -0,0 +1,603 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use DateTime::Format::Strptime; +use Getopt::Long qw(:config no_ignore_case bundling); +use JSON; +use List::MoreUtils qw(any); +use NetAddr::IP; +use Try::Tiny; + +=pod + +=head1 NAME + +parse_modsec.pl + +=head1 SYNOPSIS + +Parse ModSecurity logs generated as JSON + +=head1 USAGE + +Usage: $0 [h] [Htsrfdbalspjv] + -H|--host Search rules based on the Host request header + -t|--transaction-id Search rules based on the unique transaction ID + -s|--source-ip Search rules based on the client IP address (can be presented as an address or CIDR block) + -r|--rule-id Search rules based on the rule ID + -f|--filter Define advanced filters to walk through JSON tree + -d|--delim Define a delimiter for advanced filters. Default is '.' + -b|--before Search rules before this timeframe + -a|--after Search rules after this timeframe + -l|--logpath Define a path to read JSON logs from. Default is '/var/log/modsec_audit.log' + -S|--stdin Read rules from stdin instead of an on-disk file + -p|--partial-chains Do not prune partial chain matches + -j|--json Print rule entries as a JSON blob, rather than nice formatting + -v|--verbose Be verbose about various details such as JSON parse failures and log data + + +=head2 FILTERS + +ModSecurity JSON audit logs are written as a series of atomic JSON documents, as opposed to a single, monolithic structure. This program will read through all JSON documents provided, making certain assumptions about the structure of each document, and will print out relevent entries based on the parameters provided. Log entries can be filtered by key-value pairs; given a key at an arbitrary level in the document, test the value of the key against an expected expression. The best way to understand this is with examples (see EXAMPLES for further details). + +Filter values are treated as regular expressions. Each match is anchored by '^' and'$', meaning that values that do not contain PCRE metacharacters will essentially match by string equality. + +Filters can be used to search a specific key-pair value, or an array of values. Arrays containing sub-elements can also be traversed. Arrays are identified in a filter key expression through the use of the '%' metacharacter. See EXAMPLES for further discussion of filter key expression syntax. + +Multiple filters can be provided, and are used in a logical AND manner (that is, an entry must match all given filters). + + +=head2 FILTER EXAMPLES + +Examine the following entry: + + { + "foo": "bar", + "foo2": "bar2", + "qux": { + "quux": "corge", + "grault": "garply", + "wal.do": "fred" + }, + "baz": [ + "bat", + "bam", + "bif" + ], + "bal": [ + { "hello": "world" }, + { "how": "are" }, + { "you": "doing" } + ] + } + +A search for the top level key "foo" containing the value "bar" would look like: + + -f foo=bar + +However, the following will not result in the entry being matched: + + -f foo=bar2 + +This is because the value of "foo" in the JSON document does not match the regex "^bar2$" + +Searching sub-keys is possible by providing the traversal path as the filter key, separated by a delimiter. By default the delimiter is '.'. For example, to search the value of the "grault" subkey within the "qux" key: + + -f qux.grault= + +Search arrays is also possible with the use of the '%' metacharacter, which should be used in place of a key name in the filter expression. For example, to search through all the values in the "baz" top-level key: + + -f baz.%= + +Searching for specific keys that are live in an array is also possible. For example, to search for the value of the "hello" key within the top-level key "bal" array: + + -f bal.%.hello= + +If any key contains a period character (.), you can specify an alternative delimiter using the '-d' option. To search the "wal.do" key within "qux": + + -d @ quz@wal.do= + + +=head2 SHORTCUTS + +Quick searches of on-disk log files likely will be performed using simple queries. Rather than forcing users to write a filter for common parameters, we provide a few shortcuts as options. These shortcuts can be combined with additional filters for complex searches. Provided shortcuts (and the matching filter key expression) are listed below: + + Host: request.headers.Host + Transaction ID: transaction.transaction_id + Rule ID: matched_rules.%.rules.%.actionset.id + +Additionally, the '--source-ip' argument allows for searching rule entries based on the remote IP address. This option searches based on CIDR blocks, instead of the filter searching described above. + +=head2 TIMEFRAME + +Log entries can further be narrowed by time range. The --before and --after flags can be used to return only entries that returned before or after (or both) a given date and time. Values for these options can be provided by the following syntax: + + ^\d+[dDhHmM]?$ + +For example, to limit the search of entries to between one and 4 days ago: + + -a 4d -b 1d + +You may provide one, both, or neither of these flags. + + +=head2 USAGE EXAMPLES + +Print all log entries from the default log location: + + parse_modsec.pl + +Print all log entries and show more detailed information, such as response data and matched rule details + + parse_modsec.pl -v + +Print entries matching a specific source IP: + + parse_modsec.pl -s 1.2.3.4 + +Print entries matching a source IP in a given subnet: + + parse_modsec.pl -s 1.2.3.0/24 + +Print entries matching a given host and all its sub domains: + + parse_modsec.pl -H .*example.com + +Print entries matching a specific rule ID, that occurred within the last 12 hours: + + parse_modsec.pl -r 123456 -a 12h + +Print entries matching a given rule ID, even if that ID was present in a partial chain: + + parse_modsec.pl -r 123456 -p + +Print entries that contain an HTTP status code 403 + + parse_modsec.pl -f response.status=403 + +Print entries that contain an HTTP GET request with a 'Content-Length' header + + parse_modsec.pl -f request.headers.Content-Length=.* -f request.request_line=GET.* + +=cut + +sub usage { + print <<"_EOF"; +Usage: $0 [h] [Htsrfdbalspjv] + -h|--help Print this help + -H|--host Search rules based on the Host request header + -t|--transaction-id Search rules based on the unique transaction ID + -s|--source-ip Search rules based on the client IP address (can be presented as an address or CIDR block) + -r|--rule-id Search rules based on the rule ID + -f|--filter Define advanced filters to walk through JSON tree + -d|--delim Define a delimiter for advanced filters. Default is '.' + -b|--before Search rules before this timeframe + -a|--after Search rules after this timeframe + -l|--logpath Define a path to read JSON logs from. Default is '/var/log/modsec_audit.log' + -S|--stdin Read rules from stdin instead of an on-disk file + -p|--partial-chains Do not prune partial chain matches + -j|--json Print rule entries as a JSON blob, rather than nice formatting + -v|--verbose Be verbose about various details such as JSON parse failures and log data + + For detailed explanations of various options and example usages, see 'perldoc $0' + +_EOF + exit 1; +} + +# figure the number of seconds based on the command-line option +sub parse_duration { + my ($duration) = @_; + + if ($duration =~ /^(\d+)[dD]$/) { + return $1 * 60 * 60 * 24; + } elsif ($duration =~ /^(\d+)[hH]$/) { + return $1 * 60 * 60; + } elsif ($duration =~ /^(\d+)[mM]$/) { + return $1 * 60; + } elsif ($duration =~ /^(\d+)[sS]?$/) { + return $1; + } else { + die "Couldn't parse duration $duration!\n"; + } +} + +# build a DateTime representative of the past +sub build_datetime { + my ($duration) = @_; + + return if !$duration; + return DateTime->now()->subtract(seconds => parse_duration($duration)); +} + +# determine if the log entry occurred within the given timeframe +sub within_timeframe { + my ($args) = @_; + my $entry = $args->{entry}; + my $before = $args->{before}; + my $after = $args->{after}; + my $timestamp = parse_modsec_timestamp($entry->{transaction}->{time}); + + return (defined $before ? $timestamp < $before : 1) && + (defined $after ? $timestamp > $after : 1); +} + +# sigh... +sub parse_modsec_timestamp { + my ($input) = @_; + + my $format = '%d/%b/%Y:%H:%M:%S -%z'; + my $locale = 'en_US'; + + my $strp = DateTime::Format::Strptime->new( + pattern => $format, + locale => $locale, + ); + + return $strp->parse_datetime($input); +} + +# figure out if we're reading from a file or stdin +# return a file handle representation of our data +sub get_input { + my ($args) = @_; + my $logpath = $args->{logpath}; + my $stdin = $args->{stdin}; + my $fh; + + $stdin ? + $fh = *STDIN : + open $fh, '<', $logpath or die $!; + + return $fh; +} + +# figure if the target address/cidr contains the entry's remote address +sub cidr_match { + my ($args) = @_; + my $entry = $args->{entry}; + my $target = $args->{target}; + my $client_ip = $entry->{transaction}->{remote_address}; + + return $target ? $target->contains(NetAddr::IP->new($client_ip)) : 1; +} + +# given a file handle, return an arrayref representing pertinent rule entries +sub grok_input { + my ($args) = @_; + my $fh = $args->{fh}; + my $filters = $args->{filters}; + my $delim = $args->{delim}; + my $source_ip = $args->{source_ip}; + my $before = $args->{before}; + my $after = $args->{after}; + my $partial = $args->{partial}; + my $verbose = $args->{verbose}; + + my @ref; + + while (my $line = <$fh>) { + my $entry; + + try { + $entry = decode_json($line); + } catch { + warn "Could not decode as JSON:\n$line\n" if $verbose; + }; + + next if !$entry; + + skim_entry({ + entry => $entry, + partial => $partial, + }); + + next if !filter({ + filters => $filters, + data => $entry, + delim => $delim, + }); + + next if !cidr_match({ + entry => $entry, + target => $source_ip, + }); + + next if !within_timeframe({ + entry => $entry, + before => $before, + after => $after, + }); + + push @ref, $entry; + } + + return \@ref; +} + +# get rid of partial chains and other noise +sub skim_entry { + my ($args) = @_; + my $entry = $args->{entry}; + my $partial = $args->{partial}; + my $ctr = 0; + + for my $matched_rule (@{$entry->{matched_rules}}) { + splice @{$entry->{matched_rules}}, $ctr++, 1 + if $matched_rule->{chain} && !$matched_rule->{full_chain_match} && !$partial; + } +} + +# print entries after filtering and skimming +sub print_matches { + my ($args) = @_; + my $ref = $args->{ref}; + my $json = $args->{json}; + my $verbose = $args->{verbose}; + + for my $entry (@{$ref}) { + if ($json) { + print encode_json($entry) . "\n"; + } else { + printf "\n%s\n", '=' x 80; + + my $transaction = $entry->{transaction}; + my $request = $entry->{request}; + my $response = $entry->{response}; + my $audit_data = $entry->{audit_data}; + my $matched_rules = $entry->{matched_rules}; + + if ($transaction) { + printf "%s\nTransaction ID: %s\nIP: %s\n\n", + parse_modsec_timestamp($transaction->{time}), + $transaction->{transaction_id}, + $transaction->{remote_address}; + } + + printf "%s\n", $request->{request_line} + if $request->{request_line}; + + if ($request->{headers}) { + for my $header (sort keys %{$request->{headers}}) { + printf "%s: %s\n", $header, $request->{headers}->{$header}; + } + } + + if ($verbose) { + print join ("\n", @{$request->{body}}) . "\n" + if $request->{body}; + + printf "\n%s %s\n", $response->{protocol}, $response->{status} + if $response->{protocol} && $response->{status}; + + for my $header (sort keys %{$response->{headers}}) { + printf "%s: %s\n", $header, $response->{headers}->{$header}; + } + + printf "\n%s\n", $response->{body} + if $response->{body}; + } + + for my $chain (@{$matched_rules}) { + print "\n"; + my @extra_data; + my $ctr = 0; + + for my $rule (@{$chain->{rules}}) { + printf $rule->{is_matched} ? "%s%s\n" : "%s#%s\n", ' ' x $ctr++, $rule->{unparsed}; + push @extra_data, $rule->{actionset}->{msg} if $rule->{actionset}->{msg}; + push @extra_data, $rule->{actionset}->{logdata} if $rule->{actionset}->{logdata}; + } + + printf "\n-- %s\n", join "\n-- ", @extra_data + if @extra_data && $verbose; + } + + printf "\n-- %s\n\n", $audit_data->{action}->{message} + if $audit_data->{action}->{message} && $verbose; + + printf "%s\n", '=' x 80; + } + } +} + +# filter out rule entries based on given filter definitions +sub filter { + my ($args) = @_; + my $filters = $args->{filters}; + my $data = $args->{data}; + my $delim = $args->{delim}; + + my $valid_match = 1; + + for my $field (keys %{$filters}) { + my $args = { + field => $field, + match => $filters->{$field}, + delim => $delim, + hash => $data, + }; + + if (!match($args)) { + $valid_match = 0; + last; + } + } + return $valid_match; +} + +# match a hash element (may be an array of elements) against a given pattern +sub match { + my ($args) = @_; + my $delim = $args->{delim}; + my $hash = $args->{hash}; + my $match = $args->{match}; + my $field = $args->{field}; + + my @matches = traverse($args); + + return any { $_ =~ m/^$match$/ } @matches; +} + +# walk a JSON structure in search of a given key +# borrowed and butchered from view_signatures.pl +sub traverse { + my ($args) = @_; + my $delim = $args->{delim}; + my $hash = $args->{hash}; + my $match = $args->{match}; + my $field = $args->{field}; + my @traverse = split /\Q$delim\E/, $field; + + my @values; + + while (my $level = shift @traverse) { + if ($level eq '%') { + # match() is called in a list context + # so if we have a bad filter expression + # we need to bail in a sensible way + return () if ref $hash ne 'ARRAY'; + + for my $subhash (@{$hash}) { + my @match = traverse({ + hash => $subhash, + delim => $delim, + match => $match, + field => join $delim, @traverse, + }); + push(@values, @match) if @match; + } + } elsif (ref $hash eq 'HASH' && defined $hash->{$level}) { + $hash = $hash->{$level}; + } else { + $hash = undef; + last; + } + } + + push @values, $hash if defined $hash; + return ref $hash eq 'ARRAY' ? @{$hash} : @values; +} + +# merge any custom-defined filters with shortcut options +sub merge_filters { + my ($args) = @_; + my $filters = $args->{filters}; + my $delim = $args->{delim}; + + my $lookup = { + host => [qw(request headers Host)], + transaction_id => [qw(transaction transaction_id)], + rule_id => [qw(matched_rules % rules % actionset id)] + }; + + for my $field (keys %{$lookup}) { + if (defined $args->{$field}) { + my $key = build_filter_key({ + elements => $lookup->{$field}, + delim => $delim, + }); + + $filters->{$key} = $args->{$field}; + } + } +} + +# stub sub to build a filter key +sub build_filter_key { + my ($args) = @_; + my $elements = $args->{elements}; + my $delim = $args->{delim}; + + return join $delim, @{$elements}; +} + +sub main { + my ( + $host, $transaction_id, # shortcuts + $source_ip, $rule_id, # shortcuts + %filters, $delim, # used by filters/match/traverse to grok the input + $before, $after, # timeframe + $logpath, $stdin, # input + $partial_chains, $json, # output + $verbose, # output + $fh, $parsed_ref, # data structures + ); + + GetOptions( + 'h|help' => sub { usage(); }, + 'H|host=s' => \$host, + 't|transaction-id=s' => \$transaction_id, + 's|source-ip=s' => \$source_ip, + 'r|rule-id=i' => \$rule_id, + 'f|filter=s' => \%filters, + 'd|delim=s' => \$delim, + 'b|before=s' => \$before, + 'a|after=s' => \$after, + 'l|logpath=s' => \$logpath, + 'S|stdin' => \$stdin, + 'p|partial-chains' => \$partial_chains, + 'j|json' => \$json, + 'v|verbose' => \$verbose, + ) or usage(); + + # sanity checks + die "Cannot parse both a file and stdin\n" + if defined $logpath && defined $stdin; + + if (defined $source_ip) { + $source_ip = NetAddr::IP->new($source_ip); + die "Invalid IP/CIDR provided for source IP argument\n" + unless $source_ip; + } + + # build_datetime will bail out if an invalid format was given + $before = build_datetime($before); + $after = build_datetime($after); + + # figure where we're reading from + $logpath ||= '/var/log/mod_sec/modsec_audit.log'; + $fh = get_input({ + logpath => $logpath, + stdin => $stdin, + }); + + die "Could not get a handle on your data\n" + unless $fh; + + # build the filters by merging shortcut options with custom filter directives + $delim ||= '.'; + merge_filters({ + filters => \%filters, + host => $host, + transaction_id => $transaction_id, + source_ip => $source_ip, + rule_id => $rule_id, + delim => $delim, + }); + + # walk through our input, getting an arrayref of valid entries based on filters and timeframe + $parsed_ref = grok_input({ + fh => $fh, + filters => \%filters, + delim => $delim, + source_ip => $source_ip, + before => $before, + after => $after, + partial => $partial_chains, + verbose => $verbose, + }); + + close $fh || warn $!; + + # show me the money! + print_matches({ + ref => $parsed_ref, + json => $json, + verbose => $verbose, + }); +} + +main();