From b81a9333f45c8a55675bb8f733d5ec9e5675abe8 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 24 Jul 2024 11:32:49 -0600 Subject: [PATCH 1/2] Limit recursion in regex parser --- .../Regex/Parse/Diagnostics.swift | 20 +++++++++++++++++++ Sources/_RegexParser/Regex/Parse/Parse.swift | 19 ++++++++++++++++++ Tests/RegexTests/ParseTests.swift | 15 ++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift index f5c0d7075..53b2290e9 100644 --- a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift +++ b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift @@ -87,6 +87,9 @@ enum ParseError: Error, Hashable { case expectedCalloutArgument + // Excessively nested groups (i.e. recursion) + case nestingTooDeep + // MARK: Semantic Errors case unsupported(String) @@ -241,6 +244,9 @@ extension ParseError: CustomStringConvertible { return "character '\(lhs)' must compare less than or equal to '\(rhs)'" case .notQuantifiable: return "expression is not quantifiable" + + case .nestingTooDeep: + return "group is too deeply nested" } } } @@ -302,6 +308,10 @@ extension Diagnostic { public struct Diagnostics: Hashable { public private(set) var diags = [Diagnostic]() + // In the event of an unrecoverable parse error, set this + // to avoid emitting spurious diagnostics. + internal var suppressFurtherDiagnostics = false + public init() {} public init(_ diags: [Diagnostic]) { self.diags = diags @@ -309,11 +319,17 @@ public struct Diagnostics: Hashable { /// Add a new diagnostic to emit. public mutating func append(_ diag: Diagnostic) { + guard !suppressFurtherDiagnostics else { + return + } diags.append(diag) } /// Add all the diagnostics of another diagnostic collection. public mutating func append(contentsOf other: Diagnostics) { + guard !suppressFurtherDiagnostics else { + return + } diags.append(contentsOf: other.diags) } @@ -321,6 +337,10 @@ public struct Diagnostics: Hashable { /// This assumes that `other` was the same as `self`, but may have additional /// diagnostics added to it. public mutating func appendNewFatalErrors(from other: Diagnostics) { + guard !suppressFurtherDiagnostics else { + return + } + let newDiags = other.diags.dropFirst(diags.count) for diag in newDiags where diag.behavior == .fatalError { append(diag) diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index d9b6f23a0..1fdadd8de 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -73,6 +73,9 @@ struct ParsingContext { /// A set of used group names. private var usedGroupNames = Set() + /// The depth of calls to parseNode (recursion depth plus 1) + fileprivate var parseDepth = 0 + /// The syntax options currently set. fileprivate(set) var syntax: SyntaxOptions @@ -88,6 +91,8 @@ struct ParsingContext { } } + fileprivate var maxParseDepth: Int { 64 } + init(syntax: SyntaxOptions) { self.syntax = syntax } @@ -188,6 +193,20 @@ extension Parser { /// Alternation -> Concatenation ('|' Concatenation)* /// mutating func parseNode() -> AST.Node { + // Excessively nested groups is a common DOS attack, so limit + // our recursion. + context.parseDepth += 1 + defer { context.parseDepth -= 1 } + guard context.parseDepth < context.maxParseDepth else { + self.errorAtCurrentPosition(.nestingTooDeep) + + // This is not generally recoverable and further errors will be + // incorrect + diags.suppressFurtherDiagnostics = true + + return .empty(.init(loc(src.currentPosition))) + } + let _start = src.currentPosition if src.isEmpty { return .empty(.init(loc(_start))) } diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 75df0d509..cbb9e6b32 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -3322,6 +3322,21 @@ extension RegexTests { diagnosticTest("(*LIMIT_DEPTH=-1", .expectedNumber("", kind: .decimal), .expected(")"), unsupported: true) } + func testMaliciousNesting() { + // Excessively nested subpatterns is a common DOS attack + diagnosticTest( + String(repeating: "(", count: 500) + + "a" + + String(repeating: ")*", count: 500), + .nestingTooDeep) + + diagnosticTest( + String(repeating: "(?:", count: 500) + + "a" + + String(repeating: ")*", count: 500), + .nestingTooDeep) + } + func testDelimiterLexingErrors() { // MARK: Printable ASCII From 4524f36c9f88e89d3b91cf53a4764700fc6d5fb3 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 29 Jul 2024 08:15:54 -0700 Subject: [PATCH 2/2] Also check for custom char class nesting --- Sources/_RegexParser/Regex/Parse/Parse.swift | 13 +++++++++++++ Tests/RegexTests/ParseTests.swift | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index 1fdadd8de..3ec852aa8 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -523,6 +523,19 @@ extension Parser { mutating func parseCustomCharacterClass( _ start: Source.Located ) -> CustomCC { + // Excessively nested recursion is a common DOS attack, so limit + // our recursion. + context.parseDepth += 1 + defer { context.parseDepth -= 1 } + guard context.parseDepth < context.maxParseDepth else { + self.errorAtCurrentPosition(.nestingTooDeep) + + // This is not generally recoverable and further errors will be + // incorrect + diags.suppressFurtherDiagnostics = true + return .init(start, [], start.location) + } + let alreadyInCCC = context.isInCustomCharacterClass context.isInCustomCharacterClass = true defer { context.isInCustomCharacterClass = alreadyInCCC } diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index cbb9e6b32..7f0dd811e 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -3335,6 +3335,13 @@ extension RegexTests { + "a" + String(repeating: ")*", count: 500), .nestingTooDeep) + + diagnosticTest( + String(repeating: "[", count: 500) + + "a" + + String(repeating: "]*", count: 500), + .nestingTooDeep) + } func testDelimiterLexingErrors() {