From 289013f72d0b875a36a542b0cda896a614425b34 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 16 May 2024 09:40:20 -0700 Subject: [PATCH 1/3] Handle refactoring changeMatchingOptions --- .../Regex/Printing/PrettyPrinter.swift | 17 +++++ .../_StringProcessing/PrintAsPattern.swift | 63 +++++++++++++++++-- 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift b/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift index 8ddcd73c7..6c007f25b 100644 --- a/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift +++ b/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift @@ -43,6 +43,9 @@ public struct PrettyPrinter { // The current default quantification behavior public var quantificationBehavior: AST.Quantification.Kind = .eager + + // A stack of the current added inline matching options, e.g. (?s) + public var inlineMatchingOptions: [[AST.MatchingOption]] = [] } // MARK: - Raw interface @@ -142,4 +145,18 @@ extension PrettyPrinter { printIndented(f) print(endDelimiter) } + + /// Pushes the list of matching options to the current stack of other matching + /// options and increases the indentation level by 1. + public mutating func pushMatchingOptions(_ options: [AST.MatchingOption]) { + indentLevel += 1 + inlineMatchingOptions.append(options) + } + + /// Pops the most recent list of matching options from the printer and + /// decreases the indentation level by 1. + public mutating func popMatchingOptions() -> [AST.MatchingOption] { + indentLevel -= 1 + return inlineMatchingOptions.removeLast() + } } diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 31321ef16..f40ce2268 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -74,6 +74,51 @@ extension PrettyPrinter { printBlock("Regex") { printer in printer.printAsPattern(convertedFromAST: node, isTopLevel: true) } + + printInlineMatchingOptions() + } + + mutating func printInlineMatchingOptions() { + for matchingOptions in inlineMatchingOptions { + let options = popMatchingOptions() + + printIndented { printer in + for option in options { + switch option.kind { + case .asciiOnlyDigit: + printer.print(".asciiOnlyDigits()") + + case .asciiOnlyPOSIXProps: + printer.print(".asciiOnlyCharacterClasses()") + + case .asciiOnlySpace: + printer.print(".asciiOnlyWhitespace()") + + case .asciiOnlyWord: + printer.print(".asciiOnlyWordCharacters()") + + case .caseInsensitive: + printer.print(".ignoresCase()") + + case .multiline: + printer.print(".anchorsMatchLineEndings()") + + case .reluctantByDefault: + // This is handled by altering every OneOrMore, etc by changing each + // individual repetition behavior instead of creating a nested regex. + continue + + case .singleLine: + printer.print(".dotMatchesNewlines()") + + default: + break + } + } + } + + print("}") + } } // FIXME: Use of back-offs like height and depth @@ -424,7 +469,7 @@ extension PrettyPrinter { // Also in the same vein, if we have a few atom members but no // nonAtomMembers, then we can emit a single .anyOf(...) for them. if !charMembers.isEmpty, nonCharMembers.isEmpty { - let anyOf = ".anyOf(\(charMembers))" + let anyOf = "CharacterClass.anyOf(\(charMembers))" indent() @@ -502,7 +547,7 @@ extension PrettyPrinter { if wrap { output("One(.anyOf(\(String(c)._quoted)))") } else { - output(".anyOf(\(String(c)._quoted))") + output("CharacterClass.anyOf(\(String(c)._quoted))") } case let .scalar(s): @@ -510,7 +555,7 @@ extension PrettyPrinter { if wrap { output("One(.anyOf(\(s._dslBase._bareQuoted)))") } else { - output(".anyOf(\(s._dslBase._bareQuoted))") + output("CharacterClass.anyOf(\(s._dslBase._bareQuoted))") } case let .unconverted(a): @@ -538,7 +583,7 @@ extension PrettyPrinter { if wrap { output("One(.anyOf(\(s._quoted)))") } else { - output(".anyOf(\(s._quoted))") + output("CharacterClass.anyOf(\(s._quoted))") } case .trivia(_): @@ -1285,10 +1330,20 @@ extension DSLTree.Atom { switch add.kind { case .reluctantByDefault: printer.quantificationBehavior = .reluctant + + // Don't create a nested Regex for (?U), we handle this by altering + // every individual repetitionBehavior for things like OneOrMore. + if matchingOptions.ast.adding.count == 1 { + return nil + } + default: break } } + + printer.print("Regex {") + printer.pushMatchingOptions(matchingOptions.ast.adding) } return nil From ac1b821d4b9d05adcdccfb5da5281093b805a1da Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 16 May 2024 10:04:43 -0700 Subject: [PATCH 2/3] Support removing matching options --- .../Regex/Printing/PrettyPrinter.swift | 14 +++-- .../_StringProcessing/PrintAsPattern.swift | 44 ++++++++++----- Tests/RegexTests/RenderDSLTests.swift | 53 ++++++++++++++++++- 3 files changed, 91 insertions(+), 20 deletions(-) diff --git a/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift b/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift index 6c007f25b..0d04b8a40 100644 --- a/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift +++ b/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift @@ -44,8 +44,9 @@ public struct PrettyPrinter { // The current default quantification behavior public var quantificationBehavior: AST.Quantification.Kind = .eager - // A stack of the current added inline matching options, e.g. (?s) - public var inlineMatchingOptions: [[AST.MatchingOption]] = [] + // A stack of the current added inline matching options, e.g. (?s) and a + // boolean indicating true = added (?s) and false = removed (?-s). + public var inlineMatchingOptions: [([AST.MatchingOption], Bool)] = [] } // MARK: - Raw interface @@ -148,14 +149,17 @@ extension PrettyPrinter { /// Pushes the list of matching options to the current stack of other matching /// options and increases the indentation level by 1. - public mutating func pushMatchingOptions(_ options: [AST.MatchingOption]) { + public mutating func pushMatchingOptions( + _ options: [AST.MatchingOption], + isAdded: Bool + ) { indentLevel += 1 - inlineMatchingOptions.append(options) + inlineMatchingOptions.append((options, isAdded)) } /// Pops the most recent list of matching options from the printer and /// decreases the indentation level by 1. - public mutating func popMatchingOptions() -> [AST.MatchingOption] { + public mutating func popMatchingOptions() -> ([AST.MatchingOption], Bool) { indentLevel -= 1 return inlineMatchingOptions.removeLast() } diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index f40ce2268..dc6f34285 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -79,29 +79,29 @@ extension PrettyPrinter { } mutating func printInlineMatchingOptions() { - for matchingOptions in inlineMatchingOptions { - let options = popMatchingOptions() + while !inlineMatchingOptions.isEmpty { + let (options, condition) = popMatchingOptions() printIndented { printer in for option in options { switch option.kind { case .asciiOnlyDigit: - printer.print(".asciiOnlyDigits()") + printer.print(".asciiOnlyDigits(\(condition))") case .asciiOnlyPOSIXProps: - printer.print(".asciiOnlyCharacterClasses()") + printer.print(".asciiOnlyCharacterClasses(\(condition))") case .asciiOnlySpace: - printer.print(".asciiOnlyWhitespace()") + printer.print(".asciiOnlyWhitespace(\(condition))") case .asciiOnlyWord: - printer.print(".asciiOnlyWordCharacters()") + printer.print(".asciiOnlyWordCharacters(\(condition))") case .caseInsensitive: - printer.print(".ignoresCase()") + printer.print(".ignoresCase(\(condition))") case .multiline: - printer.print(".anchorsMatchLineEndings()") + printer.print(".anchorsMatchLineEndings(\(condition))") case .reluctantByDefault: // This is handled by altering every OneOrMore, etc by changing each @@ -109,7 +109,7 @@ extension PrettyPrinter { continue case .singleLine: - printer.print(".dotMatchesNewlines()") + printer.print(".dotMatchesNewlines(\(condition))") default: break @@ -1326,14 +1326,30 @@ extension DSLTree.Atom { return ("/* TODO: symbolic references */", false) case .changeMatchingOptions(let matchingOptions): - for add in matchingOptions.ast.adding { - switch add.kind { + let options: [AST.MatchingOption] + let isAdd: Bool + + if matchingOptions.ast.removing.isEmpty { + options = matchingOptions.ast.adding + isAdd = true + } else { + options = matchingOptions.ast.removing + isAdd = false + } + + for option in options { + switch option.kind { case .reluctantByDefault: - printer.quantificationBehavior = .reluctant + if isAdd { + printer.quantificationBehavior = .reluctant + } else { + printer.quantificationBehavior = .eager + } + // Don't create a nested Regex for (?U), we handle this by altering // every individual repetitionBehavior for things like OneOrMore. - if matchingOptions.ast.adding.count == 1 { + if options.count == 1 { return nil } @@ -1343,7 +1359,7 @@ extension DSLTree.Atom { } printer.print("Regex {") - printer.pushMatchingOptions(matchingOptions.ast.adding) + printer.pushMatchingOptions(options, isAdded: isAdd) } return nil diff --git a/Tests/RegexTests/RenderDSLTests.swift b/Tests/RegexTests/RenderDSLTests.swift index aa6b1fa2b..8bbf14179 100644 --- a/Tests/RegexTests/RenderDSLTests.swift +++ b/Tests/RegexTests/RenderDSLTests.swift @@ -306,7 +306,7 @@ extension RenderDSLTests { func testCharacterClass() throws { try testConversion(#"[abc]+"#, #""" Regex { - OneOrMore(.anyOf("abc")) + OneOrMore(CharacterClass.anyOf("abc")) } """#) @@ -337,5 +337,56 @@ extension RenderDSLTests { } } """#) + + try testConversion(#"[^i]*"#, #""" + Regex { + ZeroOrMore(CharacterClass.anyOf("i").inverted) + } + """#) + } + + func testChangeMatchingOptions() throws { + try testConversion(#"(?s).*(?-s).*"#, #""" + Regex { + Regex { + ZeroOrMore { + /./ + } + Regex { + ZeroOrMore { + /./ + } + } + .dotMatchesNewlines(false) + } + .dotMatchesNewlines(true) + } + """#) + + try testConversion(#"(?U)a+(?-U)a+"#, #""" + Regex { + OneOrMore(.reluctant) { + "a" + } + OneOrMore { + "a" + } + } + """#) + + try testConversion(#"(?sim)hello(?-s)world"#, #""" + Regex { + Regex { + "hello" + Regex { + "world" + } + .dotMatchesNewlines(false) + } + .dotMatchesNewlines(true) + .ignoresCase(true) + .anchorsMatchLineEndings(true) + } + """#) } } From 0ba5d44eb52b433866c2fdbf62a2ea0ee988ca82 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 16 May 2024 10:23:21 -0700 Subject: [PATCH 3/3] Explicitly handle (?x) one last time --- Sources/_StringProcessing/PrintAsPattern.swift | 7 +++++++ Tests/RegexTests/RenderDSLTests.swift | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index dc6f34285..22dd3756d 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -1339,6 +1339,13 @@ extension DSLTree.Atom { for option in options { switch option.kind { + case .extended: + // We don't currently support (?x) in the DSL, so if we see it, just + // do nothing. + if options.count == 1 { + return nil + } + case .reluctantByDefault: if isAdd { printer.quantificationBehavior = .reluctant diff --git a/Tests/RegexTests/RenderDSLTests.swift b/Tests/RegexTests/RenderDSLTests.swift index 8bbf14179..19ab4c35c 100644 --- a/Tests/RegexTests/RenderDSLTests.swift +++ b/Tests/RegexTests/RenderDSLTests.swift @@ -230,7 +230,7 @@ extension RenderDSLTests { """#) try testConversion(#"[abc\u{301}]"#, #""" Regex { - One(.anyOf("abc\u{301}")) + One(CharacterClass.anyOf("abc\u{301}")) } """#) @@ -248,7 +248,7 @@ extension RenderDSLTests { try testConversion(#"(?x) [ a b c \u{301} ] "#, #""" Regex { - One(.anyOf("abc\u{301}")) + One(CharacterClass.anyOf("abc\u{301}")) } """#)