From 56cddfa86698c31bd1cec02035183b4f153f5775 Mon Sep 17 00:00:00 2001
From: Alejandro Alonso
Date: Thu, 16 May 2024 13:48:06 -0700
Subject: [PATCH] Merge pull request #738 from Azoy/fix-som-refactors

Support change matching options in Regex refactoring
---
 .../Regex/Printing/PrettyPrinter.swift        | 21 +++++
 .../_StringProcessing/PrintAsPattern.swift    | 92 +++++++++++++++++--
 Tests/RegexTests/RenderDSLTests.swift         | 57 +++++++++++-
 3 files changed, 160 insertions(+), 10 deletions(-)

diff --git a/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift b/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift
index 8ddcd73c7..0d04b8a40 100644
--- a/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift
+++ b/Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift
@@ -43,6 +43,10 @@ public struct PrettyPrinter {
 
   // The current default quantification behavior
   public var quantificationBehavior: AST.Quantification.Kind = .eager
+
+  // A stack of the current added inline matching options, e.g. (?s) and a
+  // boolean indicating true = added (?s) and false = removed (?-s).
+  public var inlineMatchingOptions: [([AST.MatchingOption], Bool)] = []
 }
 
 // MARK: - Raw interface
@@ -142,4 +146,21 @@ extension PrettyPrinter {
     printIndented(f)
     print(endDelimiter)
   }
+
+  /// Pushes the list of matching options to the current stack of other matching
+  /// options and increases the indentation level by 1.
+  public mutating func pushMatchingOptions(
+    _ options: [AST.MatchingOption],
+    isAdded: Bool
+  ) {
+    indentLevel += 1
+    inlineMatchingOptions.append((options, isAdded))
+  }
+
+  /// Pops the most recent list of matching options from the printer and
+  /// decreases the indentation level by 1.
+  public mutating func popMatchingOptions() -> ([AST.MatchingOption], Bool) {
+    indentLevel -= 1
+    return inlineMatchingOptions.removeLast()
+  }
 }
diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift
index 31321ef16..22dd3756d 100644
--- a/Sources/_StringProcessing/PrintAsPattern.swift
+++ b/Sources/_StringProcessing/PrintAsPattern.swift
@@ -74,6 +74,51 @@ extension PrettyPrinter {
     printBlock("Regex") { printer in
       printer.printAsPattern(convertedFromAST: node, isTopLevel: true)
     }
+
+    printInlineMatchingOptions()
+  }
+
+  mutating func printInlineMatchingOptions() {
+    while !inlineMatchingOptions.isEmpty {
+      let (options, condition) = popMatchingOptions()
+
+      printIndented { printer in
+        for option in options {
+          switch option.kind {
+          case .asciiOnlyDigit:
+            printer.print(".asciiOnlyDigits(\(condition))")
+
+          case .asciiOnlyPOSIXProps:
+            printer.print(".asciiOnlyCharacterClasses(\(condition))")
+
+          case .asciiOnlySpace:
+            printer.print(".asciiOnlyWhitespace(\(condition))")
+
+          case .asciiOnlyWord:
+            printer.print(".asciiOnlyWordCharacters(\(condition))")
+
+          case .caseInsensitive:
+            printer.print(".ignoresCase(\(condition))")
+
+          case .multiline:
+            printer.print(".anchorsMatchLineEndings(\(condition))")
+
+          case .reluctantByDefault:
+            // This is handled by altering every OneOrMore, etc by changing each
+            // individual repetition behavior instead of creating a nested regex.
+            continue
+
+          case .singleLine:
+            printer.print(".dotMatchesNewlines(\(condition))")
+
+          default:
+            break
+          }
+        }
+      }
+
+      print("}")
+    }
   }
 
   // FIXME: Use of back-offs like height and depth
@@ -424,7 +469,7 @@ extension PrettyPrinter {
     // Also in the same vein, if we have a few atom members but no
     // nonAtomMembers, then we can emit a single .anyOf(...) for them.
     if !charMembers.isEmpty, nonCharMembers.isEmpty {
-      let anyOf = ".anyOf(\(charMembers))"
+      let anyOf = "CharacterClass.anyOf(\(charMembers))"
 
       indent()
 
@@ -502,7 +547,7 @@ extension PrettyPrinter {
         if wrap {
           output("One(.anyOf(\(String(c)._quoted)))")
         } else {
-          output(".anyOf(\(String(c)._quoted))")
+          output("CharacterClass.anyOf(\(String(c)._quoted))")
         }
 
       case let .scalar(s):
@@ -510,7 +555,7 @@ extension PrettyPrinter {
         if wrap {
           output("One(.anyOf(\(s._dslBase._bareQuoted)))")
         } else {
-          output(".anyOf(\(s._dslBase._bareQuoted))")
+          output("CharacterClass.anyOf(\(s._dslBase._bareQuoted))")
         }
 
       case let .unconverted(a):
@@ -538,7 +583,7 @@ extension PrettyPrinter {
       if wrap {
         output("One(.anyOf(\(s._quoted)))")
       } else {
-        output(".anyOf(\(s._quoted))")
+        output("CharacterClass.anyOf(\(s._quoted))")
       }
 
     case .trivia(_):
@@ -1281,14 +1326,47 @@ extension DSLTree.Atom {
       return ("/* TODO: symbolic references */", false)
 
     case .changeMatchingOptions(let matchingOptions):
-      for add in matchingOptions.ast.adding {
-        switch add.kind {
+      let options: [AST.MatchingOption]
+      let isAdd: Bool
+
+      if matchingOptions.ast.removing.isEmpty {
+        options = matchingOptions.ast.adding
+        isAdd = true
+      } else {
+        options = matchingOptions.ast.removing
+        isAdd = false
+      }
+
+      for option in options {
+        switch option.kind {
+        case .extended:
+          // We don't currently support (?x) in the DSL, so if we see it, just
+          // do nothing.
+          if options.count == 1 {
+            return nil
+          }
+
         case .reluctantByDefault:
-          printer.quantificationBehavior = .reluctant
+          if isAdd {
+            printer.quantificationBehavior = .reluctant
+          } else {
+            printer.quantificationBehavior = .eager
+          }
+
+
+          // Don't create a nested Regex for (?U), we handle this by altering
+          // every individual repetitionBehavior for things like OneOrMore.
+          if options.count == 1 {
+            return nil
+          }
+
         default:
           break
         }
       }
+
+      printer.print("Regex {")
+      printer.pushMatchingOptions(options, isAdded: isAdd)
     }
 
     return nil
diff --git a/Tests/RegexTests/RenderDSLTests.swift b/Tests/RegexTests/RenderDSLTests.swift
index aa6b1fa2b..19ab4c35c 100644
--- a/Tests/RegexTests/RenderDSLTests.swift
+++ b/Tests/RegexTests/RenderDSLTests.swift
@@ -230,7 +230,7 @@ extension RenderDSLTests {
       """#)
     try testConversion(#"[abc\u{301}]"#, #"""
       Regex {
-        One(.anyOf("abc\u{301}"))
+        One(CharacterClass.anyOf("abc\u{301}"))
       }
       """#)
 
@@ -248,7 +248,7 @@ extension RenderDSLTests {
 
     try testConversion(#"(?x) [ a b c \u{301} ] "#, #"""
       Regex {
-        One(.anyOf("abc\u{301}"))
+        One(CharacterClass.anyOf("abc\u{301}"))
       }
       """#)
 
@@ -306,7 +306,7 @@ extension RenderDSLTests {
   func testCharacterClass() throws {
     try testConversion(#"[abc]+"#, #"""
       Regex {
-        OneOrMore(.anyOf("abc"))
+        OneOrMore(CharacterClass.anyOf("abc"))
       }
       """#)
 
@@ -337,5 +337,56 @@ extension RenderDSLTests {
         }
       }
       """#)
+
+    try testConversion(#"[^i]*"#, #"""
+      Regex {
+        ZeroOrMore(CharacterClass.anyOf("i").inverted)
+      }
+      """#)
+  }
+
+  func testChangeMatchingOptions() throws {
+    try testConversion(#"(?s).*(?-s).*"#, #"""
+      Regex {
+        Regex {
+          ZeroOrMore {
+            /./
+          }
+          Regex {
+            ZeroOrMore {
+              /./
+            }
+          }
+          .dotMatchesNewlines(false)
+        }
+        .dotMatchesNewlines(true)
+      }
+      """#)
+
+    try testConversion(#"(?U)a+(?-U)a+"#, #"""
+      Regex {
+        OneOrMore(.reluctant) {
+          "a"
+        }
+        OneOrMore {
+          "a"
+        }
+      }
+      """#)
+
+    try testConversion(#"(?sim)hello(?-s)world"#, #"""
+      Regex {
+        Regex {
+          "hello"
+          Regex {
+            "world"
+          }
+          .dotMatchesNewlines(false)
+        }
+        .dotMatchesNewlines(true)
+        .ignoresCase(true)
+        .anchorsMatchLineEndings(true)
+      }
+      """#)
   }
 }