From 9d86c21aa83cb10d970f54b1a8e4d6ca771ea1e3 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 12 May 2022 15:09:11 -0700 Subject: [PATCH 1/2] Wrap character classes around One --- .../_StringProcessing/PrintAsPattern.swift | 240 +++++++++++++----- 1 file changed, 181 insertions(+), 59 deletions(-) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 601447968..1d462ef4c 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -137,6 +137,51 @@ extension PrettyPrinter { blockName = "\(amount)" } + // Special case single child character classes for repetition nodes. + // This lets us do something like the following: + // + // OneOrMore(.digit) + // vs + // OneOrMore { + // One(.digit) + // } + // + func printSimpleCCC( + _ ccc: DSLTree.CustomCharacterClass + ) { + indent() + + if kind != ".eager" { + blockName.removeLast() + output("\(blockName), ") + } else { + output("\(blockName)(") + } + + printAsPattern(ccc, wrap: false, terminateLine: false) + output(")") + terminateLine() + } + + switch child { + case let .customCharacterClass(ccc): + if ccc.isSimplePrint { + printSimpleCCC(ccc) + return + } + + break + case let .convertedRegexLiteral(.customCharacterClass(ccc), _): + if ccc.isSimplePrint { + printSimpleCCC(ccc) + return + } + + break + default: + break + } + printBlock(blockName) { printer in printer.printAsPattern(convertedFromAST: child) } @@ -187,10 +232,10 @@ extension PrettyPrinter { print("/* TODO: absent function */") } } - - // TODO: Some way to integrate this with conversion... + mutating func printAsPattern( _ ccc: DSLTree.CustomCharacterClass, + wrap: Bool = true, terminateLine: Bool = true ) { if ccc.hasUnprintableProperty { @@ -202,11 +247,10 @@ extension PrettyPrinter { if ccc.isInverted { printIndented { printer in printer.indent() + printer.output(".inverted") if terminateLine { - printer.print(".inverted") - } else { - printer.output(".inverted") + printer.terminateLine() } } } @@ -215,7 +259,7 @@ extension PrettyPrinter { // If we only have 1 member, then we can emit it without the extra // CharacterClass initialization if ccc.members.count == 1 { - printAsPattern(ccc.members[0]) + printAsPattern(ccc.members[0], wrap: wrap) if terminateLine { self.terminateLine() @@ -279,12 +323,20 @@ extension PrettyPrinter { // Also in the same vein, if we have a few atom members but no // nonAtomMembers, then we can emit a single .anyOf(...) for them. if !charMembers.isEmpty, nonCharMembers.isEmpty { - if terminateLine { - print(".anyOf(\(charMembers._quoted))") + let anyOf = ".anyOf(\(charMembers._quoted))" + + indent() + + if wrap { + output("One(\(anyOf))") } else { - indent() - output(".anyOf(\(charMembers._quoted))") + output(anyOf) } + + if terminateLine { + self.terminateLine() + } + return } @@ -304,7 +356,7 @@ extension PrettyPrinter { } for (i, member) in nonCharMembers.enumerated() { - printer.printAsPattern(member) + printer.printAsPattern(member, wrap: false) if i != nonCharMembers.count - 1 { printer.output(",") @@ -314,17 +366,18 @@ extension PrettyPrinter { } } + indent() + output(")") + if terminateLine { - print(")") - } else { - indent() - output(")") + self.terminateLine() } } // TODO: Some way to integrate this with conversion... mutating func printAsPattern( - _ member: DSLTree.CustomCharacterClass.Member + _ member: DSLTree.CustomCharacterClass.Member, + wrap: Bool = true ) { switch member { case let .custom(ccc): @@ -344,17 +397,40 @@ extension PrettyPrinter { indent() switch a { case let .char(c): - output(".anyOf(\(String(c)._quoted))") + + if wrap { + output("One(.anyOf(\(String(c)._quoted)))") + } else { + output(".anyOf(\(String(c)._quoted))") + } + case let .scalar(s): - output(".anyOf(\"\\u{\(String(s.value, radix: 16))}\")") + + if wrap { + output("One(.anyOf(\"\\u{\(String(s.value, radix: 16, uppercase: true))}\"))") + } else { + output(".anyOf(\"\\u{\(String(s.value, radix: 16, uppercase: true))}\")") + } + case let .unconverted(a): - output(a.ast._patternBase) + let base = a.ast._patternBase + + if base.canBeWrapped, wrap { + output("One(\(base.0))") + } else { + output(base.0) + } default: print(" // TODO: Atom \(a)") } case .quotedLiteral(let s): - output(".anyOf(\(s._quoted))") + + if wrap { + output("One(.anyOf(\(s._quoted)))") + } else { + output(".anyOf(\(s._quoted))") + } case .trivia(_): // We never print trivia @@ -658,118 +734,118 @@ extension AST.Atom { /// caller, but we might want to be parameterized at that point. /// /// TODO: Some way to integrate this with conversion... - var _patternBase: String { + var _patternBase: (String, canBeWrapped: Bool) { if let anchor = self.assertionKind { - return anchor._patternBase + return (anchor._patternBase, false) } if isUnprintableAtom { - return _regexBase + return (_regexBase, false) } return _dslBase } - var _dslBase: String { + var _dslBase: (String, canBeWrapped: Bool) { func scalarLiteral(_ s: UnicodeScalar) -> String { let hex = String(s.value, radix: 16, uppercase: true) return "\\u{\(hex)}" } switch kind { case let .char(c): - return String(c) + return (String(c), false) case let .scalar(s): - return scalarLiteral(s.value) + return (scalarLiteral(s.value), false) case let .scalarSequence(seq): - return seq.scalarValues.map(scalarLiteral).joined() + return (seq.scalarValues.map(scalarLiteral).joined(), false) case let .property(p): - return p._dslBase + return (p._dslBase, true) case let .escaped(e): switch e { // Anchors case .wordBoundary: - return "Anchor.wordBoundary" + return ("Anchor.wordBoundary", false) case .notWordBoundary: - return "Anchor.wordBoundary.inverted" + return ("Anchor.wordBoundary.inverted", false) case .startOfSubject: - return "Anchor.startOfSubject" + return ("Anchor.startOfSubject", false) case .endOfSubject: - return "Anchor.endOfSubject" + return ("Anchor.endOfSubject", false) case .endOfSubjectBeforeNewline: - return "Anchor.endOfSubjectBeforeNewline" + return ("Anchor.endOfSubjectBeforeNewline", false) case .firstMatchingPositionInSubject: - return "Anchor.firstMatchingPositionInSubject" + return ("Anchor.firstMatchingPositionInSubject", false) case .textSegment: - return "Anchor.textSegmentBoundary" + return ("Anchor.textSegmentBoundary", false) case .notTextSegment: - return "Anchor.textSegmentBoundary.inverted" + return ("Anchor.textSegmentBoundary.inverted", false) // Character Classes case .decimalDigit: - return ".digit" + return (".digit", true) case .notDecimalDigit: - return ".digit.inverted" + return (".digit.inverted", true) case .horizontalWhitespace: - return ".horizontalWhitespace" + return (".horizontalWhitespace", true) case .notHorizontalWhitespace: - return ".horizontalWhitespace.inverted" + return (".horizontalWhitespace.inverted", true) case .whitespace: - return ".whitespace" + return (".whitespace", true) case .notWhitespace: - return ".whitespace.inverted" + return (".whitespace.inverted", true) case .wordCharacter: - return ".word" + return (".word", true) case .notWordCharacter: - return ".word.inverted" + return (".word.inverted", true) case .graphemeCluster: - return ".anyGraphemeCluster" + return (".anyGraphemeCluster", true) case .newlineSequence: - return ".newlineSequence" + return (".newlineSequence", true) case .notNewline: - return ".newlineSequence.inverted" + return (".newlineSequence.inverted", true) case .verticalTab: - return ".verticalWhitespace" + return (".verticalWhitespace", true) case .notVerticalTab: - return ".verticalWhitespace.inverted" + return (".verticalWhitespace.inverted", true) // Literal single characters all get converted into DSLTree.Atom.scalar default: - return "TODO: escaped \(e)" + return ("TODO: escaped \(e)", false) } case .namedCharacter: - return " /* TODO: named character */" + return (" /* TODO: named character */", false) case .any: - return ".any" + return (".any", true) case .startOfLine, .endOfLine: fatalError("unreachable") case .backreference: - return " /* TODO: back reference */" + return (" /* TODO: back reference */", false) case .subpattern: - return " /* TODO: subpattern */" + return (" /* TODO: subpattern */", false) case .callout: - return " /* TODO: callout */" + return (" /* TODO: callout */", false) case .backtrackingDirective: - return " /* TODO: backtracking directive */" + return (" /* TODO: backtracking directive */", false) case .changeMatchingOptions: - return "/* TODO: change matching options */" + return ("/* TODO: change matching options */", false) // Every other case we've already decided cannot be represented inside the // DSL. default: - return "" + return ("", false) } } @@ -877,6 +953,52 @@ extension DSLTree.CustomCharacterClass { $0.isUnprintableMember } } + + var isSimplePrint: Bool { + if members.count == 1 { + switch members[0] { + case .intersection(_, _): + return false + case .subtraction(_, _): + return false + case .symmetricDifference(_, _): + return false + default: + return true + } + } + + let nonCharMembers = members.filter { + switch $0 { + case let .atom(a): + switch a { + case .char(_): + return false + case .scalar(_): + return false + case .unconverted(_): + return true + default: + return true + } + + case .quotedLiteral(_): + return false + + case .trivia(_): + return false + + default: + return true + } + } + + if nonCharMembers.isEmpty { + return true + } + + return false + } } extension DSLTree.Atom { @@ -896,7 +1018,7 @@ extension DSLTree.Atom { if a.ast.isUnprintableAtom { return "#/\(a.ast._regexBase)/#" } else { - return a.ast._dslBase + return a.ast._dslBase.0 } case .assertion(let a): From 24c139aad8f10d9329112d6758ab531e139b0633 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Thu, 12 May 2022 15:35:46 -0700 Subject: [PATCH 2/2] fix intersection, subtraction, symmetricDiference --- .../_StringProcessing/PrintAsPattern.swift | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 1d462ef4c..75195f409 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -437,31 +437,58 @@ extension PrettyPrinter { break case .intersection(let first, let second): - printAsPattern(first) + if wrap, first.isSimplePrint { + indent() + output("One(") + } + + printAsPattern(first, wrap: false) printIndented { printer in printer.indent() printer.output(".intersection(") - printer.printAsPattern(second, terminateLine: false) + printer.printAsPattern(second, wrap: false, terminateLine: false) printer.output(")") } + if wrap, first.isSimplePrint { + output(")") + } + case .subtraction(let first, let second): - printAsPattern(first) + if wrap, first.isSimplePrint { + indent() + output("One(") + } + + printAsPattern(first, wrap: false) printIndented { printer in printer.indent() printer.output(".subtracting(") - printer.printAsPattern(second, terminateLine: false) + printer.printAsPattern(second, wrap: false, terminateLine: false) printer.output(")") } + if wrap, first.isSimplePrint { + output(")") + } + case .symmetricDifference(let first, let second): - printAsPattern(first) + if wrap, first.isSimplePrint { + indent() + output("One(") + } + + printAsPattern(first, wrap: false) printIndented { printer in printer.indent() printer.output(".symmetricDifference(") - printer.printAsPattern(second, terminateLine: false) + printer.printAsPattern(second, wrap: false, terminateLine: false) printer.output(")") } + + if wrap, first.isSimplePrint { + output(")") + } } }