Skip to content

Commit bbe756b

Browse files
authored
Cleanups and pretty printing (#159)
* [gardening] Keep parser comments consistent * Pretty-print CaptureStructure * Remove AnyCaptureType * Hook up capture structure pretty printer to command line tools
1 parent 875ab4f commit bbe756b

File tree

7 files changed

+126
-39
lines changed

7 files changed

+126
-39
lines changed

Sources/PatternConverter/PatternConverter.swift

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ struct PatternConverter: ParsableCommand {
3030
@Flag(help: "Whether to show canonical regex literal")
3131
var showCanonical: Bool = false
3232

33+
@Flag(help: "Whether to show capture structure")
34+
var showCaptureStructure: Bool = false
35+
36+
@Flag(help: "Whether to skip result builder DSL")
37+
var skipDSL: Bool = false
38+
3339
@Option(help: "Limit (from top-down) the conversion levels")
3440
var topDownConversionLimit: Int?
3541

@@ -65,12 +71,21 @@ struct PatternConverter: ParsableCommand {
6571
print()
6672
}
6773

74+
if showCaptureStructure {
75+
print("Capture structure:")
76+
print()
77+
print(ast.captureStructure)
78+
print()
79+
}
80+
6881
print()
69-
let render = ast.renderAsBuilderDSL(
70-
maxTopDownLevels: topDownConversionLimit,
71-
minBottomUpLevels: bottomUpConversionLimit
72-
)
73-
print(render)
82+
if !skipDSL {
83+
let render = ast.renderAsBuilderDSL(
84+
maxTopDownLevels: topDownConversionLimit,
85+
minBottomUpLevels: bottomUpConversionLimit
86+
)
87+
print(render)
88+
}
7489

7590
return
7691
}

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,3 +400,39 @@ extension CaptureStructure {
400400
self = currentScope.count == 1 ? currentScope[0] : .tuple(currentScope)
401401
}
402402
}
403+
404+
extension CaptureStructure: CustomStringConvertible {
  /// A multi-line, indented rendering of the capture structure, produced by
  /// running `_print` against a fresh `PrettyPrinter`.
  public var description: String {
    var printer = PrettyPrinter()
    _print(&printer)
    return printer.finish()
  }

  /// Writes this node, and recursively its children, to `printer`.
  ///
  /// Atoms are printed on a single line; arrays, optionals and tuples are
  /// printed as a labelled block containing their children.
  private func _print(_ printer: inout PrettyPrinter) {
    switch self {
    case let .atom(name, type):
      let nameText = name ?? "<unnamed>"
      // Unwrap before describing: describing the optional itself would wrap
      // the text in `Optional(...)`.
      let typeText = type.map { String(describing: $0) } ?? "<untyped>"
      printer.print("Atom(\(nameText): \(typeText))")

    case let .array(c):
      printer.printBlock("Array") { printer in
        c._print(&printer)
      }

    case let .optional(c):
      printer.printBlock("Optional") { printer in
        c._print(&printer)
      }

    case let .tuple(cs):
      printer.printBlock("Tuple") { printer in
        for c in cs {
          c._print(&printer)
        }
      }
    }
  }
}

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,7 @@ extension Source {
10221022

10231023
/// Attempt to lex the start of a group conditional.
10241024
///
1025-
/// GroupConditionalStart -> '(?' GroupStart
1025+
/// GroupCondStart -> '(?' GroupStart
10261026
///
10271027
mutating func lexGroupConditionalStart(
10281028
context: ParsingContext

Sources/_MatchingEngine/Regex/Parse/Parse.swift

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,33 @@
1313

1414
Syntactic structure of a regular expression
1515

16-
Regex -> '' | Alternation
16+
Regex -> GlobalMatchingOptionSequence? RegexNode
17+
RegexNode -> '' | Alternation
1718
Alternation -> Concatenation ('|' Concatenation)*
1819
Concatenation -> (!'|' !')' ConcatComponent)*
1920
ConcatComponent -> Trivia | Quote | Quantification
2021
Quantification -> QuantOperand Quantifier?
21-
QuantOperand -> Group | CustomCharClass | Atom
22-
Group -> GroupStart Regex ')'
22+
QuantOperand -> Conditional | Group | CustomCharClass
23+
| Atom | AbsentFunction
24+
25+
Conditional -> CondStart Concatenation ('|' Concatenation)? ')'
26+
CondStart -> KnownCondStart | GroupCondStart
27+
28+
Group -> GroupStart RegexNode ')'
2329

2430
Custom character classes are a mini-language of their own. We
2531
support UTS#18 set operators and nested character classes. The
2632
meaning of some atoms, such as `\b` changes inside a custom
27-
chararacter class. Below, we have a grammar "scope", that is we say
28-
"SetOp" to mean "CustomCharactetClass.SetOp", so we don't have to
29-
abbreviate/obfuscate/disambiguate with ugly names like "CCCSetOp".
33+
character class. Below, we have a grammar "scope", that is we
34+
say "SetOp" to mean "CustomCharacterClass.SetOp", so we don't
35+
have to abbreviate/obfuscate/disambiguate with ugly names like
36+
"CCCSetOp".
3037

3138
Also, PCRE lets you end in `&&`, but not Oniguruma as it's a set
32-
operator. We probably want a rule similar to how you can end in `-`
33-
and that's just the character. Perhaps we also have syntax options
34-
in case we need a compatibilty mode (it's easy to add here and now)
39+
operator. We probably want a rule similar to how you can end in
40+
`-` and that's just the character. Perhaps we also have syntax
41+
options in case we need a compatibility mode (it's easy to add
42+
here and now)
3543

3644
CustomCharClass -> Start Set (SetOp Set)* ']'
3745
Set -> Member+
@@ -46,6 +54,9 @@ Lexical analysis provides the following:
4654
Quantifier -> `lexQuantifier`
4755
GroupStart -> `lexGroupStart`
4856

57+
GroupCondStart -> `lexGroupConditionalStart`
58+
KnownCondStart -> `lexKnownCondition`
59+
4960
CustomCharacterClass.Start -> `lexCustomCCStart`
5061
CustomCharacterClass.SetOp -> `lexCustomCCBinOp`
5162

@@ -353,9 +364,9 @@ extension Parser {
353364
///
354365
/// QuantOperand -> Conditional | Group | CustomCharClass | Atom
355366
/// | AbsentFunction
356-
/// Group -> GroupStart RecursiveRegex ')'
357-
/// Conditional -> ConditionalStart Concatenation ('|' Concatenation)? ')'
358-
/// ConditionalStart -> KnownConditionalStart | GroupConditionalStart
367+
/// Group -> GroupStart RegexNode ')'
368+
/// Conditional -> CondStart Concatenation ('|' Concatenation)? ')'
369+
/// CondStart -> KnownCondStart | GroupCondStart
359370
///
360371
mutating func parseQuantifierOperand() throws -> AST.Node? {
361372
assert(!source.isEmpty)

Sources/_StringProcessing/Capture.swift

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ enum Capture {
1717
case atom(Any)
1818
indirect case tuple([Capture])
1919
indirect case some(Capture)
20-
case none(childType: AnyCaptureType)
21-
indirect case array([Capture], childType: AnyCaptureType)
20+
case none(childType: AnyType)
21+
indirect case array([Capture], childType: AnyType)
2222

2323
static func none(childType: Any.Type) -> Capture {
24-
.none(childType: AnyCaptureType(childType))
24+
.none(childType: AnyType(childType))
2525
}
2626

2727
static func array(_ children: [Capture], childType: Any.Type) -> Capture {
28-
.array(children, childType: AnyCaptureType(childType))
28+
.array(children, childType: AnyType(childType))
2929
}
3030
}
3131

@@ -77,19 +77,44 @@ extension Capture {
7777
}
7878
}
7979

80-
/// A wrapper of an existential metatype, equatable and hashable by reference.
81-
struct AnyCaptureType: Equatable, Hashable {
82-
var base: Any.Type
83-
84-
init(_ type: Any.Type) {
85-
base = type
80+
extension Capture: CustomStringConvertible {
81+
public var description: String {
82+
var printer = PrettyPrinter()
83+
_print(&printer)
84+
return printer.finish()
8685
}
8786

88-
static func == (lhs: AnyCaptureType, rhs: AnyCaptureType) -> Bool {
89-
lhs.base == rhs.base
90-
}
87+
/// Writes this capture, and recursively its children, to `printer`.
///
/// Atoms, `none`, and empty tuples are printed on a single line; non-empty
/// tuples, `some`, and arrays are printed as a labelled block containing
/// their children.
private func _print(_ printer: inout PrettyPrinter) {
  switch self {
  case let .atom(n):
    printer.print("Atom(\(n))")

  case let .tuple(ns):
    // An empty tuple is printed inline rather than as a block.
    guard !ns.isEmpty else {
      printer.print("Tuple()")
      return
    }
    printer.printBlock("Tuple") { printer in
      for n in ns {
        n._print(&printer)
      }
    }

  case let .some(n):
    // Label the block with the case name so that `.some(x)` cannot be
    // confused with a one-element tuple in the output.
    printer.printBlock("Some") { printer in
      n._print(&printer)
    }

  case let .none(childType):
    printer.print("None(\(childType))")

  case let .array(ns, childType):
    printer.printBlock("Array(\(childType))") { printer in
      for n in ns {
        n._print(&printer)
      }
    }
  }
}
94119
}
95120
}

Sources/_StringProcessing/Legacy/RECode.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ extension RECode {
7575
case captureSome
7676

7777
/// Replace top-level captures with a single `Capture.none(childType:)`.
78-
case captureNil(childType: AnyCaptureType)
78+
case captureNil(childType: AnyType)
7979

8080
/// Form a `Capture.array(...)` from top-level captures, and use it to replace the top-level
8181
/// captures.
82-
case captureArray(childType: AnyCaptureType)
82+
case captureArray(childType: AnyType)
8383

8484
var isAccept: Bool {
8585
switch self {
@@ -132,11 +132,11 @@ extension RECode.Instruction {
132132
static func label(_ i: Int) -> Self { .label(LabelId(i)) }
133133

134134
static func captureNil(childType: Any.Type) -> Self {
135-
.captureNil(childType: AnyCaptureType(childType))
135+
.captureNil(childType: AnyType(childType))
136136
}
137137

138138
static func captureArray(childType: Any.Type) -> Self {
139-
.captureArray(childType: AnyCaptureType(childType))
139+
.captureArray(childType: AnyType(childType))
140140
}
141141
}
142142

Sources/_StringProcessing/Legacy/VirtualMachine.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,15 @@ extension RECode {
129129
topLevelCaptures = top
130130
}
131131

132-
mutating func captureNil(childType: AnyCaptureType) {
132+
mutating func captureNil(childType: AnyType) {
133133
topLevelCaptures = [.none(childType: childType)]
134134
}
135135

136136
mutating func captureSome() {
137137
topLevelCaptures = [.some(.tupleOrAtom(topLevelCaptures))]
138138
}
139139

140-
mutating func captureArray(childType: AnyCaptureType) {
140+
mutating func captureArray(childType: AnyType) {
141141
topLevelCaptures = [.array(topLevelCaptures, childType: childType)]
142142
}
143143

0 commit comments

Comments
 (0)