Skip to content

Commit 74f4c70

Browse files
authored
Source plumbing, source rendering (#66)
Track source ranges in more places, add rendering path and tests
1 parent 23ff648 commit 74f4c70

File tree

13 files changed

+337
-126
lines changed

13 files changed

+337
-126
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ private struct ASTStorage {
4343

4444
extension AST {
4545
// :-(
46+
//
47+
// Existential-based programming is highly prone to silent
48+
// errors, but it does enable us to avoid having to switch
49+
// over `self` _everywhere_ we want to do anything.
4650
var _associatedValue: _ASTNode {
4751
switch self {
4852
case let .alternation(v): return v

Sources/_MatchingEngine/Regex/AST/ASTBuilder.swift

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ AST.
1414

1515
*/
1616

17+
// TODO: Sink this file into _StringProcessing and make it all
18+
// internal. For now, this lets us incrementally add source
19+
// ranges...
20+
1721
public let _fakeLoc = "".startIndex
1822
public let _fakeRange = _fakeLoc ..< _fakeLoc
1923
public func _fake<T: Hashable>(_ t: T) -> AST.Loc<T> {
@@ -138,21 +142,23 @@ public func quantRange(
138142
}
139143

140144
public func charClass(
141-
_ members: CustomCharacterClass.Member...,
145+
_ members: AST.CustomCharacterClass.Member...,
142146
inverted: Bool = false
143147
) -> AST {
144-
let cc = CustomCharacterClass(
145-
inverted ? .inverted : .normal, members, _fakeRange
146-
)
148+
let cc = CustomCC(
149+
_fake(inverted ? .inverted : .normal),
150+
members,
151+
_fakeRange)
147152
return .customCharacterClass(cc)
148153
}
149154
public func charClass(
150-
_ members: CustomCharacterClass.Member...,
155+
_ members: AST.CustomCharacterClass.Member...,
151156
inverted: Bool = false
152-
) -> CustomCharacterClass.Member {
153-
let cc = CustomCharacterClass(
154-
inverted ? .inverted : .normal, members, _fakeRange
155-
)
157+
) -> AST.CustomCharacterClass.Member {
158+
let cc = CustomCC(
159+
_fake(inverted ? .inverted : .normal),
160+
members,
161+
_fakeRange)
156162
return .custom(cc)
157163
}
158164
public func posixSet(

Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift

Lines changed: 83 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,38 @@
1+
/*
2+
3+
Common protocols for AST nodes and values. These allow us
4+
to do more capabilities-based programming, currently
5+
implemented on top of existentials.
6+
7+
*/
8+
9+
10+
11+
// MARK: - AST parent/child
12+
13+
protocol _ASTNode: _ASTPrintable {
14+
var sourceRange: SourceRange { get }
15+
}
16+
extension _ASTNode {
17+
var startLoc: SourceLoc { sourceRange.lowerBound }
18+
var endLoc: SourceLoc { sourceRange.upperBound }
19+
}
20+
21+
protocol _ASTParent: _ASTNode {
22+
var children: [AST] { get }
23+
}
24+
25+
extension AST.Concatenation: _ASTParent {}
26+
extension AST.Alternation: _ASTParent {}
27+
28+
extension AST.Group: _ASTParent {
29+
var children: [AST] { [child] }
30+
}
31+
extension AST.Quantification: _ASTParent {
32+
var children: [AST] { [child] }
33+
}
34+
35+
136
// MARK: - Printing
237

338
/// AST entities can be pretty-printed or dumped
@@ -53,23 +88,56 @@ extension AST: _ASTPrintable {
5388
}
5489
}
5590

56-
// MARK: - AST parent/child
91+
// MARK: - Rendering
5792

58-
protocol _ASTNode: _ASTPrintable {
59-
var sourceRange: SourceRange { get }
60-
}
93+
// Useful for testing, debugging, etc.
94+
//
95+
// TODO: Prettier rendering, probably inverted
96+
extension AST {
6197

62-
protocol _ASTParent: _ASTNode {
63-
var children: [AST] { get }
64-
}
98+
func _postOrder() -> Array<AST> {
99+
var nodes = Array<AST>()
100+
_postOrder(into: &nodes)
101+
return nodes
102+
}
103+
func _postOrder(into array: inout Array<AST>) {
104+
children?.forEach { $0._postOrder(into: &array) }
105+
array.append(self)
106+
}
65107

66-
extension AST.Concatenation: _ASTParent {}
67-
extension AST.Alternation: _ASTParent {}
108+
// We render from top-to-bottom, coalescing siblings
109+
public func _render(in input: String) -> [String] {
110+
let base = String(repeating: " ", count: input.count)
111+
var lines = [base]
68112

69-
extension AST.Group: _ASTParent {
70-
var children: [AST] { [child] }
71-
}
72-
extension AST.Quantification: _ASTParent {
73-
var children: [AST] { [child] }
74-
}
113+
let nodes = _postOrder().filter(\.sourceRange.isReal)
75114

115+
nodes.forEach { node in
116+
let sr = node.sourceRange
117+
let count = input[sr].count
118+
for idx in lines.indices {
119+
if lines[idx][sr].all(\.isWhitespace) {
120+
node._renderRange(count: count, into: &lines[idx])
121+
return
122+
}
123+
}
124+
var nextLine = base
125+
node._renderRange(count: count, into: &nextLine)
126+
lines.append(nextLine)
127+
}
128+
129+
return lines.first!.all(\.isWhitespace) ? [] : lines
130+
}
131+
132+
// Produce a textually "rendered" range
133+
//
134+
// NOTE: `output` must be indexable by positions in the string from which a
135+
// source range was derived.
136+
func _renderRange(
137+
count: Int, into output: inout String
138+
) {
139+
guard count > 0 else { return }
140+
let repl = String(repeating: "-", count: count-1) + "^"
141+
output.replaceSubrange(sourceRange, with: repl)
142+
}
143+
}

Sources/_MatchingEngine/Regex/AST/Atom.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ extension Atom {
462462

463463
extension Atom {
464464
var sourceRange: SourceRange {
465-
// TODO: Does this mean we need to make Atom a struct?
465+
// FIXME: source location tracking
466466
_fakeRange
467467
}
468468
}
Lines changed: 50 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,61 @@
11

2-
// TODO: source ranges? ASTValue? etc?
3-
public struct CustomCharacterClass: Hashable {
4-
public var start: Start
5-
public var members: [Member]
6-
7-
public let sourceRange: SourceRange
8-
9-
10-
public init(
11-
_ start: Start,
12-
_ members: [Member],
13-
_ sr: SourceRange
14-
) {
15-
self.start = start
16-
self.members = members
17-
self.sourceRange = sr
18-
}
19-
20-
public enum Member: Hashable {
21-
/// A nested custom character class `[[ab][cd]]`
22-
case custom(CustomCharacterClass)
23-
24-
/// A character range `a-z`
25-
case range(Atom, Atom)
26-
27-
/// A single character or escape
28-
case atom(Atom)
29-
30-
/// A binary operator applied to sets of members `abc&&def`
31-
case setOperation([Member], SetOp, [Member])
32-
}
33-
public enum SetOp: String, Hashable {
34-
case subtraction = "--"
35-
case intersection = "&&"
36-
case symmetricDifference = "~~"
37-
}
38-
public enum Start: Hashable {
39-
/// `[`
40-
case normal
41-
42-
/// `[^`
43-
case inverted
2+
extension AST {
3+
public struct CustomCharacterClass: Hashable {
4+
public var start: Loc<Start>
5+
public var members: [Member]
6+
7+
public let sourceRange: SourceRange
8+
9+
public init(
10+
_ start: Loc<Start>,
11+
_ members: [Member],
12+
_ sr: SourceRange
13+
) {
14+
self.start = start
15+
self.members = members
16+
self.sourceRange = sr
17+
}
18+
19+
// FIXME: track source ranges
20+
public enum Member: Hashable {
21+
/// A nested custom character class `[[ab][cd]]`
22+
case custom(CustomCharacterClass)
23+
24+
/// A character range `a-z`
25+
case range(Atom, Atom)
26+
27+
/// A single character or escape
28+
case atom(Atom)
29+
30+
/// A binary operator applied to sets of members `abc&&def`
31+
case setOperation([Member], Loc<SetOp>, [Member])
32+
}
33+
public enum SetOp: String, Hashable {
34+
case subtraction = "--"
35+
case intersection = "&&"
36+
case symmetricDifference = "~~"
37+
}
38+
public enum Start: Hashable {
39+
/// `[`
40+
case normal
41+
42+
/// `[^`
43+
case inverted
44+
}
4445
}
4546
}
4647

47-
extension CustomCharacterClass {
48-
public var isInverted: Bool { start == .inverted }
48+
/// Shorthand for `AST.CustomCharacterClass`; spelling out e.g. `AST.CustomCharacterClass.Start` is a mouthful
49+
internal typealias CustomCC = AST.CustomCharacterClass
50+
51+
extension CustomCC {
52+
public var isInverted: Bool { start.value == .inverted }
4953
}
5054

51-
extension CustomCharacterClass: _ASTNode {
55+
extension CustomCC: _ASTNode {
5256
public var _dumpBase: String {
5357
// FIXME: print out members...
5458
"customCharacterClass"
5559
}
5660
}
61+

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,12 +221,16 @@ extension Source {
221221

222222
switch (lowerOpt, closedRange, upperOpt) {
223223
case let (l?, nil, nil):
224+
// FIXME: source location tracking
224225
return .exactly(_fake(l))
225226
case let (l?, true, nil):
227+
// FIXME: source location tracking
226228
return .nOrMore(_fake(l))
227229
case let (nil, closed?, u?):
230+
// FIXME: source location tracking
228231
return .upToN(_fake(closed ? u : u-1))
229232
case let (l?, closed?, u?):
233+
// FIXME: source location tracking
230234
return .range(
231235
_fake(l) ... _fake(closed ? u : u-1))
232236
case let (nil, nil, u) where u != nil:
@@ -327,6 +331,7 @@ extension Source {
327331
while src.tryEat(" ") {
328332
didSomething = true
329333
}
334+
// FIXME: source location tracking
330335
return didSomething ? AST.Trivia(_fakeRange) : nil
331336
}
332337
}
@@ -390,7 +395,7 @@ extension Source {
390395
}
391396

392397
mutating func lexCustomCCStart(
393-
) throws -> Value<CustomCharacterClass.Start>? {
398+
) throws -> Value<CustomCC.Start>? {
394399
try recordLoc { src in
395400
// POSIX named sets are atoms.
396401
guard !src.starts(with: "[:") else { return nil }
@@ -406,7 +411,7 @@ extension Source {
406411
///
407412
/// CustomCCBinOp -> '--' | '~~' | '&&'
408413
///
409-
mutating func lexCustomCCBinOp() throws -> Value<CustomCharacterClass.SetOp>? {
414+
mutating func lexCustomCCBinOp() throws -> Value<CustomCC.SetOp>? {
410415
try recordLoc { src in
411416
// TODO: Perhaps a syntax options check (!PCRE)
412417
// TODO: Better AST types here
@@ -417,7 +422,7 @@ extension Source {
417422
}
418423

419424
// Check to see if we can lex a binary operator.
420-
func peekCCBinOp() -> CustomCharacterClass.SetOp? {
425+
func peekCCBinOp() -> CustomCC.SetOp? {
421426
if starts(with: "--") { return .subtraction }
422427
if starts(with: "~~") { return .symmetricDifference }
423428
if starts(with: "&&") { return .intersection }

0 commit comments

Comments
 (0)