Skip to content

Commit ff635c6

Browse files
authored
Structuralization of captures (#160)
* Don't print during tests * Structuralize engine captures * Parameterize determining capture structure * Use Engine for all matches
1 parent 806df91 commit ff635c6

25 files changed

+973
-142
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,10 @@ extension AST {
2626
public var hasCapture: Bool { root.hasCapture }
2727

2828
/// The capture structure of this AST tree.
29-
public var captureStructure: CaptureStructure { root.captureStructure }
29+
public var captureStructure: CaptureStructure {
30+
var constructor = CaptureStructure.Constructor(.flatten)
31+
return root._captureStructure(&constructor)
32+
}
3033
}
3134

3235
extension AST {

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 87 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -25,137 +25,161 @@ public enum CaptureStructure: Equatable {
2525
}
2626
}
2727

28+
// TODO: Below are all flattening constructors. Instead create
29+
// a builder/visitor that can store the structuralization
30+
// approach
31+
2832
extension CaptureStructure {
29-
public init<C: Collection>(
30-
alternating children: C
31-
) where C.Element: _TreeNode {
32-
assert(children.count > 1)
33-
self = children
34-
.map(\.captureStructure)
35-
.reduce(.empty, +)
36-
.map(CaptureStructure.optional)
33+
public struct Constructor {
34+
var strategy: Strategy
35+
36+
public init(_ strategy: Strategy = .flatten) {
37+
guard strategy == .flatten else {
38+
fatalError("TODO: adjust creator methods")
39+
}
40+
self.strategy = strategy
41+
}
42+
}
43+
}
44+
45+
extension CaptureStructure.Constructor {
46+
public mutating func alternating<C: Collection>(
47+
_ children: C
48+
) -> CaptureStructure where C.Element: _TreeNode {
49+
// assert(children.count > 1)
50+
return children.map {
51+
$0._captureStructure(&self)
52+
}.reduce(.empty, +)
53+
.map(CaptureStructure.optional)
3754
}
38-
public init<C: Collection>(
39-
concatenating children: C
40-
) where C.Element: _TreeNode {
41-
self = children.map(\.captureStructure).reduce(.empty, +)
55+
public mutating func concatenating<C: Collection>(
56+
_ children: C
57+
) -> CaptureStructure where C.Element: _TreeNode {
58+
return children.map {
59+
$0._captureStructure(&self)
60+
}.reduce(.empty, +)
4261
}
4362

44-
public init<T: _TreeNode>(
45-
grouping child: T, as kind: AST.Group.Kind
46-
) {
47-
let innerCaptures = child.captureStructure
63+
public mutating func grouping<T: _TreeNode>(
64+
_ child: T, as kind: AST.Group.Kind
65+
) -> CaptureStructure {
66+
let innerCaptures = child._captureStructure(&self)
4867
switch kind {
4968
case .capture:
50-
self = .atom() + innerCaptures
69+
return .atom() + innerCaptures
5170
case .namedCapture(let name):
52-
self = .atom(name: name.value) + innerCaptures
71+
return .atom(name: name.value) + innerCaptures
5372
case .balancedCapture(let b):
54-
self = .atom(name: b.name?.value) + innerCaptures
73+
return .atom(name: b.name?.value) + innerCaptures
5574
default:
5675
precondition(!kind.isCapturing)
57-
self = innerCaptures
76+
return innerCaptures
5877
}
5978
}
6079

61-
public init<T: _TreeNode>(
62-
grouping child: T,
80+
public mutating func grouping<T: _TreeNode>(
81+
_ child: T,
6382
as kind: AST.Group.Kind,
6483
withTransform transform: CaptureTransform
65-
) {
66-
let innerCaptures = child.captureStructure
84+
) -> CaptureStructure {
85+
let innerCaptures = child._captureStructure(&self)
6786
switch kind {
6887
case .capture:
69-
self = .atom(type: AnyType(transform.resultType)) + innerCaptures
88+
return .atom(type: AnyType(transform.resultType)) + innerCaptures
7089
case .namedCapture(let name):
71-
self = .atom(name: name.value, type: AnyType(transform.resultType))
90+
return .atom(name: name.value, type: AnyType(transform.resultType))
7291
+ innerCaptures
7392
default:
74-
self = innerCaptures
93+
return innerCaptures
7594
}
7695
}
7796

7897
// TODO: We'll likely want/need a generalization of
7998
// conditional's condition kind.
80-
public init<T: _TreeNode>(
81-
condition: AST.Conditional.Condition.Kind,
99+
public mutating func condition<T: _TreeNode>(
100+
_ condition: AST.Conditional.Condition.Kind,
82101
trueBranch: T,
83102
falseBranch: T
84-
) {
103+
) -> CaptureStructure {
85104
// A conditional's capture structure is effectively that of an alternation
86105
// between the true and false branches. However the condition may also
87106
// have captures in the case of a group condition.
88107
var captures = CaptureStructure.empty
89108
switch condition {
90109
case .group(let g):
91-
captures = captures + AST.Node.group(g).captureStructure
110+
captures = captures + AST.Node.group(g)._captureStructure(&self)
92111
default:
93112
break
94113
}
95-
let branchCaptures = trueBranch.captureStructure +
96-
falseBranch.captureStructure
97-
self = captures + branchCaptures.map(
114+
let branchCaptures = trueBranch._captureStructure(&self) +
115+
falseBranch._captureStructure(&self)
116+
return captures + branchCaptures.map(
98117
CaptureStructure.optional)
99118
}
100119

101-
public init<T: _TreeNode>(
102-
quantifying child: T, amount: AST.Quantification.Amount
103-
) {
104-
self = child.captureStructure.map(
120+
public mutating func quantifying<T: _TreeNode>(
121+
_ child: T, amount: AST.Quantification.Amount
122+
) -> CaptureStructure {
123+
return child._captureStructure(&self).map(
105124
amount == .zeroOrOne
106125
? CaptureStructure.optional
107126
: CaptureStructure.array)
108127
}
109128

110129
// TODO: Will need to adjust for DSLTree support, and
111130
// "absent" isn't the best name for these.
112-
public init(
113-
absent kind: AST.AbsentFunction.Kind
114-
) {
131+
public mutating func absent(
132+
_ kind: AST.AbsentFunction.Kind
133+
) -> CaptureStructure {
115134
// Only the child of an expression absent function is relevant, as the
116135
// other expressions don't actually get matched against.
117136
switch kind {
118137
case .expression(_, _, let child):
119-
self = child.captureStructure
138+
return child._captureStructure(&self)
120139
case .clearer, .repeater, .stopper:
121-
self = .empty
140+
return .empty
122141
}
123142
}
124143

125144
}
126145

127146
extension AST.Node {
128-
public var captureStructure: CaptureStructure {
147+
public func _captureStructure(
148+
_ constructor: inout CaptureStructure.Constructor
149+
) -> CaptureStructure {
150+
guard constructor.strategy == .flatten else {
151+
fatalError("TODO")
152+
}
153+
129154
// Note: This implementation could be more optimized.
130155
switch self {
131156
case let .alternation(a):
132-
return CaptureStructure(alternating: a.children)
157+
return constructor.alternating(a.children)
133158

134159
case let .concatenation(c):
135-
return CaptureStructure(concatenating: c.children)
160+
return constructor.concatenating(c.children)
136161

137162
case let .group(g):
138-
return CaptureStructure(
139-
grouping: g.child, as: g.kind.value)
163+
return constructor.grouping(g.child, as: g.kind.value)
140164

141165
case .groupTransform(let g, let transform):
142-
return CaptureStructure(
143-
grouping: g.child,
166+
return constructor.grouping(
167+
g.child,
144168
as: g.kind.value,
145169
withTransform: transform)
146170

147171
case .conditional(let c):
148-
return CaptureStructure(
149-
condition: c.condition.kind,
172+
return constructor.condition(
173+
c.condition.kind,
150174
trueBranch: c.trueBranch,
151175
falseBranch: c.falseBranch)
152176

153177
case .quantification(let q):
154-
return CaptureStructure(
155-
quantifying: q.child, amount: q.amount.value)
178+
return constructor.quantifying(
179+
q.child, amount: q.amount.value)
156180

157181
case .absentFunction(let abs):
158-
return CaptureStructure(absent: abs.kind)
182+
return constructor.absent(abs.kind)
159183

160184
case .quote, .trivia, .atom, .customCharacterClass, .empty:
161185
return .empty
@@ -436,3 +460,11 @@ extension CaptureStructure: CustomStringConvertible {
436460
}
437461
}
438462
}
463+
464+
extension CaptureStructure.Constructor {
465+
public enum Strategy {
466+
case flatten
467+
case nest
468+
// case drop(after: Int)...
469+
}
470+
}

Sources/_MatchingEngine/Regex/TreeProtocols.swift

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,9 @@
33
public protocol _TreeNode {
44
var children: [Self]? { get }
55

6-
var captureStructure: CaptureStructure { get }
6+
func _captureStructure(
7+
_: inout CaptureStructure.Constructor
8+
) -> CaptureStructure
79
}
810

911
extension _TreeNode {

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -248,13 +248,13 @@ extension Compiler.ByteCodeGen {
248248

249249
mutating func emitGroup(
250250
_ kind: AST.Group.Kind, _ child: DSLTree.Node
251-
) throws {
251+
) throws -> CaptureRegister? {
252252
options.beginScope()
253253
defer { options.endScope() }
254254

255255
if let lookaround = kind.lookaroundKind {
256256
try emitLookaround(lookaround, child)
257-
return
257+
return nil
258258
}
259259

260260
switch kind {
@@ -267,14 +267,17 @@ extension Compiler.ByteCodeGen {
267267
builder.buildBeginCapture(cap)
268268
try emitNode(child)
269269
builder.buildEndCapture(cap)
270+
return cap
270271

271272
case .changeMatchingOptions(let optionSequence, _):
272273
options.apply(optionSequence)
273274
try emitNode(child)
275+
return nil
274276

275277
default:
276278
// FIXME: Other kinds...
277279
try emitNode(child)
280+
return nil
278281
}
279282
}
280283

@@ -494,7 +497,7 @@ extension Compiler.ByteCodeGen {
494497
}
495498

496499
case let .group(kind, child):
497-
try emitGroup(kind, child)
500+
_ = try emitGroup(kind, child)
498501

499502
case .conditional:
500503
throw Unsupported("Conditionals")
@@ -518,9 +521,21 @@ extension Compiler.ByteCodeGen {
518521
case let .convertedRegexLiteral(n, _):
519522
try emitNode(n)
520523

521-
case let .groupTransform(kind, child, _):
522-
try emitGroup(kind, child)
523-
// FIXME: Transforms
524+
case let .groupTransform(kind, child, t):
525+
guard let cap = try emitGroup(kind, child) else {
526+
assertionFailure("""
527+
What does it mean to not have a capture to transform?
528+
""")
529+
return
530+
}
531+
532+
// FIXME: Is this how we want to do it?
533+
let transform = builder.makeTransformFunction {
534+
input, range in
535+
t(input[range])
536+
}
537+
538+
builder.buildTransformCapture(cap, transform)
524539

525540
case .absentFunction:
526541
throw Unsupported("absent function")

Sources/_StringProcessing/Capture.swift

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,9 @@ enum Capture {
1919
indirect case some(Capture)
2020
case none(childType: AnyType)
2121
indirect case array([Capture], childType: AnyType)
22+
}
2223

24+
extension Capture {
2325
static func none(childType: Any.Type) -> Capture {
2426
.none(childType: AnyType(childType))
2527
}
@@ -101,7 +103,7 @@ extension Capture: CustomStringConvertible {
101103
}
102104

103105
case let .some(n):
104-
printer.printBlock("Tuple") { printer in
106+
printer.printBlock("Some") { printer in
105107
n._print(&printer)
106108
}
107109

Sources/_StringProcessing/Compiler.swift

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ class Compiler {
2828
__consuming func emit() throws -> Program {
2929
// TODO: Handle global options
3030
var codegen = ByteCodeGen(options: options)
31+
codegen.builder.captureStructure = tree.captureStructure
3132
try codegen.emitNode(tree.root)
3233
let program = try codegen.finish()
3334
return program

Sources/_StringProcessing/Engine/Consume.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ extension Engine where Input == String {
6262
}
6363
guard let result = result else { return nil }
6464

65-
let capList = cpu.storedCaptures.map { $0.history }
65+
let capList = cpu.storedCaptures
6666
return (result, CaptureList(caps: capList))
6767
}
6868
}

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ extension Instruction.Payload {
6161
case packedBoolInt(BoolRegister, IntRegister)
6262
case packedEltBool(ElementRegister, BoolRegister)
6363
case packedPosPos(PositionRegister, PositionRegister)
64+
case packedCapTran(CaptureRegister, TransformRegister)
6465
}
6566
}
6667

@@ -280,5 +281,13 @@ extension Instruction.Payload {
280281
interpretPair()
281282
}
282283

284+
init(capture: CaptureRegister, transform: TransformRegister) {
285+
self.init(capture, transform)
286+
}
287+
var pairedCaptureTransform: (
288+
CaptureRegister, TransformRegister
289+
) {
290+
interpretPair()
291+
}
283292
}
284293

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,12 @@ extension Instruction {
240240
///
241241
case endCapture
242242

243+
/// Transform a captured value, saving the built value
244+
///
245+
/// transformCapture(_:CapReg, _:TransformReg)
246+
///
247+
case transformCapture
248+
243249
/// Match a previously captured value
244250
///
245251
/// backreference(_:CapReg)

0 commit comments

Comments
 (0)