Skip to content

Commit 022380f

Browse files
committed
WIP: structuralize captures
1 parent 60173c1 commit 022380f

17 files changed

+774
-31
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,13 @@ extension Compiler.ByteCodeGen {
248248

249249
mutating func emitGroup(
250250
_ kind: AST.Group.Kind, _ child: DSLTree.Node
251-
) throws {
251+
) throws -> CaptureRegister? {
252252
options.beginScope()
253253
defer { options.endScope() }
254254

255255
if let lookaround = kind.lookaroundKind {
256256
try emitLookaround(lookaround, child)
257-
return
257+
return nil
258258
}
259259

260260
switch kind {
@@ -267,14 +267,17 @@ extension Compiler.ByteCodeGen {
267267
builder.buildBeginCapture(cap)
268268
try emitNode(child)
269269
builder.buildEndCapture(cap)
270+
return cap
270271

271272
case .changeMatchingOptions(let optionSequence, _):
272273
options.apply(optionSequence)
273274
try emitNode(child)
275+
return nil
274276

275277
default:
276278
// FIXME: Other kinds...
277279
try emitNode(child)
280+
return nil
278281
}
279282
}
280283

@@ -494,7 +497,7 @@ extension Compiler.ByteCodeGen {
494497
}
495498

496499
case let .group(kind, child):
497-
try emitGroup(kind, child)
500+
_ = try emitGroup(kind, child)
498501

499502
case .conditional:
500503
throw Unsupported("Conditionals")
@@ -518,9 +521,21 @@ extension Compiler.ByteCodeGen {
518521
case let .convertedRegexLiteral(n, _):
519522
try emitNode(n)
520523

521-
case let .groupTransform(kind, child, _):
522-
try emitGroup(kind, child)
523-
// FIXME: Transforms
524+
case let .groupTransform(kind, child, t):
525+
guard let cap = try emitGroup(kind, child) else {
526+
assertionFailure("""
527+
What does it mean to not have a capture to transform?
528+
""")
529+
return
530+
}
531+
532+
// FIXME: Is this how we want to do it?
533+
let transform = builder.makeTransformFunction {
534+
input, range in
535+
t(input[range])
536+
}
537+
538+
builder.buildTransformCapture(cap, transform)
524539

525540
case .absentFunction:
526541
throw Unsupported("absent function")

Sources/_StringProcessing/Capture.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ enum Capture {
1919
indirect case some(Capture)
2020
case none(childType: AnyType)
2121
indirect case array([Capture], childType: AnyType)
22+
}
2223

24+
extension Capture {
2325
static func none(childType: Any.Type) -> Capture {
2426
.none(childType: AnyType(childType))
2527
}
@@ -101,7 +103,7 @@ extension Capture: CustomStringConvertible {
101103
}
102104

103105
case let .some(n):
104-
printer.printBlock("Tuple") { printer in
106+
printer.printBlock("Some") { printer in
105107
n._print(&printer)
106108
}
107109

Sources/_StringProcessing/Compiler.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class Compiler {
2828
__consuming func emit() throws -> Program {
2929
// TODO: Handle global options
3030
var codegen = ByteCodeGen(options: options)
31+
codegen.builder.captureStructure = tree.captureStructure
3132
try codegen.emitNode(tree.root)
3233
let program = try codegen.finish()
3334
return program

Sources/_StringProcessing/Engine/Consume.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ extension Engine where Input == String {
6262
}
6363
guard let result = result else { return nil }
6464

65-
let capList = cpu.storedCaptures.map { $0.history }
65+
let capList = cpu.storedCaptures
6666
return (result, CaptureList(caps: capList))
6767
}
6868
}

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ extension Instruction.Payload {
6161
case packedBoolInt(BoolRegister, IntRegister)
6262
case packedEltBool(ElementRegister, BoolRegister)
6363
case packedPosPos(PositionRegister, PositionRegister)
64+
case packedCapTran(CaptureRegister, TransformRegister)
6465
}
6566
}
6667

@@ -280,5 +281,13 @@ extension Instruction.Payload {
280281
interpretPair()
281282
}
282283

284+
init(capture: CaptureRegister, transform: TransformRegister) {
285+
self.init(capture, transform)
286+
}
287+
var pairedCaptureTransform: (
288+
CaptureRegister, TransformRegister
289+
) {
290+
interpretPair()
291+
}
283292
}
284293

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,12 @@ extension Instruction {
240240
///
241241
case endCapture
242242

243+
/// Transform a captured value, saving the built value
244+
///
245+
/// transformCapture(_:CapReg, _:TransformReg)
246+
///
247+
case transformCapture
248+
243249
/// Match a previously captured value
244250
///
245251
/// backreference(_:CapReg)

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ extension MEProgram where Input.Element: Hashable {
1818
var elements = TypedSetVector<Input.Element, _ElementRegister>()
1919
var sequences = TypedSetVector<[Input.Element], _SequenceRegister>()
2020
var strings = TypedSetVector<String, _StringRegister>()
21+
2122
var consumeFunctions: [ConsumeFunction] = []
2223
var assertionFunctions: [AssertionFunction] = []
24+
var transformFunctions: [TransformFunction] = []
2325

2426
// Map tokens to actual addresses
2527
var addressTokens: [InstructionAddress?] = []
@@ -34,6 +36,10 @@ extension MEProgram where Input.Element: Hashable {
3436
// Special addresses or instructions
3537
var failAddressToken: AddressToken? = nil
3638

39+
// TODO: Should we have a better API for building this up
40+
// as we compile?
41+
var captureStructure: CaptureStructure = .empty
42+
3743
public init() {}
3844
}
3945
}
@@ -223,6 +229,14 @@ extension MEProgram.Builder {
223229
.init(.endCapture, .init(capture: cap)))
224230
}
225231

232+
public mutating func buildTransformCapture(
233+
_ cap: CaptureRegister, _ trans: TransformRegister
234+
) {
235+
instructions.append(.init(
236+
.transformCapture,
237+
.init(capture: cap, transform: trans)))
238+
}
239+
226240
public mutating func buildBackreference(
227241
_ cap: CaptureRegister
228242
) {
@@ -286,6 +300,7 @@ extension MEProgram.Builder {
286300
regInfo.positions = nextPositionRegister.rawValue
287301
regInfo.consumeFunctions = consumeFunctions.count
288302
regInfo.assertionFunctions = assertionFunctions.count
303+
regInfo.transformFunctions = transformFunctions.count
289304
regInfo.captures = nextCaptureRegister.rawValue
290305

291306
return MEProgram(
@@ -295,7 +310,9 @@ extension MEProgram.Builder {
295310
staticStrings: strings.stored,
296311
staticConsumeFunctions: consumeFunctions,
297312
staticAssertionFunctions: assertionFunctions,
298-
registerInfo: regInfo)
313+
staticTransformFunctions: transformFunctions,
314+
registerInfo: regInfo,
315+
captureStructure: captureStructure)
299316
}
300317

301318
public mutating func reset() { self = Self() }
@@ -433,5 +450,11 @@ extension MEProgram.Builder {
433450
defer { assertionFunctions.append(f) }
434451
return AssertionFunctionRegister(assertionFunctions.count)
435452
}
453+
public mutating func makeTransformFunction(
454+
_ f: @escaping MEProgram.TransformFunction
455+
) -> TransformRegister {
456+
defer { transformFunctions.append(f) }
457+
return TransformRegister(transformFunctions.count)
458+
}
436459
}
437460

Sources/_StringProcessing/Engine/MECapture.swift

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12+
import _MatchingEngine
13+
1214
/*
1315

1416
TODO: Specialized data structure for all captures:
@@ -45,28 +47,45 @@ extension Processor {
4547
//
4648
fileprivate var stack: Array<Range<Position>> = []
4749

50+
// Also save the entire history of captured values -_-
51+
//
52+
// We will need to really zoom in on performance here...
53+
fileprivate var valueStack: Array<Any> = []
54+
4855
// An in-progress capture start
4956
fileprivate var currentCaptureBegin: Position? = nil
5057

5158
fileprivate func _invariantCheck() {
5259
if startState == nil {
5360
assert(stack.isEmpty)
61+
assert(valueStack.isEmpty)
5462
assert(currentCaptureBegin == nil)
5563
} else {
5664
assert(!stack.isEmpty || currentCaptureBegin != nil)
5765
}
66+
if hasValues {
67+
// FIXME: how?
68+
// assert(valueStack.count == stack.count)
69+
}
5870
}
5971

6072
// MARK: - IPI
6173

6274
var isEmpty: Bool { stack.isEmpty }
6375

76+
var hasValues: Bool { !valueStack.isEmpty }
77+
6478
var history: Array<Range<Position>> {
6579
stack
6680
}
81+
var valueHistory: Array<Any> {
82+
valueStack
83+
}
6784

6885
var latest: Range<Position>? { stack.last }
6986

87+
var latestValue: Any? { valueStack.last }
88+
7089
/// Start a new capture. If the previously started one was un-ended,
7190
/// will clear it and restart. If this is the first start, will save `initial`.
7291
mutating func startCapture(
@@ -89,6 +108,14 @@ extension Processor {
89108
stack.append(currentCaptureBegin! ..< idx)
90109
}
91110

111+
mutating func registerValue(
112+
_ value: Any
113+
) {
114+
_invariantCheck()
115+
defer { _invariantCheck() }
116+
valueStack.append(value)
117+
}
118+
92119
mutating func fail(truncatingAt stackIdx: Int) {
93120
_invariantCheck()
94121
assert(stackIdx <= stack.endIndex)
@@ -102,15 +129,28 @@ extension Processor {
102129
}
103130
}
104131

105-
public struct CaptureList {
106-
var caps: Array<Array<Range<String.Index>>>
107-
108-
func extract(from s: String) -> Array<Array<Substring>> {
109-
caps.map { $0.map { s[$0] } }
132+
extension Processor._StoredCapture: CustomStringConvertible {
133+
var description: String {
134+
if hasValues {
135+
return String(describing: valueStack)
136+
}
137+
return String(describing: history)
110138
}
139+
}
111140

112-
func latest(from s: String) -> Array<Substring?> {
113-
// TODO: If empty, probably need empty range or something...
114-
extract(from: s).map { $0.last }
141+
public struct CaptureList {
142+
var caps: Array<Processor<String>._StoredCapture>
143+
144+
// func extract(from s: String) -> Array<Array<Substring>> {
145+
// caps.map { $0.map { s[$0] } }
146+
// }
147+
//
148+
func latestUntyped(from s: String) -> Array<Substring?> {
149+
caps.map {
150+
guard let last = $0.latest else {
151+
return nil
152+
}
153+
return s[last]
154+
}
115155
}
116156
}

Sources/_StringProcessing/Engine/MEProgram.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,29 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12+
import _MatchingEngine
13+
1214
public struct MEProgram<Input: Collection> where Input.Element: Equatable {
1315
public typealias ConsumeFunction = (Input, Range<Input.Index>) -> Input.Index?
1416
public typealias AssertionFunction =
1517
(Input, Input.Index, Range<Input.Index>) -> Bool
18+
public typealias TransformFunction =
19+
(Input, Range<Input.Index>) -> Any?
20+
1621
var instructions: InstructionList<Instruction>
1722

1823
var staticElements: [Input.Element]
1924
var staticSequences: [[Input.Element]]
2025
var staticStrings: [String]
2126
var staticConsumeFunctions: [ConsumeFunction]
2227
var staticAssertionFunctions: [AssertionFunction]
28+
var staticTransformFunctions: [TransformFunction]
2329

2430
var registerInfo: RegisterInfo
2531

2632
var enableTracing: Bool = false
33+
34+
let captureStructure: CaptureStructure
2735
}
2836

2937
extension MEProgram: CustomStringConvertible {

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,23 @@ extension Processor {
406406
storedCaptures[capNum].endCapture(currentPosition)
407407
controller.step()
408408

409+
case .transformCapture:
410+
let (cap, trans) = payload.pairedCaptureTransform
411+
let transform = registers[trans]
412+
let capNum = Int(asserting: cap.rawValue)
413+
414+
guard let range = storedCaptures[capNum].latest else {
415+
fatalError(
416+
"Unreachable: transforming without a capture")
417+
}
418+
// FIXME: Pass input or the slice?
419+
guard let value = transform(input, range) else {
420+
signalFailure()
421+
return
422+
}
423+
storedCaptures[capNum].registerValue(value)
424+
425+
controller.step()
409426
}
410427
}
411428
}

0 commit comments

Comments
 (0)