Skip to content

Commit bfea2bf

Browse files
committed
Backend foundations of captures and back references
No compiler support just yet
1 parent ce981d2 commit bfea2bf

File tree

6 files changed

+173
-32
lines changed

6 files changed

+173
-32
lines changed

Sources/_MatchingEngine/Engine/Builder.swift

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ extension Program where Input.Element: Hashable {
1414
// Registers
1515
var nextBoolRegister = BoolRegister(0)
1616
var nextIntRegister = IntRegister(0)
17+
var nextPositionRegister = PositionRegister(0)
1718

1819
// Special addresses or instructions
1920
var failAddressToken: AddressToken? = nil
@@ -73,6 +74,13 @@ extension Program.Builder {
7374
buildMoveImmediate(uint, into: into)
7475
}
7576

77+
/// Appends a `movePosition` instruction whose payload is the position
/// register `into`.
///
/// When the processor executes this instruction it stores its current
/// input position in that register.
///
/// - Parameter into: The position register to write to.
public mutating func buildMoveCurrentPosition(
78+
into: PositionRegister
79+
) {
80+
instructions.append(.init(
81+
.movePosition, .init(position: into)))
82+
}
83+
7684
public mutating func buildBranch(to t: AddressToken) {
7785
instructions.append(.init(.branch))
7886
fixup(to: t)
@@ -150,6 +158,14 @@ extension Program.Builder {
150158
.init(sequence: sequences.store(.init(s)))))
151159
}
152160

161+
/// Appends a `matchSlice` instruction whose payload packs the two
/// position registers `lower` and `upper`.
///
/// When executed, the processor matches the input against the slice of
/// the input spanning the half-open range from the position held in
/// `lower` up to (not including) the position held in `upper`.
///
/// - Parameters:
///   - lower: Register holding the lower bound of the slice.
///   - upper: Register holding the upper bound of the slice.
public mutating func buildMatchSlice(
162+
lower: PositionRegister, upper: PositionRegister
163+
) {
164+
instructions.append(.init(
165+
.matchSlice,
166+
.init(pos: lower, pos2: upper)))
167+
}
168+
153169
public mutating func buildConsume(
154170
by p: @escaping Program.ConsumeFunction
155171
) {
@@ -225,6 +241,7 @@ extension Program.Builder {
225241
regInfo.strings = strings.count
226242
regInfo.bools = nextBoolRegister.rawValue
227243
regInfo.ints = nextIntRegister.rawValue
244+
regInfo.positions = nextPositionRegister.rawValue
228245
regInfo.consumeFunctions = consumeFunctions.count
229246

230247
return Program(
@@ -311,6 +328,10 @@ extension Program.Builder {
311328
defer { nextIntRegister.rawValue += 1 }
312329
return nextIntRegister
313330
}
331+
/// Allocates a fresh, uninitialized position register.
///
/// - Returns: The register numbered by the current value of
///   `nextPositionRegister`.
public mutating func makePositionRegister() -> PositionRegister {
332+
// Bump the counter after the current value has been returned.
defer { nextPositionRegister.rawValue += 1 }
333+
return nextPositionRegister
334+
}
314335

315336
// Allocate and initialize a register
316337
public mutating func makeIntRegister(
@@ -321,6 +342,15 @@ extension Program.Builder {
321342
return r
322343
}
323344

345+
// Allocate and initialize a register
346+
/// Allocates a position register and emits a `movePosition` instruction
/// that writes the current input position into it.
///
/// - Parameter initializingWithCurrentPosition: Pass `()`; the argument
///   carries no value and only selects this overload.
/// - Returns: The newly allocated register.
public mutating func makePositionRegister(
347+
initializingWithCurrentPosition: ()
348+
) -> PositionRegister {
349+
let r = makePositionRegister()
350+
self.buildMoveCurrentPosition(into: r)
351+
return r
352+
}
353+
324354
// 'kill' or release allocated registers
325355
public mutating func kill(_ r: IntRegister) {
326356
// TODO: Release/reuse registers, for now nop makes
@@ -332,15 +362,20 @@ extension Program.Builder {
332362
// reading the code easier
333363
buildNop("kill \(r)")
334364
}
365+
/// Marks the position register `r` as no longer needed.
///
/// Nothing is released yet: the body only calls `buildNop` with the
/// comment `"kill \(r)"`.
public mutating func kill(_ r: PositionRegister) {
366+
// TODO: Release/reuse registers, for now nop makes
367+
// reading the code easier
368+
buildNop("kill \(r)")
369+
}
370+
371+
// TODO: A register-mapping helper struct, which could release
372+
// registers without requiring monotonicity
335373

336374
public mutating func makeConsumeFunction(
337375
_ f: @escaping Program.ConsumeFunction
338376
) -> ConsumeFunctionRegister {
339377
defer { consumeFunctions.append(f) }
340378
return ConsumeFunctionRegister(consumeFunctions.count)
341379
}
342-
343-
// TODO: consider releasing registers
344-
345380
}
346381

Sources/_MatchingEngine/Engine/InstPayload.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ extension Instruction.Payload {
3232

3333
case string(StringRegister)
3434
case sequence(SequenceRegister)
35+
case position(PositionRegister)
3536
case optionalString(StringRegister?)
3637
case int(IntRegister)
3738
case distance(Distance)
@@ -46,6 +47,7 @@ extension Instruction.Payload {
4647
case packedAddrAddr(InstructionAddress, InstructionAddress)
4748
case packedBoolInt(BoolRegister, IntRegister)
4849
case packedEltBool(ElementRegister, BoolRegister)
50+
case packedPosPos(PositionRegister, PositionRegister)
4951
}
5052
}
5153

@@ -142,6 +144,13 @@ extension Instruction.Payload {
142144
interpret()
143145
}
144146

147+
/// Creates a payload holding a single position register.
init(position: PositionRegister) {
148+
self.init(position)
149+
}
150+
/// The payload read back as a position register.
// NOTE(review): presumably only meaningful for payloads created with
// `init(position:)` — confirm against `interpret()`.
var position: PositionRegister {
151+
interpret()
152+
}
153+
145154
init(int: IntRegister) {
146155
self.init(int)
147156
}
@@ -236,5 +245,12 @@ extension Instruction.Payload {
236245
interpretPair()
237246
}
238247

248+
/// Creates a payload packing two position registers, `pos` first and
/// `pos2` second.
init(pos: PositionRegister, pos2: PositionRegister) {
249+
self.init(pos, pos2)
250+
}
251+
/// The payload read back as a pair of position registers.
// NOTE(review): presumably only meaningful for payloads created with
// `init(pos:pos2:)` — confirm against `interpretPair()`.
var pairedPosPos: (PositionRegister, PositionRegister) {
252+
interpretPair()
253+
}
254+
239255
}
240256

Sources/_MatchingEngine/Engine/Instruction.swift

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ extension Instruction {
2020
///
2121
/// nop(comment: String?)
2222
///
23-
/// Operand:
24-
/// - Optional string register containing a comment or reason
23+
/// Operand: Optional string register containing a comment or reason
2524
///
2625
case nop
2726

@@ -41,29 +40,37 @@ extension Instruction {
4140
/// moveImmediate(_ i: Int, into: IntReg)
4241
///
4342
/// Operands:
44-
/// - TODO
43+
/// - Immediate value to move
44+
/// - Int register to move into
4545
///
4646
case moveImmediate
4747

4848
// MARK: General Purpose: Control flow
4949

5050
/// Branch to a new instruction
5151
///
52+
/// branch(to: InstAddr)
53+
///
5254
/// Operand: instruction address to branch to
5355
case branch
5456

5557
/// Conditionally branch
5658
///
57-
/// Operand: packed condition register and address to branch to
59+
/// condBranch(to: InstAddr, if: BoolReg)
60+
///
61+
/// Operands:
62+
/// - Address to branch to
63+
/// - Condition register to check
5864
case condBranch
5965

6066
/// Conditionally branch if zero, otherwise decrement
6167
///
62-
/// branch_cond_zero_else_decrement(_ i: IntReg, to: InstructionAddress)
68+
/// condBranch(
69+
/// to: InstAddr, ifZeroElseDecrement: IntReg)
6370
///
6471
/// Operands:
65-
/// - Int register to check for zero, otherwise decrease
6672
/// - Instruction address to branch to, if zero
73+
/// - Int register to check for zero, otherwise decrease
6774
///
6875
case condBranchZeroElseDecrement
6976

@@ -101,26 +108,48 @@ extension Instruction {
101108
case print
102109

103110

104-
105111
// MARK: - Matching
106112

107113
/// Advance the input position.
108114
///
115+
/// advance(_ amount: Distance)
116+
///
109117
/// Operand: Amount to advance by.
110118
case advance
111119

112120
// TODO: Is the amount useful here? Is it commonly more than 1?
113121

114122
/// Composite assert-advance else restore.
115123
///
124+
/// match(_: EltReg)
125+
///
116126
/// Operand: Element register to compare against.
117127
case match
118128

119129
/// Match against a sequence of elements
120130
///
131+
/// matchSequence(_: SeqReg)
132+
///
121133
/// Operand: Sequence register to compare against.
122134
case matchSequence
123135

136+
/// Match against a slice of the input
137+
///
138+
/// matchSlice(
139+
/// lowerBound: PositionReg, upperBound: PositionReg)
140+
///
141+
/// Operands:
142+
/// - Lower bound position in the input
143+
/// - Upper bound position in the input
144+
case matchSlice
145+
146+
/// Save the current position in the input in a register
147+
///
148+
/// movePosition(into: PositionReg)
149+
///
150+
/// Operand: The position register to move into
151+
case movePosition
152+
124153
/// Match against a provided element.
125154
///
126155
/// Operand: Packed condition register to write to and element register to

Sources/_MatchingEngine/Engine/Processor.swift

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,32 @@ extension Processor {
133133
return slice
134134
}
135135

136+
/// Matches a single element `e` against the input.
///
/// Signals failure if `load()` returns `nil` or a value not equal to
/// `e`. Otherwise consumes one element and, if that succeeds, steps the
/// controller to the next instruction.
mutating func match(_ e: Element) {
137+
guard let cur = load(), cur == e else {
138+
signalFailure()
139+
return
140+
}
141+
// Only step when the consume succeeds.
if consume(1) {
142+
controller.step()
143+
}
144+
}
145+
/// Matches every element of `seq`, in order, against the input.
///
/// Loads `seq.count` elements via `load(count:)` and signals failure if
/// that returns `nil` or the loaded elements differ from `seq`. On
/// success, consumes `seq.count` elements and steps the controller.
///
/// - Parameter seq: The elements to match; any collection whose element
///   type is the input's element type, including a slice of the input.
mutating func matchSeq<C: Collection>(
146+
_ seq: C
147+
) where C.Element == Input.Element {
148+
let count = seq.count
149+
150+
guard let inputSlice = load(count: count),
151+
seq.elementsEqual(inputSlice)
152+
else {
153+
signalFailure()
154+
return
155+
}
156+
// `load(count:)` already succeeded for this many elements, so the
// consume is expected to succeed as well.
guard consume(.init(count)) else {
157+
fatalError("unreachable")
158+
}
159+
controller.step()
160+
}
161+
136162
mutating func signalFailure() {
137163
guard let (pc, pos, stackEnd) = savePoints.popLast()?.destructure
138164
else {
@@ -197,6 +223,11 @@ extension Processor {
197223
registers[reg] = int
198224
controller.step()
199225

226+
case .movePosition:
227+
let reg = payload.position
228+
registers[reg] = currentPosition
229+
controller.step()
230+
200231
case .branch:
201232
controller.pc = payload.addr
202233

@@ -291,29 +322,18 @@ extension Processor {
291322

292323
case .match:
293324
let reg = payload.element
294-
guard let cur = load(), cur == registers[reg] else {
295-
signalFailure()
296-
return
297-
}
298-
if consume(1) {
299-
controller.step()
300-
}
325+
match(registers[reg])
301326

302327
case .matchSequence:
303328
let reg = payload.sequence
304329
let seq = registers[reg]
305-
let count = seq.count
330+
matchSeq(seq)
306331

307-
guard let inputSlice = load(count: count),
308-
seq.elementsEqual(inputSlice)
309-
else {
310-
signalFailure()
311-
return
312-
}
313-
guard consume(.init(count)) else {
314-
fatalError("unreachable")
315-
}
316-
controller.step()
332+
case .matchSlice:
333+
let (lower, upper) = payload.pairedPosPos
334+
let range = registers[lower]..<registers[upper]
335+
let slice = input[range]
336+
matchSeq(slice)
317337

318338
case .consumeBy:
319339
let reg = payload.consumer

Sources/_MatchingEngine/Engine/Registers.swift

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,7 @@ extension Processor {
2424
// unused
2525
var floats: [Double] = []
2626

27-
// unused
28-
//
29-
// Unlikely to be static, as that means input must be bound
30-
// at compile time
27+
// Currently used for `movePosition` and `matchSlice`
3128
var positions: [Position] = []
3229

3330
// unused
@@ -56,6 +53,10 @@ extension Processor {
5653
get { bools[i.rawValue] }
5754
set { bools[i.rawValue] = newValue }
5855
}
56+
/// Reads or writes the position stored in register `i`, indexing
/// `positions` by the register's raw value.
subscript(_ i: PositionRegister) -> Position {
57+
get { positions[i.rawValue] }
58+
set { positions[i.rawValue] = newValue }
59+
}
5960
subscript(_ i: ElementRegister) -> Element {
6061
elements[i.rawValue]
6162
}

Tests/MatchingEngineTests/MatchingEngineTests.swift

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ let eatThroughA: Engine<String> = {
226226
}
227227
}()
228228

229+
230+
229231
class MatchingEngineTests: XCTestCase {
230232

231233
func testAEaters() {
@@ -247,4 +249,42 @@ class MatchingEngineTests: XCTestCase {
247249
test.check(eatThroughA: eatThroughA)
248250
}
249251
}
252+
253+
func testThreeLetterRepeat() {
254+
// Check for a repeated 3-letter sequence, such as in
255+
// `(...)\1`
256+
//
257+
// [0] movePosition(into: %low)
258+
// [1] advance(3)
259+
// [2] movePosition(into: %high)
260+
// [3] matchSlice(%low, %high)
261+
// [4] accept
262+
let threeLetterRepeat: Engine<String> = {
263+
makeEngine { builder in
264+
// %low: the position before the three elements are consumed.
let low = builder.makePositionRegister(
265+
initializingWithCurrentPosition: ())
266+
builder.buildAdvance(3)
267+
// %high: the position just past the three consumed elements.
let high = builder.makePositionRegister(
268+
initializingWithCurrentPosition: ())
269+
// Require the input that follows to equal the slice %low..<%high.
builder.buildMatchSlice(lower: low, upper: high)
270+
builder.buildAccept()
271+
}
272+
}()
273+
274+
// Each entry pairs an input with whether the program should match.
// The passing emoji case shows that each emoji is expected to count as
// one element; the failing emoji case differs only in its second emoji.
let tests: Array<(String, Bool)> = [
275+
("abcabc", true),
276+
("abcabc_____", true),
277+
("dddddd_____", true),
278+
("🥳🧟‍♀️c🥳🧟‍♀️c", true),
279+
("abccba", false),
280+
("abcabb", false),
281+
("abcbac", false),
282+
("🥳🧟‍♀️c🥳🧟‍♂️c", false),
283+
]
284+
285+
for (test, expect) in tests {
286+
// A non-nil result from `consume` is treated as a successful match.
let match = threeLetterRepeat.consume(test) != nil
287+
XCTAssertEqual(expect, match)
288+
}
289+
}
250290
}

0 commit comments

Comments
 (0)