Skip to content

Commit 492b371

Browse files
committed
More optimizations, remove history preservation (swiftlang#495)
* Re-use the same executor, remember semantic mode. Gives around a 20% perf improvement to first-match style benchmarks.
* Remove history preservation. Cuts down on memory usage and avoids some ARC overhead. ~20% gains on "AllMatches" and related benchmarks.
* Lower-level matchSeq. Avoid collection algorithms inside matchSeq, which are liable to add ARC and inefficiencies. Results in a 3x improvement to ReluctantQuantWithTerminal.
1 parent 3b475de commit 492b371

File tree

5 files changed

+71
-78
lines changed

5 files changed

+71
-78
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -630,15 +630,19 @@ fileprivate extension Compiler.ByteCodeGen {
630630
}
631631
// If there's a capture transform, apply it now.
632632
if let transform = transform {
633-
let fn = builder.makeTransformFunction { input, storedCapture in
633+
let fn = builder.makeTransformFunction { input, cap in
634634
// If it's a substring capture with no custom value, apply the
635635
// transform directly to the substring to avoid existential traffic.
636-
if let cap = storedCapture.latest, cap.value == nil {
637-
return try transform(input[cap.range])
636+
//
637+
// FIXME: separate out this code path. This is fragile,
638+
// slow, and these are clearly different constructs
639+
if let range = cap.range, cap.value == nil {
640+
return try transform(input[range])
638641
}
642+
639643
let value = constructExistentialOutputComponent(
640644
from: input,
641-
component: storedCapture.latest,
645+
component: cap.deconstructed,
642646
optionalCount: 0)
643647
return try transform(value)
644648
}

Sources/_StringProcessing/Engine/MECapture.swift

Lines changed: 22 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -32,59 +32,48 @@
3232

3333
extension Processor {
3434
struct _StoredCapture {
35-
// Set whenever we push the very first capture, allows us
36-
// to theoretically re-compute anything we want to later.
37-
fileprivate var startState: SavePoint? = nil
38-
39-
// Save the entire history as we go, so that backtracking
40-
// can just lop-off aborted runs.
41-
//
42-
// Backtracking entries can specify a per-capture stack
43-
// index so that we can abort anything that came after.
44-
//
45-
// By remembering the entire history, we waste space, but
46-
// we get flexibility for now.
47-
//
48-
fileprivate var history: Array<(range: Range<Position>, value: Any?)> = []
35+
var range: Range<Position>? = nil
36+
37+
var value: Any? = nil
4938

5039
// An in-progress capture start
5140
fileprivate var currentCaptureBegin: Position? = nil
5241

5342
fileprivate func _invariantCheck() {
54-
if startState == nil {
55-
assert(history.isEmpty)
56-
assert(currentCaptureBegin == nil)
57-
} else if currentCaptureBegin == nil {
58-
assert(!history.isEmpty)
43+
if range == nil {
44+
assert(value == nil)
5945
}
6046
}
6147

6248
// MARK: - IPI
6349

64-
var isEmpty: Bool { history.isEmpty }
65-
66-
var latest: (range: Range<Position>, value: Any?)? { history.last }
50+
var deconstructed: (range: Range<Position>, value: Any?)? {
51+
guard let r = range else { return nil }
52+
return (r, value)
53+
}
6754

6855
/// Start a new capture. If the previously started one was un-ended,
69-
/// will clear it and restart. If this is the first start, will save `initial`.
56+
/// will clear it and restart.
7057
mutating func startCapture(
71-
_ idx: Position, initial: SavePoint
58+
_ idx: Position
7259
) {
7360
_invariantCheck()
7461
defer { _invariantCheck() }
7562

76-
if self.startState == nil {
77-
self.startState = initial
78-
}
7963
currentCaptureBegin = idx
8064
}
8165

8266
mutating func endCapture(_ idx: Position) {
8367
_invariantCheck()
84-
assert(currentCaptureBegin != nil)
8568
defer { _invariantCheck() }
8669

87-
history.append((currentCaptureBegin! ..< idx, value: nil))
70+
guard let low = currentCaptureBegin else {
71+
fatalError("Invariant violated: ending unstarted capture")
72+
}
73+
74+
range = low..<idx
75+
value = nil // TODO: cleaner IPI around this...
76+
currentCaptureBegin = nil
8877
}
8978

9079
mutating func registerValue(
@@ -93,28 +82,15 @@ extension Processor {
9382
) {
9483
_invariantCheck()
9584
defer { _invariantCheck() }
96-
if let sp = overwriteInitial {
97-
self.startState = sp
98-
}
99-
history[history.endIndex - 1].value = value
100-
}
101-
102-
mutating func fail(truncatingAt stackIdx: Int) {
103-
_invariantCheck()
104-
assert(stackIdx <= history.endIndex)
105-
defer { _invariantCheck() }
10685

107-
history.removeSubrange(stackIdx...)
108-
if history.isEmpty {
109-
startState = nil
110-
}
86+
self.value = value
11187
}
11288
}
11389
}
11490

11591
extension Processor._StoredCapture: CustomStringConvertible {
11692
var description: String {
117-
return String(describing: history)
93+
// Describe the stored fields rather than `self`: this type conforms to
// CustomStringConvertible, so `String(describing: self)` would call
// `description` again and recurse without bound.
return String(describing: (
range: range, value: value, currentCaptureBegin: currentCaptureBegin))
11894
}
11995
}
12096

@@ -124,10 +100,10 @@ struct MECaptureList {
124100

125101
func latestUntyped(from input: String) -> Array<Substring?> {
126102
values.map {
127-
guard let last = $0.latest else {
103+
guard let range = $0.range else {
128104
return nil
129105
}
130-
return input[last.0]
106+
return input[range]
131107
}
132108
}
133109
}

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ extension Processor {
103103
input[bounds]
104104
}
105105

106+
// Advance in our input, without any checks or failure signalling
107+
mutating func _uncheckedForcedConsumeOne() {
108+
assert(currentPosition != end)
109+
input.formIndex(after: &currentPosition)
110+
}
111+
106112
// Advance in our input
107113
//
108114
// Returns whether the advance succeeded. On failure, our
@@ -141,30 +147,26 @@ extension Processor {
141147
return slice
142148
}
143149

144-
mutating func match(_ e: Element) {
150+
// Match against the current input element. Returns whether
151+
// it succeeded vs signaling an error.
152+
mutating func match(_ e: Element) -> Bool {
145153
guard let cur = load(), cur == e else {
146154
signalFailure()
147-
return
148-
}
149-
if consume(1) {
150-
controller.step()
155+
return false
151156
}
157+
_uncheckedForcedConsumeOne()
158+
return true
152159
}
160+
161+
// Match against the current input prefix. Returns whether
162+
// it succeeded vs signaling an error.
153163
mutating func matchSeq<C: Collection>(
154164
_ seq: C
155-
) where C.Element == Input.Element {
156-
let count = seq.count
157-
158-
guard let inputSlice = load(count: count),
159-
seq.elementsEqual(inputSlice)
160-
else {
161-
signalFailure()
162-
return
163-
}
164-
guard consume(.init(count)) else {
165-
fatalError("unreachable")
165+
) -> Bool where C.Element == Input.Element {
166+
for e in seq {
167+
guard match(e) else { return false }
166168
}
167-
controller.step()
169+
return true
168170
}
169171

170172
mutating func signalFailure() {
@@ -337,18 +339,24 @@ extension Processor {
337339

338340
case .match:
339341
let reg = payload.element
340-
match(registers[reg])
342+
if match(registers[reg]) {
343+
controller.step()
344+
}
341345

342346
case .matchSequence:
343347
let reg = payload.sequence
344348
let seq = registers[reg]
345-
matchSeq(seq)
349+
if matchSeq(seq) {
350+
controller.step()
351+
}
346352

347353
case .matchSlice:
348354
let (lower, upper) = payload.pairedPosPos
349355
let range = registers[lower]..<registers[upper]
350356
let slice = input[range]
351-
matchSeq(slice)
357+
if matchSeq(slice) {
358+
controller.step()
359+
}
352360

353361
case .consumeBy:
354362
let reg = payload.consumer
@@ -420,19 +428,19 @@ extension Processor {
420428
// Should we assert it's not finished yet?
421429
// What's the behavior there?
422430
let cap = storedCaptures[capNum]
423-
guard let range = cap.latest?.range else {
431+
guard let range = cap.range else {
424432
signalFailure()
425433
return
426434
}
427-
matchSeq(input[range])
435+
if matchSeq(input[range]) {
436+
controller.step()
437+
}
428438

429439
case .beginCapture:
430440
let capNum = Int(
431441
asserting: payload.capture.rawValue)
432442

433-
let sp = makeSavePoint(self.currentPC)
434-
storedCaptures[capNum].startCapture(
435-
currentPosition, initial: sp)
443+
storedCaptures[capNum].startCapture(currentPosition)
436444
controller.step()
437445

438446
case .endCapture:

Sources/_StringProcessing/Engine/Structuralize.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ extension CaptureList {
1212
for (i, (cap, meStored)) in zip(captures, list.values).enumerated() {
1313
let element = AnyRegexOutput.ElementRepresentation(
1414
optionalDepth: cap.optionalDepth,
15-
content: meStored.latest,
15+
content: meStored.deconstructed,
1616
name: cap.name,
1717
referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key
1818
)

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,14 +140,19 @@ extension Regex {
140140
// FIXME: Something more efficient, likely an engine interface, and we
141141
// should scrap the RegexConsumer crap and call this
142142

143+
let executor = Executor(program: regex.program.loweredProgram)
144+
let graphemeSemantic = regex.initialOptions.semanticLevel == .graphemeCluster
145+
143146
var low = inputRange.lowerBound
144147
let high = inputRange.upperBound
145148
while true {
146-
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
149+
if let m: Regex<Output>.Match = try executor.match(
150+
input, in: low..<high, .partialFromFront
151+
) {
147152
return m
148153
}
149154
if low >= high { return nil }
150-
if regex.initialOptions.semanticLevel == .graphemeCluster {
155+
if graphemeSemantic {
151156
input.formIndex(after: &low)
152157
} else {
153158
input.unicodeScalars.formIndex(after: &low)

0 commit comments

Comments
 (0)