Skip to content

More optimizations, remove history preservation #495

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -665,15 +665,19 @@ fileprivate extension Compiler.ByteCodeGen {
}
// If there's a capture transform, apply it now.
if let transform = transform {
let fn = builder.makeTransformFunction { input, storedCapture in
let fn = builder.makeTransformFunction { input, cap in
// If it's a substring capture with no custom value, apply the
// transform directly to the substring to avoid existential traffic.
if let cap = storedCapture.latest, cap.value == nil {
return try transform(input[cap.range])
//
// FIXME: separate out this code path. This is fragile,
// slow, and these are clearly different constructs
if let range = cap.range, cap.value == nil {
return try transform(input[range])
}

let value = constructExistentialOutputComponent(
from: input,
component: storedCapture.latest,
component: cap.deconstructed,
optionalCount: 0)
return try transform(value)
}
Expand Down
68 changes: 22 additions & 46 deletions Sources/_StringProcessing/Engine/MECapture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,59 +32,48 @@

extension Processor {
struct _StoredCapture {
// Set whenever we push the very first capture, allows us
// to theoretically re-compute anything we want to later.
fileprivate var startState: SavePoint? = nil

// Save the entire history as we go, so that backtracking
// can just lop-off aborted runs.
//
// Backtracking entries can specify a per-capture stack
// index so that we can abort anything that came after.
//
// By remembering the entire history, we waste space, but
// we get flexibility for now.
//
fileprivate var history: Array<(range: Range<Position>, value: Any?)> = []
var range: Range<Position>? = nil

var value: Any? = nil

// An in-progress capture start
fileprivate var currentCaptureBegin: Position? = nil

fileprivate func _invariantCheck() {
if startState == nil {
assert(history.isEmpty)
assert(currentCaptureBegin == nil)
} else if currentCaptureBegin == nil {
assert(!history.isEmpty)
if range == nil {
assert(value == nil)
}
}

// MARK: - IPI

var isEmpty: Bool { history.isEmpty }

var latest: (range: Range<Position>, value: Any?)? { history.last }
/// The stored capture as a labeled `(range, value)` pair, or `nil` when no
/// range has been recorded.
var deconstructed: (range: Range<Position>, value: Any?)? {
  // Optional.map yields nil for a nil range, otherwise pairs the range with
  // whatever value is currently stored (which may itself be nil).
  range.map { (range: $0, value: value) }
}

/// Start a new capture. If the previously started one was un-ended,
/// will clear it and restart. If this is the first start, will save `initial`.
/// will clear it and restart.
mutating func startCapture(
_ idx: Position, initial: SavePoint
_ idx: Position
) {
_invariantCheck()
defer { _invariantCheck() }

if self.startState == nil {
self.startState = initial
}
currentCaptureBegin = idx
}

mutating func endCapture(_ idx: Position) {
_invariantCheck()
assert(currentCaptureBegin != nil)
defer { _invariantCheck() }

history.append((currentCaptureBegin! ..< idx, value: nil))
guard let low = currentCaptureBegin else {
fatalError("Invariant violated: ending unstarted capture")
}

range = low..<idx
value = nil // TODO: cleaner IPI around this...
currentCaptureBegin = nil
}

mutating func registerValue(
Expand All @@ -93,28 +82,15 @@ extension Processor {
) {
_invariantCheck()
defer { _invariantCheck() }
if let sp = overwriteInitial {
self.startState = sp
}
history[history.endIndex - 1].value = value
}

mutating func fail(truncatingAt stackIdx: Int) {
_invariantCheck()
assert(stackIdx <= history.endIndex)
defer { _invariantCheck() }

history.removeSubrange(stackIdx...)
if history.isEmpty {
startState = nil
}
self.value = value
}
}
}

extension Processor._StoredCapture: CustomStringConvertible {
  /// A textual summary of the stored range and value.
  ///
  /// The string is assembled from the stored fields one at a time. Describing
  /// `self` as a whole is not an option here: `String(describing:)` consults
  /// `description` for `CustomStringConvertible` types, so it would re-enter
  /// this getter without bound.
  var description: String {
    let rangeText = String(describing: range)
    let valueText = String(describing: value)
    return "(range: \(rangeText), value: \(valueText))"
  }
}

Expand All @@ -124,10 +100,10 @@ struct MECaptureList {

func latestUntyped(from input: String) -> Array<Substring?> {
values.map {
guard let last = $0.latest else {
guard let range = $0.range else {
return nil
}
return input[last.0]
return input[range]
}
}
}
58 changes: 33 additions & 25 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ extension Processor {
input[bounds]
}

// Advance `currentPosition` by one element of `input`, without any checks
// or failure signalling.
//
// The only guard is the `assert` below, which is not evaluated in optimized
// (-O) builds, so the caller must already know that `currentPosition != end`.
mutating func _uncheckedForcedConsumeOne() {
assert(currentPosition != end)
// Moves the stored position forward in place.
input.formIndex(after: &currentPosition)
}

// Advance in our input
//
// Returns whether the advance succeeded. On failure, our
Expand Down Expand Up @@ -145,30 +151,26 @@ extension Processor {
return slice
}

mutating func match(_ e: Element) {
// Match against the current input element. On a match, consumes it and
// returns `true`; on a mismatch, signals failure and returns `false`.
mutating func match(_ e: Element) -> Bool {
guard let cur = load(), cur == e else {
signalFailure()
return
}
if consume(1) {
controller.step()
return false
}
_uncheckedForcedConsumeOne()
return true
}

// Match against the current input prefix. Returns `true` if every element
// of `seq` matched in order; otherwise failure has already been signaled
// and the result is `false`.
mutating func matchSeq<C: Collection>(
_ seq: C
) where C.Element == Input.Element {
let count = seq.count

guard let inputSlice = load(count: count),
seq.elementsEqual(inputSlice)
else {
signalFailure()
return
}
guard consume(.init(count)) else {
fatalError("unreachable")
) -> Bool where C.Element == Input.Element {
for e in seq {
guard match(e) else { return false }
}
controller.step()
return true
}

mutating func signalFailure() {
Expand Down Expand Up @@ -356,18 +358,24 @@ extension Processor {

case .match:
let reg = payload.element
match(registers[reg])
if match(registers[reg]) {
controller.step()
}

case .matchSequence:
let reg = payload.sequence
let seq = registers[reg]
matchSeq(seq)
if matchSeq(seq) {
controller.step()
}

case .matchSlice:
let (lower, upper) = payload.pairedPosPos
let range = registers[lower]..<registers[upper]
let slice = input[range]
matchSeq(slice)
if matchSeq(slice) {
controller.step()
}

case .consumeBy:
let reg = payload.consumer
Expand Down Expand Up @@ -439,19 +447,19 @@ extension Processor {
// Should we assert it's not finished yet?
// What's the behavior there?
let cap = storedCaptures[capNum]
guard let range = cap.latest?.range else {
guard let range = cap.range else {
signalFailure()
return
}
matchSeq(input[range])
if matchSeq(input[range]) {
controller.step()
}

case .beginCapture:
let capNum = Int(
asserting: payload.capture.rawValue)

let sp = makeSavePoint(self.currentPC)
storedCaptures[capNum].startCapture(
currentPosition, initial: sp)
storedCaptures[capNum].startCapture(currentPosition)
controller.step()

case .endCapture:
Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Engine/Structuralize.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ extension CaptureList {
for (i, (cap, meStored)) in zip(captures, list.values).enumerated() {
let element = AnyRegexOutput.ElementRepresentation(
optionalDepth: cap.optionalDepth,
content: meStored.latest,
content: meStored.deconstructed,
name: cap.name,
referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key
)
Expand Down
9 changes: 7 additions & 2 deletions Sources/_StringProcessing/Regex/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,19 @@ extension Regex {
// FIXME: Something more efficient, likely an engine interface, and we
// should scrap the RegexConsumer crap and call this

let executor = Executor(program: regex.program.loweredProgram)
let graphemeSemantic = regex.initialOptions.semanticLevel == .graphemeCluster

var low = inputRange.lowerBound
let high = inputRange.upperBound
while true {
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
if let m: Regex<Output>.Match = try executor.match(
input, in: low..<high, .partialFromFront
) {
return m
}
if low >= high { return nil }
if regex.initialOptions.semanticLevel == .graphemeCluster {
if graphemeSemantic {
input.formIndex(after: &low)
} else {
input.unicodeScalars.formIndex(after: &low)
Expand Down