diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 8407f68ac..bcfc8a2c2 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -665,15 +665,19 @@ fileprivate extension Compiler.ByteCodeGen { } // If there's a capture transform, apply it now. if let transform = transform { - let fn = builder.makeTransformFunction { input, storedCapture in + let fn = builder.makeTransformFunction { input, cap in // If it's a substring capture with no custom value, apply the // transform directly to the substring to avoid existential traffic. - if let cap = storedCapture.latest, cap.value == nil { - return try transform(input[cap.range]) + // + // FIXME: separate out this code path. This is fragile, + // slow, and these are clearly different constructs + if let range = cap.range, cap.value == nil { + return try transform(input[range]) } + let value = constructExistentialOutputComponent( from: input, - component: storedCapture.latest, + component: cap.deconstructed, optionalCount: 0) return try transform(value) } diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 054612e71..ec7c3668a 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -32,59 +32,48 @@ extension Processor { struct _StoredCapture { - // Set whenever we push the very first capture, allows us - // to theoretically re-compute anything we want to later. - fileprivate var startState: SavePoint? = nil - - // Save the entire history as we go, so that backtracking - // can just lop-off aborted runs. - // - // Backtracking entries can specify a per-capture stack - // index so that we can abort anything that came after. - // - // By remembering the entire history, we waste space, but - // we get flexibility for now. - // - fileprivate var history: Array<(range: Range, value: Any?)> = [] + var range: Range? = nil + + var value: Any? = nil // An in-progress capture start fileprivate var currentCaptureBegin: Position? = nil fileprivate func _invariantCheck() { - if startState == nil { - assert(history.isEmpty) - assert(currentCaptureBegin == nil) - } else if currentCaptureBegin == nil { - assert(!history.isEmpty) + if range == nil { + assert(value == nil) } } // MARK: - IPI - var isEmpty: Bool { history.isEmpty } - - var latest: (range: Range, value: Any?)? { history.last } + var deconstructed: (range: Range, value: Any?)? { + guard let r = range else { return nil } + return (r, value) + } /// Start a new capture. If the previously started one was un-ended, - /// will clear it and restart. If this is the first start, will save `initial`. + /// will clear it and restart. mutating func startCapture( - _ idx: Position, initial: SavePoint + _ idx: Position ) { _invariantCheck() defer { _invariantCheck() } - if self.startState == nil { - self.startState = initial - } currentCaptureBegin = idx } mutating func endCapture(_ idx: Position) { _invariantCheck() - assert(currentCaptureBegin != nil) defer { _invariantCheck() } - history.append((currentCaptureBegin! ..< idx, value: nil)) + guard let low = currentCaptureBegin else { + fatalError("Invariant violated: ending unstarted capture") + } + + range = low.. Array { values.map { - guard let last = $0.latest else { + guard let range = $0.range else { return nil } - return input[last.0] + return input[range] } } } diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index da8bfea14..5f58394d3 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -103,6 +103,12 @@ extension Processor { input[bounds] } + // Advance in our input, without any checks or failure signalling + mutating func _uncheckedForcedConsumeOne() { + assert(currentPosition != end) + input.formIndex(after: ¤tPosition) + } + // Advance in our input // // Returns whether the advance succeeded. On failure, our @@ -145,30 +151,26 @@ extension Processor { return slice } - mutating func match(_ e: Element) { + // Match against the current input element. Returns whether + // it succeeded vs signaling an error. + mutating func match(_ e: Element) -> Bool { guard let cur = load(), cur == e else { signalFailure() - return - } - if consume(1) { - controller.step() + return false } + _uncheckedForcedConsumeOne() + return true } + + // Match against the current input prefix. Returns whether + // it succeeded vs signaling an error. mutating func matchSeq( _ seq: C - ) where C.Element == Input.Element { - let count = seq.count - - guard let inputSlice = load(count: count), - seq.elementsEqual(inputSlice) - else { - signalFailure() - return - } - guard consume(.init(count)) else { - fatalError("unreachable") + ) -> Bool where C.Element == Input.Element { + for e in seq { + guard match(e) else { return false } } - controller.step() + return true } mutating func signalFailure() { @@ -356,18 +358,24 @@ extension Processor { case .match: let reg = payload.element - match(registers[reg]) + if match(registers[reg]) { + controller.step() + } case .matchSequence: let reg = payload.sequence let seq = registers[reg] - matchSeq(seq) + if matchSeq(seq) { + controller.step() + } case .matchSlice: let (lower, upper) = payload.pairedPosPos let range = registers[lower]...Match = try executor.match( + input, in: low..= high { return nil } - if regex.initialOptions.semanticLevel == .graphemeCluster { + if graphemeSemantic { input.formIndex(after: &low) } else { input.unicodeScalars.formIndex(after: &low)