diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 8f777ad33..a67e621db 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -94,6 +94,22 @@ extension Processor { assert(currentPosition >= start) assert(currentPosition <= end) } + + mutating func reset(newPosition: Position) + { + self.controller = Controller(pc: 0) + self.currentPosition = newPosition + + self.registers.reset(bounds.upperBound) + for idx in storedCaptures.indices { + storedCaptures[idx] = .init() + } + + self.savePoints.removeAll(keepingCapacity: true) + self.callStack.removeAll(keepingCapacity: true) + self.state = .inProgress + self.failureReason = nil + } } extension Processor { diff --git a/Sources/_StringProcessing/Engine/Registers.swift b/Sources/_StringProcessing/Engine/Registers.swift index bc17f1215..e9ea3c4a9 100644 --- a/Sources/_StringProcessing/Engine/Registers.swift +++ b/Sources/_StringProcessing/Engine/Registers.swift @@ -18,6 +18,8 @@ struct SentinelValue: Hashable, CustomStringConvertible { extension Processor { /// Our register file struct Registers { + var info: MEProgram.RegisterInfo + // currently, these are static readonly var elements: [Element] @@ -114,7 +116,7 @@ extension Processor.Registers { _ program: MEProgram, _ sentinel: Input.Index ) { - let info = program.registerInfo + self.info = program.registerInfo self.elements = program.staticElements assert(elements.count == info.elements) @@ -156,6 +158,26 @@ extension Processor.Registers { self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses) } + + mutating func reset(_ sentinel: Input.Index) { + // note: Is there any issue with the program transform functions holding + // state and not getting reset here? Do we care? + func clear(_ xs: inout [T], _ v: T) { + for idx in xs.indices { + xs[idx] = v + } + } + + clear(&self.bools, false) + clear(&self.ints, 0) + clear(&self.floats, 0) + clear(&self.positions, sentinel) + clear(&self.values, SentinelValue()) + clear(&self.instructionAddresses, 0) + clear(&self.classStackAddresses, 0) + clear(&self.positionStackAddresses, 0) + clear(&self.savePointAddresses, 0) + } } extension MEProgram { diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 532a41256..520145ca1 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -27,7 +27,47 @@ struct Executor { ) throws -> Regex.Match? { var cpu = engine.makeProcessor( input: input, bounds: inputRange, matchMode: mode) + return try consume(input, &cpu, startingFrom: inputRange.lowerBound) + } + + @available(SwiftStdlib 5.7, *) + func firstMatch( + _ input: String, + in inputRange: Range, + level: MatchingOptions.SemanticLevel + ) throws -> Regex.Match? { + var low = inputRange.lowerBound + let high = inputRange.upperBound + let mode: MatchMode = .partialFromFront + var cpu = engine.makeProcessor( + input: input, bounds: inputRange, matchMode: mode) + + while true { + if let m: Regex.Match = try consume( + input, + &cpu, + startingFrom: low + ) { + return m + } + + if low >= high { return nil } + if level == .graphemeCluster { + input.formIndex(after: &low) + } else { + input.unicodeScalars.formIndex(after: &low) + } + cpu.reset(newPosition: low) + } + } + + @available(SwiftStdlib 5.7, *) + func consume( + _ input: String, + _ cpu: inout Processor, + startingFrom startIdx: String.Index + ) throws -> Regex.Match? { guard let endIdx = cpu.consume() else { if let e = cpu.failureReason { throw e @@ -40,7 +80,7 @@ struct Executor { referencedCaptureOffsets: engine.program.referencedCaptureOffsets, namedCaptureOffsets: engine.program.namedCaptureOffsets) - let range = inputRange.lowerBound.. ) throws -> Regex.Match? { - // FIXME: Something more efficient, likely an engine interface, and we - // should scrap the RegexConsumer crap and call this - - var low = inputRange.lowerBound - let high = inputRange.upperBound - while true { - if let m = try _match(input, in: low..= high { return nil } - if regex.initialOptions.semanticLevel == .graphemeCluster { - input.formIndex(after: &low) - } else { - input.unicodeScalars.formIndex(after: &low) - } - } + let executor = Executor(program: regex.program.loweredProgram) + return try executor.firstMatch( + input, in: inputRange, + level: regex.initialOptions.semanticLevel + ) } }