From 1c50722b90ef6a9ce09f29719ff8ba31295ae4d4 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 15 Jun 2022 10:19:40 -0700 Subject: [PATCH 1/4] Reuse executor in firstMatch --- Sources/_StringProcessing/Regex/Match.swift | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 78c9c8c9f..26f053a02 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -153,11 +153,14 @@ extension Regex { ) throws -> Regex.Match? { // FIXME: Something more efficient, likely an engine interface, and we // should scrap the RegexConsumer crap and call this - + let executor = Executor(program: regex.program.loweredProgram) + var low = inputRange.lowerBound let high = inputRange.upperBound while true { - if let m = try _match(input, in: low...Match = try executor.match( + input, in: low..= high { return nil } From 218bd33d365ff2df789370548ba540fbe38ceae2 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 15 Jun 2022 12:12:58 -0700 Subject: [PATCH 2/4] Reuse processor next step: reuse registers --- .../_StringProcessing/Engine/Processor.swift | 24 +++++++++++- Sources/_StringProcessing/Executor.swift | 39 ++++++++++++++++++- Sources/_StringProcessing/Regex/Match.swift | 20 ++-------- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 8f777ad33..c65b330e6 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -32,7 +32,7 @@ struct Processor< typealias Element = Input.Element let input: Input - let bounds: Range + var bounds: Range let matchMode: MatchMode var currentPosition: Position @@ -94,6 +94,28 @@ extension Processor { assert(currentPosition >= start) assert(currentPosition <= end) } + + // note: should processor just hold the entire program then? + mutating func reset( + _ program: MEProgram, + newBounds: Range) + { + self.controller = Controller(pc: 0) + self.bounds = newBounds + self.currentPosition = bounds.lowerBound + + // note: implement registers.reset() and captures.reset()? + // this would avoid having to pass the program around + self.registers = Registers(program, bounds.upperBound) + self.storedCaptures = Array( + repeating: .init(), count: program.registerInfo.captures) + + self.cycleCount = 0 + self.savePoints = [] + self.callStack = [] + self.state = .inProgress + self.failureReason = nil + } } extension Processor { diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 532a41256..d9c55c3b4 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -27,7 +27,42 @@ struct Executor { ) throws -> Regex.Match? { var cpu = engine.makeProcessor( input: input, bounds: inputRange, matchMode: mode) - + return try consume(input, &cpu) + } + + @available(SwiftStdlib 5.7, *) + func firstMatch( + _ input: String, + in inputRange: Range, + level: MatchingOptions.SemanticLevel + ) throws -> Regex.Match? { + var low = inputRange.lowerBound + let high = inputRange.upperBound + let mode: MatchMode = .partialFromFront + var cpu = engine.makeProcessor( + input: input, bounds: inputRange, matchMode: mode) + + while true { + if let m: Regex.Match = try consume(input, &cpu) { + return m + } + + if low >= high { return nil } + if level == .graphemeCluster { + input.formIndex(after: &low) + } else { + input.unicodeScalars.formIndex(after: &low) + } + + cpu.reset(engine.program, newBounds: low..( + _ input: String, + _ cpu: inout Processor + ) throws -> Regex.Match? { guard let endIdx = cpu.consume() else { if let e = cpu.failureReason { throw e @@ -40,7 +75,7 @@ struct Executor { referencedCaptureOffsets: engine.program.referencedCaptureOffsets, namedCaptureOffsets: engine.program.namedCaptureOffsets) - let range = inputRange.lowerBound...Match = try executor.match( - input, in: low..= high { return nil } - if regex.initialOptions.semanticLevel == .graphemeCluster { - input.formIndex(after: &low) - } else { - input.unicodeScalars.formIndex(after: &low) - } - } + return try executor.firstMatch( + input, in: inputRange, + level: regex.initialOptions.semanticLevel + ) } } From b559197701f8fc935b9fbf19f8daa3e309745833 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 15 Jun 2022 12:53:05 -0700 Subject: [PATCH 3/4] Add register resetting --- .../_StringProcessing/Engine/Processor.swift | 21 +++++++------------ .../_StringProcessing/Engine/Registers.swift | 20 +++++++++++++++++- Sources/_StringProcessing/Executor.swift | 8 +++---- Sources/_StringProcessing/Regex/Match.swift | 2 -- 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index c65b330e6..6033ed2bc 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -94,22 +94,17 @@ extension Processor { assert(currentPosition >= start) assert(currentPosition <= end) } - - // note: should processor just hold the entire program then? - mutating func reset( - _ program: MEProgram, - newBounds: Range) + + mutating func reset(newBounds: Range) { - self.controller = Controller(pc: 0) self.bounds = newBounds + + self.controller = Controller(pc: 0) self.currentPosition = bounds.lowerBound - - // note: implement registers.reset() and captures.reset()? - // this would avoid having to pass the program around - self.registers = Registers(program, bounds.upperBound) - self.storedCaptures = Array( - repeating: .init(), count: program.registerInfo.captures) - + + self.registers.reset(bounds.upperBound) + self.storedCaptures = self.storedCaptures.map {_ in .init()} + self.cycleCount = 0 self.savePoints = [] self.callStack = [] diff --git a/Sources/_StringProcessing/Engine/Registers.swift b/Sources/_StringProcessing/Engine/Registers.swift index bc17f1215..9cc427a89 100644 --- a/Sources/_StringProcessing/Engine/Registers.swift +++ b/Sources/_StringProcessing/Engine/Registers.swift @@ -18,6 +18,8 @@ struct SentinelValue: Hashable, CustomStringConvertible { extension Processor { /// Our register file struct Registers { + var info: MEProgram.RegisterInfo + // currently, these are static readonly var elements: [Element] @@ -114,7 +116,7 @@ extension Processor.Registers { _ program: MEProgram, _ sentinel: Input.Index ) { - let info = program.registerInfo + self.info = program.registerInfo self.elements = program.staticElements assert(elements.count == info.elements) @@ -156,6 +158,22 @@ extension Processor.Registers { self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses) } + + mutating func reset(_ sentinel: Input.Index) { + // note: Is there any issue with the program transform functions holding + // state and not getting reset here? Do we care? + self.bools = Array(repeating: false, count: info.bools) + self.ints = Array(repeating: 0, count: info.ints) + self.floats = Array(repeating: 0, count: info.floats) + self.positions = Array(repeating: sentinel, count: info.positions) + self.values = Array( + repeating: SentinelValue(), count: info.values) + + self.instructionAddresses = Array(repeating: 0, count: info.instructionAddresses) + self.classStackAddresses = Array(repeating: 0, count: info.classStackAddresses) + self.positionStackAddresses = Array(repeating: 0, count: info.positionStackAddresses) + self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses) + } } extension MEProgram { diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index d9c55c3b4..88b194a1d 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -41,20 +41,20 @@ struct Executor { let mode: MatchMode = .partialFromFront var cpu = engine.makeProcessor( input: input, bounds: inputRange, matchMode: mode) - + while true { if let m: Regex.Match = try consume(input, &cpu) { return m } - + if low >= high { return nil } if level == .graphemeCluster { input.formIndex(after: &low) } else { input.unicodeScalars.formIndex(after: &low) } - - cpu.reset(engine.program, newBounds: low.. ) throws -> Regex.Match? { - // FIXME: Something more efficient, likely an engine interface, and we - // should scrap the RegexConsumer crap and call this let executor = Executor(program: regex.program.loweredProgram) return try executor.firstMatch( input, in: inputRange, From 7aa82fde79a4345c0ef0c0225450e804f567b337 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 15 Jun 2022 15:57:36 -0700 Subject: [PATCH 4/4] Clear arrays in place --- .../_StringProcessing/Engine/Processor.swift | 17 +++++++------ .../_StringProcessing/Engine/Registers.swift | 24 +++++++++++-------- Sources/_StringProcessing/Executor.swift | 15 ++++++++---- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 6033ed2bc..a67e621db 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -32,7 +32,7 @@ struct Processor< typealias Element = Input.Element let input: Input - var bounds: Range + let bounds: Range let matchMode: MatchMode var currentPosition: Position @@ -95,19 +95,18 @@ extension Processor { assert(currentPosition <= end) } - mutating func reset(newBounds: Range) + mutating func reset(newPosition: Position) { - self.bounds = newBounds - self.controller = Controller(pc: 0) - self.currentPosition = bounds.lowerBound + self.currentPosition = newPosition self.registers.reset(bounds.upperBound) - self.storedCaptures = self.storedCaptures.map {_ in .init()} + for idx in storedCaptures.indices { + storedCaptures[idx] = .init() + } - self.cycleCount = 0 - self.savePoints = [] - self.callStack = [] + self.savePoints.removeAll(keepingCapacity: true) + self.callStack.removeAll(keepingCapacity: true) self.state = .inProgress self.failureReason = nil } diff --git a/Sources/_StringProcessing/Engine/Registers.swift b/Sources/_StringProcessing/Engine/Registers.swift index 9cc427a89..e9ea3c4a9 100644 --- a/Sources/_StringProcessing/Engine/Registers.swift +++ b/Sources/_StringProcessing/Engine/Registers.swift @@ -162,17 +162,21 @@ extension Processor.Registers { mutating func reset(_ sentinel: Input.Index) { // note: Is there any issue with the program transform functions holding // state and not getting reset here? Do we care? - self.bools = Array(repeating: false, count: info.bools) - self.ints = Array(repeating: 0, count: info.ints) - self.floats = Array(repeating: 0, count: info.floats) - self.positions = Array(repeating: sentinel, count: info.positions) - self.values = Array( - repeating: SentinelValue(), count: info.values) + func clear(_ xs: inout [T], _ v: T) { + for idx in xs.indices { + xs[idx] = v + } + } - self.instructionAddresses = Array(repeating: 0, count: info.instructionAddresses) - self.classStackAddresses = Array(repeating: 0, count: info.classStackAddresses) - self.positionStackAddresses = Array(repeating: 0, count: info.positionStackAddresses) - self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses) + clear(&self.bools, false) + clear(&self.ints, 0) + clear(&self.floats, 0) + clear(&self.positions, sentinel) + clear(&self.values, SentinelValue()) + clear(&self.instructionAddresses, 0) + clear(&self.classStackAddresses, 0) + clear(&self.positionStackAddresses, 0) + clear(&self.savePointAddresses, 0) } } diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 88b194a1d..520145ca1 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -27,7 +27,7 @@ struct Executor { ) throws -> Regex.Match? { var cpu = engine.makeProcessor( input: input, bounds: inputRange, matchMode: mode) - return try consume(input, &cpu) + return try consume(input, &cpu, startingFrom: inputRange.lowerBound) } @available(SwiftStdlib 5.7, *) @@ -43,7 +43,11 @@ struct Executor { input: input, bounds: inputRange, matchMode: mode) while true { - if let m: Regex.Match = try consume(input, &cpu) { + if let m: Regex.Match = try consume( + input, + &cpu, + startingFrom: low + ) { return m } @@ -54,14 +58,15 @@ struct Executor { input.unicodeScalars.formIndex(after: &low) } - cpu.reset(newBounds: low..( _ input: String, - _ cpu: inout Processor + _ cpu: inout Processor, + startingFrom startIdx: String.Index ) throws -> Regex.Match? { guard let endIdx = cpu.consume() else { if let e = cpu.failureReason { @@ -75,7 +80,7 @@ struct Executor { referencedCaptureOffsets: engine.program.referencedCaptureOffsets, namedCaptureOffsets: engine.program.namedCaptureOffsets) - let range = cpu.bounds.lowerBound..