Skip to content

Reuse the executor in firstMatch #489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct Processor<
typealias Element = Input.Element

let input: Input
let bounds: Range<Position>
var bounds: Range<Position>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a bug (or a series of bugs) that @natecook1000 is working on, but we probably don't want to change the bounds here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we're fine with leaving the lower bound at the beginning of the string? I guess that still makes sense as the bounds of the string, and when we reset we just need to set currentPosition to wherever we want to start from?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, though it might temporarily break some firstMatch tests until some anchors get fixed. @natecook1000 can you work with Lily here?

let matchMode: MatchMode
var currentPosition: Position

Expand Down Expand Up @@ -94,6 +94,23 @@ extension Processor {
assert(currentPosition >= start)
assert(currentPosition <= end)
}

/// Rewinds this processor so it can be run again over `newBounds`.
///
/// Replaces `bounds`, restarts the controller at program counter 0, moves
/// `currentPosition` to the lower bound of the new range, and clears the
/// per-run state (registers, stored captures, save points, call stack,
/// cycle count, state, and failure reason).
///
/// - Parameter newBounds: The range the next run is limited to. Its lower
///   bound becomes the starting position, and its upper bound is handed to
///   the register file's `reset(_:)`.
mutating func reset(newBounds: Range<Position>) {
  // Search window and cursor.
  bounds = newBounds
  controller = Controller(pc: 0)
  currentPosition = newBounds.lowerBound

  // Register file and captures: one freshly initialized capture per
  // existing slot, so the slot count is preserved.
  registers.reset(newBounds.upperBound)
  storedCaptures = storedCaptures.map { _ in .init() }

  // Bookkeeping for the run itself.
  cycleCount = 0
  savePoints = []
  callStack = []
  state = .inProgress
  failureReason = nil
}
}

extension Processor {
Expand Down
20 changes: 19 additions & 1 deletion Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ struct SentinelValue: Hashable, CustomStringConvertible {
extension Processor {
/// Our register file
struct Registers {
var info: MEProgram<Input>.RegisterInfo

// currently, these are static readonly
var elements: [Element]

Expand Down Expand Up @@ -114,7 +116,7 @@ extension Processor.Registers {
_ program: MEProgram<Input>,
_ sentinel: Input.Index
) {
let info = program.registerInfo
self.info = program.registerInfo

self.elements = program.staticElements
assert(elements.count == info.elements)
Expand Down Expand Up @@ -156,6 +158,22 @@ extension Processor.Registers {

self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses)
}

/// Re-initializes the register arrays assigned below, sizing each one from
/// the counts recorded in `info`.
///
/// - Parameter sentinel: The index written into every position register.
///
/// Only the arrays assigned in this method are touched; `elements` is left
/// as it is.
mutating func reset(_ sentinel: Input.Index) {
  // Open question carried over from the original author: the program's
  // transform functions may hold state that is not reset here. Is that a
  // problem, and do we care?
  let counts = info

  // Value registers.
  bools = .init(repeating: false, count: counts.bools)
  ints = .init(repeating: 0, count: counts.ints)
  floats = .init(repeating: 0, count: counts.floats)
  positions = .init(repeating: sentinel, count: counts.positions)
  values = .init(repeating: SentinelValue(), count: counts.values)

  // Address registers.
  instructionAddresses = .init(
    repeating: 0, count: counts.instructionAddresses)
  classStackAddresses = .init(
    repeating: 0, count: counts.classStackAddresses)
  positionStackAddresses = .init(
    repeating: 0, count: counts.positionStackAddresses)
  savePointAddresses = .init(
    repeating: 0, count: counts.savePointAddresses)
}
}

extension MEProgram {
Expand Down
37 changes: 36 additions & 1 deletion Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,42 @@ struct Executor {
) throws -> Regex<Output>.Match? {
var cpu = engine.makeProcessor(
input: input, bounds: inputRange, matchMode: mode)
return try consume(input, &cpu)
}

/// Finds the first match in `input` by trying successive start positions
/// within `inputRange`, reusing one processor across all attempts.
///
/// - Parameters:
///   - input: The string to search.
///   - inputRange: The range of `input` to search. Attempts start at its
///     lower bound and never move past its upper bound.
///   - level: Selects how the start position advances after a failed
///     attempt: by one `Character` for `.graphemeCluster`, otherwise by one
///     Unicode scalar.
/// - Returns: The first match produced by `consume`, or `nil` once an
///   attempt starting at the upper bound has also failed.
/// - Throws: Whatever `consume` throws.
@available(SwiftStdlib 5.7, *)
func firstMatch<Output>(
  _ input: String,
  in inputRange: Range<String.Index>,
  level: MatchingOptions.SemanticLevel
) throws -> Regex<Output>.Match? {
  let end = inputRange.upperBound
  var start = inputRange.lowerBound
  var cpu = engine.makeProcessor(
    input: input, bounds: inputRange, matchMode: .partialFromFront)

  while true {
    if let match: Regex<Output>.Match = try consume(input, &cpu) {
      return match
    }

    // The attempt at `end` itself has now been made; nothing left to try.
    guard start < end else { return nil }

    start = level == .graphemeCluster
      ? input.index(after: start)
      : input.unicodeScalars.index(after: start)

    // Reuse the processor rather than building a new one per attempt.
    cpu.reset(newBounds: start..<end)
  }
}

@available(SwiftStdlib 5.7, *)
func consume<Output>(
_ input: String,
_ cpu: inout Processor<String>
) throws -> Regex<Output>.Match? {
guard let endIdx = cpu.consume() else {
if let e = cpu.failureReason {
throw e
Expand All @@ -40,7 +75,7 @@ struct Executor {
referencedCaptureOffsets: engine.program.referencedCaptureOffsets,
namedCaptureOffsets: engine.program.namedCaptureOffsets)

let range = inputRange.lowerBound..<endIdx
let range = cpu.bounds.lowerBound..<endIdx
let caps = engine.program.captureList.createElements(capList, input)

// FIXME: This is a workaround for not tracking (or
Expand Down
21 changes: 5 additions & 16 deletions Sources/_StringProcessing/Regex/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -151,22 +151,11 @@ extension Regex {
_ input: String,
in inputRange: Range<String.Index>
) throws -> Regex<Output>.Match? {
// FIXME: Something more efficient, likely an engine interface, and we
// should scrap the RegexConsumer crap and call this

var low = inputRange.lowerBound
let high = inputRange.upperBound
while true {
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
return m
}
if low >= high { return nil }
if regex.initialOptions.semanticLevel == .graphemeCluster {
input.formIndex(after: &low)
} else {
input.unicodeScalars.formIndex(after: &low)
}
}
let executor = Executor(program: regex.program.loweredProgram)
return try executor.firstMatch(
input, in: inputRange,
level: regex.initialOptions.semanticLevel
)
}
}

Expand Down