Skip to content

Overhaul quantification fast-path #689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 33 additions & 33 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -459,16 +459,16 @@ fileprivate extension Compiler.ByteCodeGen {
assert(high != 0)
assert((0...(high ?? Int.max)).contains(low))

let extraTrips: Int?
let maxExtraTrips: Int?
if let h = high {
extraTrips = h - low
maxExtraTrips = h - low
} else {
extraTrips = nil
maxExtraTrips = nil
}
let minTrips = low
assert((extraTrips ?? 1) >= 0)
assert((maxExtraTrips ?? 1) >= 0)

if tryEmitFastQuant(child, updatedKind, minTrips, extraTrips) {
if tryEmitFastQuant(child, updatedKind, minTrips, maxExtraTrips) {
return
}

Expand All @@ -486,19 +486,19 @@ fileprivate extension Compiler.ByteCodeGen {
decrement %minTrips and fallthrough

loop-body:
<if can't guarantee forward progress && extraTrips = nil>:
<if can't guarantee forward progress && maxExtraTrips = nil>:
mov currentPosition %pos
evaluate the subexpression
<if can't guarantee forward progress && extraTrips = nil>:
<if can't guarantee forward progress && maxExtraTrips = nil>:
if %pos is currentPosition:
goto exit
goto min-trip-count control block

exit-policy control block:
if %extraTrips is zero:
if %maxExtraTrips is zero:
goto exit
else:
decrement %extraTrips and fallthrough
decrement %maxExtraTrips and fallthrough

<if eager>:
save exit and goto loop-body
Expand All @@ -525,12 +525,12 @@ fileprivate extension Compiler.ByteCodeGen {
/* fallthrough */
"""

// Specialization based on `extraTrips` for 0 or unbounded
// Specialization based on `maxExtraTrips` for 0 or unbounded
_ = """
exit-policy control block:
<if extraTrips == 0>:
<if maxExtraTrips == 0>:
goto exit
<if extraTrips == .unbounded>:
<if maxExtraTrips == .unbounded>:
/* fallthrough */
"""

Expand Down Expand Up @@ -563,12 +563,12 @@ fileprivate extension Compiler.ByteCodeGen {
minTripsReg = nil
}

let extraTripsReg: IntRegister?
if (extraTrips ?? 0) > 0 {
extraTripsReg = builder.makeIntRegister(
initialValue: extraTrips!)
let maxExtraTripsReg: IntRegister?
if (maxExtraTrips ?? 0) > 0 {
maxExtraTripsReg = builder.makeIntRegister(
initialValue: maxExtraTrips!)
} else {
extraTripsReg = nil
maxExtraTripsReg = nil
}

// Set up a dummy save point for possessive to update
Expand Down Expand Up @@ -600,7 +600,7 @@ fileprivate extension Compiler.ByteCodeGen {
let startPosition: PositionRegister?
let emitPositionChecking =
(!optimizationsEnabled || !child.guaranteesForwardProgress) &&
extraTrips == nil
maxExtraTrips == nil

if emitPositionChecking {
startPosition = builder.makePositionRegister()
Expand All @@ -610,7 +610,7 @@ fileprivate extension Compiler.ByteCodeGen {
}
try emitNode(child)
if emitPositionChecking {
// in all quantifier cases, no matter what minTrips or extraTrips is,
// in all quantifier cases, no matter what minTrips or maxExtraTrips is,
// if we have a successful non-advancing match, branch to exit because it
// can match an arbitrary number of times
builder.buildCondBranch(to: exit, ifSamePositionAs: startPosition!)
Expand All @@ -623,20 +623,20 @@ fileprivate extension Compiler.ByteCodeGen {
}

// exit-policy:
// condBranch(to: exit, ifZeroElseDecrement: %extraTrips)
// condBranch(to: exit, ifZeroElseDecrement: %maxExtraTrips)
// <eager: split(to: loop, saving: exit)>
// <possessive:
// clearSavePoint
// split(to: loop, saving: exit)>
// <reluctant: save(restoringAt: loop)>
builder.label(exitPolicy)
switch extraTrips {
switch maxExtraTrips {
case nil: break
case 0: builder.buildBranch(to: exit)
default:
assert(extraTripsReg != nil, "logic inconsistency")
assert(maxExtraTripsReg != nil, "logic inconsistency")
builder.buildCondBranch(
to: exit, ifZeroElseDecrement: extraTripsReg!)
to: exit, ifZeroElseDecrement: maxExtraTripsReg!)
}

switch updatedKind {
Expand Down Expand Up @@ -666,12 +666,12 @@ fileprivate extension Compiler.ByteCodeGen {
_ child: DSLTree.Node,
_ kind: AST.Quantification.Kind,
_ minTrips: Int,
_ extraTrips: Int?
_ maxExtraTrips: Int?
) -> Bool {
let isScalarSemantics = options.semanticLevel == .unicodeScalar
guard optimizationsEnabled
&& minTrips <= QuantifyPayload.maxStorableTrips
&& extraTrips ?? 0 <= QuantifyPayload.maxStorableTrips
&& maxExtraTrips ?? 0 <= QuantifyPayload.maxStorableTrips
&& kind != .reluctant else {
return false
}
Expand All @@ -681,7 +681,7 @@ fileprivate extension Compiler.ByteCodeGen {
guard let bitset = ccc.asAsciiBitset(options) else {
return false
}
builder.buildQuantify(bitset: bitset, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)

case .atom(let atom):
switch atom {
Expand All @@ -690,17 +690,17 @@ fileprivate extension Compiler.ByteCodeGen {
guard let val = c._singleScalarAsciiValue else {
return false
}
builder.buildQuantify(asciiChar: val, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)

case .any:
builder.buildQuantifyAny(
matchesNewlines: true, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
case .anyNonNewline:
builder.buildQuantifyAny(
matchesNewlines: false, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
case .dot:
builder.buildQuantifyAny(
matchesNewlines: options.dotMatchesNewline, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)

case .characterClass(let cc):
// Custom character class that consumes a single grapheme
Expand All @@ -709,19 +709,19 @@ fileprivate extension Compiler.ByteCodeGen {
model: model,
kind,
minTrips,
extraTrips,
maxExtraTrips,
isScalarSemantics: isScalarSemantics)
default:
return false
}
case .convertedRegexLiteral(let node, _):
return tryEmitFastQuant(node, kind, minTrips, extraTrips)
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
case .nonCapturingGroup(let groupKind, let node):
// .nonCapture nonCapturingGroups are ignored during compilation
guard groupKind.ast == .nonCapture else {
return false
}
return tryEmitFastQuant(node, kind, minTrips, extraTrips)
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
default:
return false
}
Expand Down
80 changes: 43 additions & 37 deletions Sources/_StringProcessing/Engine/Backtracking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ extension Processor {
struct SavePoint {
var pc: InstructionAddress
var pos: Position?

// Quantifiers may store a range of positions to restore to
var rangeStart: Position?
var rangeEnd: Position?
var quantifiedRange: Range<Position>?

// FIXME: refactor, for now this field is only used for quantifier save
// points. We should try to separate out the concerns better.
Expand Down Expand Up @@ -49,64 +49,70 @@ extension Processor {
return (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters)
}

var rangeIsEmpty: Bool { rangeEnd == nil }

mutating func updateRange(newEnd: Input.Index) {
if rangeStart == nil {
rangeStart = newEnd
}
rangeEnd = newEnd
// Whether this save point is quantified, meaning it has a range of
// possible positions to explore.
var isQuantified: Bool {
quantifiedRange != nil
}

/// Move the next range position into pos, removing it from the range
mutating func takePositionFromRange(_ input: Input) {
assert(!rangeIsEmpty)
pos = rangeEnd!
shrinkRange(input)
}
mutating func takePositionFromQuantifiedRange(_ input: Input) {
assert(isQuantified)
let range = quantifiedRange!
pos = range.upperBound
if range.isEmpty {
// Becomes a normal save point
quantifiedRange = nil
return
}

/// Shrink the range of the save point by one index, essentially dropping the last index
mutating func shrinkRange(_ input: Input) {
assert(!rangeIsEmpty)
if rangeEnd == rangeStart {
// The range is now empty
rangeStart = nil
rangeEnd = nil
// Shrink the range
let newUpper: Position
if isScalarSemantics {
newUpper = input.unicodeScalars.index(before: range.upperBound)
} else {
if isScalarSemantics {
input.unicodeScalars.formIndex(before: &rangeEnd!)
} else {
input.formIndex(before: &rangeEnd!)
}
newUpper = input.index(before: range.upperBound)
}
quantifiedRange = range.lowerBound..<newUpper
}
}

func makeSavePoint(
_ pc: InstructionAddress,
addressOnly: Bool = false
resumingAt pc: InstructionAddress
) -> SavePoint {
SavePoint(
pc: pc,
pos: currentPosition,
quantifiedRange: nil,
isScalarSemantics: false,
stackEnd: .init(callStack.count),
captureEnds: storedCaptures,
intRegisters: registers.ints,
posRegisters: registers.positions)
}

func makeAddressOnlySavePoint(
resumingAt pc: InstructionAddress
) -> SavePoint {
SavePoint(
pc: pc,
pos: addressOnly ? nil : currentPosition,
rangeStart: nil,
rangeEnd: nil,
isScalarSemantics: false, // FIXME: refactor away
pos: nil,
quantifiedRange: nil,
isScalarSemantics: false,
stackEnd: .init(callStack.count),
captureEnds: storedCaptures,
intRegisters: registers.ints,
posRegisters: registers.positions)
}

func startQuantifierSavePoint(

func makeQuantifiedSavePoint(
_ range: Range<Position>,
isScalarSemantics: Bool
) -> SavePoint {
// Restores to the instruction AFTER the current quantifier instruction
SavePoint(
pc: controller.pc + 1,
pos: nil,
rangeStart: nil,
rangeEnd: nil,
quantifiedRange: range,
isScalarSemantics: isScalarSemantics,
stackEnd: .init(callStack.count),
captureEnds: storedCaptures,
Expand Down
Loading