From 3cb56d602bc7b0cc13fba46306bd927ba9a7839b Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 23 Jul 2023 12:58:18 -0700 Subject: [PATCH 01/10] wip: small refactoring --- .../_StringProcessing/Engine/Backtracking.swift | 17 ++++++++++++++--- .../_StringProcessing/Engine/MEQuantify.swift | 10 +++++----- .../_StringProcessing/Engine/Processor.swift | 13 ++++++------- Sources/_StringProcessing/Engine/Tracing.swift | 2 +- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index 48470ce91..65ce4f906 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -13,6 +13,7 @@ extension Processor { struct SavePoint { var pc: InstructionAddress var pos: Position? + // Quantifiers may store a range of positions to restore to var rangeStart: Position? var rangeEnd: Position? @@ -49,10 +50,20 @@ extension Processor { return (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters) } - var rangeIsEmpty: Bool { rangeEnd == nil } + // Whether this save point is quantified, meaning it has a range of + // possible positions to explore. + var isQuantified: Bool { + if rangeEnd == nil { + assert(rangeStart == nil) + return false + } + assert(rangeStart != nil) + return true + } mutating func updateRange(newEnd: Input.Index) { if rangeStart == nil { + assert(rangeEnd == nil) rangeStart = newEnd } rangeEnd = newEnd @@ -60,14 +71,14 @@ extension Processor { /// Move the next range position into pos, and removing it from the range mutating func takePositionFromRange(_ input: Input) { - assert(!rangeIsEmpty) + assert(isQuantified) pos = rangeEnd! shrinkRange(input) } /// Shrink the range of the save point by one index, essentially dropping the last index mutating func shrinkRange(_ input: Input) { - assert(!rangeIsEmpty) + assert(isQuantified) if rangeEnd == rangeStart { // The range is now empty rangeStart = nil diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index c6f55ee34..9c18997b2 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -64,7 +64,7 @@ extension Processor { } let next = _doQuantifyMatch(payload) guard let idx = next else { - if !savePoint.rangeIsEmpty { + if savePoint.isQuantified { // The last save point has saved the current, non-matching position, // so it's unneeded. savePoint.shrinkRange(input) @@ -80,7 +80,7 @@ extension Processor { return false } - if !savePoint.rangeIsEmpty { + if savePoint.isQuantified { savePoints.append(savePoint) } return true @@ -104,7 +104,7 @@ extension Processor { // The last save point has saved the current position, so it's unneeded savePoint.shrinkRange(input) - if !savePoint.rangeIsEmpty { + if savePoint.isQuantified { savePoints.append(savePoint) } return true @@ -125,13 +125,13 @@ extension Processor { savePoint.updateRange(newEnd: currentPosition) } - if savePoint.rangeIsEmpty { + if !savePoint.isQuantified { signalFailure() return false } // The last save point has saved the current position, so it's unneeded savePoint.shrinkRange(input) - if !savePoint.rangeIsEmpty { + if savePoint.isQuantified { savePoints.append(savePoint) } return true diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 6ecc49df7..5d788bc51 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -335,15 +335,14 @@ extension Processor { ) let idx = savePoints.index(before: savePoints.endIndex) - // If we have a quantifier save point, move the next range position into pos - if !savePoints[idx].rangeIsEmpty { + + // If we have a quantifier save point, move the next range position into + // pos instead of removing it + if savePoints[idx].isQuantified { savePoints[idx].takePositionFromRange(input) - } - // If we have a normal save point or an empty quantifier save point, remove it - if savePoints[idx].rangeIsEmpty { - (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure - } else { (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints[idx].destructure + } else { + (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure } assert(stackEnd.rawValue <= callStack.count) diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index b0ce67555..4b7a6fcf5 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -118,7 +118,7 @@ extension Processor.SavePoint { if let p = self.pos { posStr = "\(input.distance(from: input.startIndex, to: p))" } else { - if rangeIsEmpty { + if !isQuantified { posStr = "" } else { let startStr = "\(input.distance(from: input.startIndex, to: rangeStart!))" From bd7d7c46bb24cd530c3667e8e003d7e3576b9643 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 23 Jul 2023 13:41:49 -0700 Subject: [PATCH 02/10] wip: more refactor --- .../Engine/Backtracking.swift | 56 +++++++++++++++++-- .../_StringProcessing/Engine/MECapture.swift | 5 +- .../_StringProcessing/Engine/MEQuantify.swift | 33 ++++++----- .../_StringProcessing/Engine/Processor.swift | 13 ++--- 4 files changed, 75 insertions(+), 32 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index 65ce4f906..deaff5b3e 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -94,20 +94,68 @@ extension Processor { } func makeSavePoint( - _ pc: InstructionAddress, - addressOnly: Bool = false + resumingAt pc: InstructionAddress ) -> SavePoint { SavePoint( pc: pc, - pos: addressOnly ? nil : currentPosition, + pos: currentPosition, rangeStart: nil, rangeEnd: nil, - isScalarSemantics: false, // FIXME: refactor away + isScalarSemantics: false, stackEnd: .init(callStack.count), captureEnds: storedCaptures, intRegisters: registers.ints, posRegisters: registers.positions) } + + func makeAddressOnlySavePoint( + resumingAt pc: InstructionAddress + ) -> SavePoint { + SavePoint( + pc: pc, + pos: nil, + rangeStart: nil, + rangeEnd: nil, + isScalarSemantics: false, + stackEnd: .init(callStack.count), + captureEnds: storedCaptures, + intRegisters: registers.ints, + posRegisters: registers.positions) + } + + func makeQuantifiedSavePoint( + _ range: Range, + isScalarSemantics: Bool + ) -> SavePoint { + SavePoint( + pc: controller.pc + 1, + pos: nil, + rangeStart: range.lowerBound, + rangeEnd: range.upperBound, + isScalarSemantics: isScalarSemantics, + stackEnd: .init(callStack.count), + captureEnds: storedCaptures, + intRegisters: registers.ints, + posRegisters: registers.positions) + } +// +// func makeSavePoint( +// resumeAt pc: InstructionAddress? = nil, +// quantifiedRange: Range? = nil, +// addressOnly: Bool = false, +// isScalarSemantics: Bool = false +// ) -> SavePoint { +// SavePoint( +// pc: pc ?? controller.pc + 1, +// pos: addressOnly ? nil : currentPosition, +// rangeStart: quantifiedRange?.lowerBound, +// rangeEnd: quantifiedRange?.lowerBound, +// isScalarSemantics: false, // FIXME: refactor away +// stackEnd: .init(callStack.count), +// captureEnds: storedCaptures, +// intRegisters: registers.ints, +// posRegisters: registers.positions) +// } func startQuantifierSavePoint( isScalarSemantics: Bool diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 4bea21133..9bb4ecb06 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -76,10 +76,7 @@ extension Processor { currentCaptureBegin = nil } - mutating func registerValue( - _ value: Any, - overwriteInitial: SavePoint? = nil - ) { + mutating func registerValue(_ value: Any) { _invariantCheck() defer { _invariantCheck() } diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 9c18997b2..5e46cc5cf 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -86,28 +86,28 @@ extension Processor { return true } - /// Specialized quantify instruction interpreter for * - mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { + /// Specialized quantify instruction interpreter for `*`, always succeeds + mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { assert(payload.quantKind == .eager && payload.minTrips == 0 && payload.extraTrips == nil) - var savePoint = startQuantifierSavePoint( - isScalarSemantics: payload.isScalarSemantics - ) - while true { - savePoint.updateRange(newEnd: currentPosition) - let next = _doQuantifyMatch(payload) - guard let idx = next else { break } - currentPosition = idx + guard let next = _doQuantifyMatch(payload) else { + // Consumed no input, no point saved + return } - // The last save point has saved the current position, so it's unneeded - savePoint.shrinkRange(input) - if savePoint.isQuantified { - savePoints.append(savePoint) + // Create a quantified save point for every part of the input matched up + // to the final position. + let rangeStart = currentPosition + var rangeEnd = currentPosition + while true { + guard let next = _doQuantifyMatch(payload) else { break } + rangeEnd = currentPosition + currentPosition = next } - return true + + savePoints.append(makeQuantifiedSavePoint(rangeStart.. Date: Sun, 23 Jul 2023 13:55:34 -0700 Subject: [PATCH 03/10] wip: more refactoring --- .../_StringProcessing/Engine/MEQuantify.swift | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 5e46cc5cf..3b8197cab 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -91,7 +91,13 @@ extension Processor { assert(payload.quantKind == .eager && payload.minTrips == 0 && payload.extraTrips == nil) + _doRunEagerZeroOrMoreQuantify(payload) + } + // NOTE: So-as to inline into one-or-more call, which makes a significant + // performance difference + @inline(__always) + mutating func _doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { guard let next = _doQuantifyMatch(payload) else { // Consumed no input, no point saved return @@ -110,30 +116,21 @@ extension Processor { savePoints.append(makeQuantifiedSavePoint(rangeStart.. Bool { assert(payload.quantKind == .eager && payload.minTrips == 1 && payload.extraTrips == nil) - var savePoint = startQuantifierSavePoint( - isScalarSemantics: payload.isScalarSemantics - ) - while true { - let next = _doQuantifyMatch(payload) - guard let idx = next else { break } - currentPosition = idx - savePoint.updateRange(newEnd: currentPosition) - } - if !savePoint.isQuantified { + // Match at least once + guard let next = _doQuantifyMatch(payload) else { signalFailure() return false } - // The last save point has saved the current position, so it's unneeded - savePoint.shrinkRange(input) - if savePoint.isQuantified { - savePoints.append(savePoint) - } + + // Run `a+` as `aa*` + currentPosition = next + _doRunEagerZeroOrMoreQuantify(payload) return true } From 2b694467e85f6bc5738ef363f036e136aacc1a7c Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 23 Jul 2023 14:09:51 -0700 Subject: [PATCH 04/10] wip: more refactoring --- .../_StringProcessing/Engine/MEQuantify.swift | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 3b8197cab..427de3ab9 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -50,18 +50,26 @@ extension Processor { mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { var trips = 0 var extraTrips = payload.extraTrips + + while trips < payload.minTrips { + guard let next = _doQuantifyMatch(payload) else { + signalFailure() + return false + } + currentPosition = next + trips += 1 + } + var savePoint = startQuantifierSavePoint( isScalarSemantics: payload.isScalarSemantics ) - while true { - if trips >= payload.minTrips { - if extraTrips == 0 { break } - extraTrips = extraTrips.map({$0 - 1}) - if payload.quantKind == .eager { - savePoint.updateRange(newEnd: currentPosition) - } + if extraTrips == 0 { break } + extraTrips = extraTrips.map({$0 - 1}) + if payload.quantKind == .eager { + savePoint.updateRange(newEnd: currentPosition) } + let next = _doQuantifyMatch(payload) guard let idx = next else { if savePoint.isQuantified { @@ -75,11 +83,6 @@ extension Processor { trips += 1 } - if trips < payload.minTrips { - signalFailure() - return false - } - if savePoint.isQuantified { savePoints.append(savePoint) } From a7c09b7ce29a5ec3e6a193a4c13e215fdf28e165 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 23 Jul 2023 14:13:28 -0700 Subject: [PATCH 05/10] wip: refactor to say max extra trips --- Sources/_StringProcessing/ByteCodeGen.swift | 66 +++++++++---------- .../Engine/InstPayload.swift | 32 ++++----- .../_StringProcessing/Engine/MEBuilder.swift | 16 ++--- .../_StringProcessing/Engine/MEQuantify.swift | 14 ++-- .../_StringProcessing/Engine/Processor.swift | 2 +- .../_StringProcessing/Engine/Tracing.swift | 2 +- Tests/RegexTests/MatchTests.swift | 12 ++-- 7 files changed, 72 insertions(+), 72 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index cb2e9ed04..494d512e2 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -459,16 +459,16 @@ fileprivate extension Compiler.ByteCodeGen { assert(high != 0) assert((0...(high ?? Int.max)).contains(low)) - let extraTrips: Int? + let maxExtraTrips: Int? if let h = high { - extraTrips = h - low + maxExtraTrips = h - low } else { - extraTrips = nil + maxExtraTrips = nil } let minTrips = low - assert((extraTrips ?? 1) >= 0) + assert((maxExtraTrips ?? 1) >= 0) - if tryEmitFastQuant(child, updatedKind, minTrips, extraTrips) { + if tryEmitFastQuant(child, updatedKind, minTrips, maxExtraTrips) { return } @@ -486,19 +486,19 @@ fileprivate extension Compiler.ByteCodeGen { decrement %minTrips and fallthrough loop-body: - : + : mov currentPosition %pos evaluate the subexpression - : + : if %pos is currentPosition: goto exit goto min-trip-count control block exit-policy control block: - if %extraTrips is zero: + if %maxExtraTrips is zero: goto exit else: - decrement %extraTrips and fallthrough + decrement %maxExtraTrips and fallthrough : save exit and goto loop-body @@ -525,12 +525,12 @@ fileprivate extension Compiler.ByteCodeGen { /* fallthrough */ """ - // Specialization based on `extraTrips` for 0 or unbounded + // Specialization based on `maxExtraTrips` for 0 or unbounded _ = """ exit-policy control block: - : + : goto exit - : + : /* fallthrough */ """ @@ -563,12 +563,12 @@ fileprivate extension Compiler.ByteCodeGen { minTripsReg = nil } - let extraTripsReg: IntRegister? - if (extraTrips ?? 0) > 0 { - extraTripsReg = builder.makeIntRegister( - initialValue: extraTrips!) + let maxExtraTripsReg: IntRegister? + if (maxExtraTrips ?? 0) > 0 { + maxExtraTripsReg = builder.makeIntRegister( + initialValue: maxExtraTrips!) } else { - extraTripsReg = nil + maxExtraTripsReg = nil } // Set up a dummy save point for possessive to update @@ -600,7 +600,7 @@ fileprivate extension Compiler.ByteCodeGen { let startPosition: PositionRegister? let emitPositionChecking = (!optimizationsEnabled || !child.guaranteesForwardProgress) && - extraTrips == nil + maxExtraTrips == nil if emitPositionChecking { startPosition = builder.makePositionRegister() @@ -610,7 +610,7 @@ fileprivate extension Compiler.ByteCodeGen { } try emitNode(child) if emitPositionChecking { - // in all quantifier cases, no matter what minTrips or extraTrips is, + // in all quantifier cases, no matter what minTrips or maxExtraTrips is, // if we have a successful non-advancing match, branch to exit because it // can match an arbitrary number of times builder.buildCondBranch(to: exit, ifSamePositionAs: startPosition!) @@ -623,20 +623,20 @@ fileprivate extension Compiler.ByteCodeGen { } // exit-policy: - // condBranch(to: exit, ifZeroElseDecrement: %extraTrips) + // condBranch(to: exit, ifZeroElseDecrement: %maxExtraTrips) // // // Bool { let isScalarSemantics = options.semanticLevel == .unicodeScalar guard optimizationsEnabled && minTrips <= QuantifyPayload.maxStorableTrips - && extraTrips ?? 0 <= QuantifyPayload.maxStorableTrips + && maxExtraTrips ?? 0 <= QuantifyPayload.maxStorableTrips && kind != .reluctant else { return false } @@ -681,7 +681,7 @@ fileprivate extension Compiler.ByteCodeGen { guard let bitset = ccc.asAsciiBitset(options) else { return false } - builder.buildQuantify(bitset: bitset, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .atom(let atom): switch atom { @@ -690,17 +690,17 @@ fileprivate extension Compiler.ByteCodeGen { guard let val = c._singleScalarAsciiValue else { return false } - builder.buildQuantify(asciiChar: val, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .any: builder.buildQuantifyAny( - matchesNewlines: true, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .anyNonNewline: builder.buildQuantifyAny( - matchesNewlines: false, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .dot: builder.buildQuantifyAny( - matchesNewlines: options.dotMatchesNewline, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .characterClass(let cc): // Custom character class that consumes a single grapheme @@ -709,19 +709,19 @@ fileprivate extension Compiler.ByteCodeGen { model: model, kind, minTrips, - extraTrips, + maxExtraTrips, isScalarSemantics: isScalarSemantics) default: return false } case .convertedRegexLiteral(let node, _): - return tryEmitFastQuant(node, kind, minTrips, extraTrips) + return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips) case .nonCapturingGroup(let groupKind, let node): // .nonCapture nonCapturingGroups are ignored during compilation guard groupKind.ast == .nonCapture else { return false } - return tryEmitFastQuant(node, kind, minTrips, extraTrips) + return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips) default: return false } diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index a0e849851..d569fcd32 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -392,18 +392,18 @@ struct QuantifyPayload: RawRepresentable { // b39-b38 - isScalarSemantics // b38-b35 - Payload type (one of 4 types, stored on 3 bits) // b35-b27 - minTrips (8 bit int) - // b27-b18 - extraTrips (8 bit value, one bit for nil) + // b27-b18 - maxExtraTrips (8 bit value, one bit for nil) // b18-b16 - Quantification type (one of three types) // b16-b0 - Payload value (depends on payload type) static var quantKindShift: UInt64 { 16 } - static var extraTripsShift: UInt64 { 18 } + static var maxExtraTripsShift: UInt64 { 18 } static var minTripsShift: UInt64 { 27 } static var typeShift: UInt64 { 35 } static var maxStorableTrips: UInt64 { (1 << 8) - 1 } static var isScalarSemanticsBit: UInt64 { 1 &<< 38 } var quantKindMask: UInt64 { 3 } - var extraTripsMask: UInt64 { 0x1FF } + var maxExtraTripsMask: UInt64 { 0x1FF } var minTripsMask: UInt64 { 0xFF } var typeMask: UInt64 { 7 } var payloadMask: UInt64 { 0xFF_FF } @@ -411,7 +411,7 @@ struct QuantifyPayload: RawRepresentable { static func packInfoValues( _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, _ type: PayloadType, isScalarSemantics: Bool ) -> UInt64 { @@ -425,10 +425,10 @@ struct QuantifyPayload: RawRepresentable { kindVal = 2 } // TODO: refactor / reimplement - let extraTripsVal: UInt64 = extraTrips == nil ? 1 : UInt64(extraTrips!) << 1 + let maxExtraTripsVal: UInt64 = maxExtraTrips == nil ? 1 : UInt64(maxExtraTrips!) << 1 let scalarSemanticsBit = isScalarSemantics ? Self.isScalarSemanticsBit : 0 return (kindVal << QuantifyPayload.quantKindShift) | - (extraTripsVal << QuantifyPayload.extraTripsShift) | + (maxExtraTripsVal << QuantifyPayload.maxExtraTripsShift) | (UInt64(minTrips) << QuantifyPayload.minTripsShift) | (type.rawValue << QuantifyPayload.typeShift) | scalarSemanticsBit @@ -443,41 +443,41 @@ struct QuantifyPayload: RawRepresentable { bitset: AsciiBitsetRegister, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { assert(bitset.bits <= _payloadMask) self.rawValue = bitset.bits - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .bitset, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .bitset, isScalarSemantics: isScalarSemantics) } init( asciiChar: UInt8, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { self.rawValue = UInt64(asciiChar) - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .asciiChar, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .asciiChar, isScalarSemantics: isScalarSemantics) } init( matchesNewlines: Bool, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { self.rawValue = (matchesNewlines ? 1 : 0) - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .any, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .any, isScalarSemantics: isScalarSemantics) } init( model: _CharacterClassModel, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { assert(model.cc.rawValue < 0xFF) @@ -485,7 +485,7 @@ struct QuantifyPayload: RawRepresentable { + (model.isInverted ? 1 << 9 : 0) + (model.isStrictASCII ? 1 << 10 : 0) self.rawValue = packedModel - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .builtin, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .builtin, isScalarSemantics: isScalarSemantics) } var type: PayloadType { @@ -506,8 +506,8 @@ struct QuantifyPayload: RawRepresentable { (self.rawValue >> QuantifyPayload.minTripsShift) & minTripsMask } - var extraTrips: UInt64? { - let val = (self.rawValue >> QuantifyPayload.extraTripsShift) & extraTripsMask + var maxExtraTrips: UInt64? { + let val = (self.rawValue >> QuantifyPayload.maxExtraTripsShift) & maxExtraTripsMask if val == 1 { return nil } else { diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index e26a00fb1..44c938e71 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -225,48 +225,48 @@ extension MEProgram.Builder { bitset: DSLTree.CustomCharacterClass.AsciiBitset, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(bitset: makeAsciiBitset(bitset), kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(bitset: makeAsciiBitset(bitset), kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildQuantify( asciiChar: UInt8, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(asciiChar: asciiChar, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(asciiChar: asciiChar, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildQuantifyAny( matchesNewlines: Bool, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(matchesNewlines: matchesNewlines, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(matchesNewlines: matchesNewlines, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildQuantify( model: _CharacterClassModel, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(model: model,kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(model: model,kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildAccept() { diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 427de3ab9..4b899f25e 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -46,10 +46,10 @@ extension Processor { /// Generic quantify instruction interpreter /// - Handles .eager and .posessive - /// - Handles arbitrary minTrips and extraTrips + /// - Handles arbitrary minTrips and maxExtraTrips mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { var trips = 0 - var extraTrips = payload.extraTrips + var maxExtraTrips = payload.maxExtraTrips while trips < payload.minTrips { guard let next = _doQuantifyMatch(payload) else { @@ -64,8 +64,8 @@ extension Processor { isScalarSemantics: payload.isScalarSemantics ) while true { - if extraTrips == 0 { break } - extraTrips = extraTrips.map({$0 - 1}) + if maxExtraTrips == 0 { break } + maxExtraTrips = maxExtraTrips.map({$0 - 1}) if payload.quantKind == .eager { savePoint.updateRange(newEnd: currentPosition) } @@ -93,7 +93,7 @@ extension Processor { mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { assert(payload.quantKind == .eager && payload.minTrips == 0 - && payload.extraTrips == nil) + && payload.maxExtraTrips == nil) _doRunEagerZeroOrMoreQuantify(payload) } @@ -123,7 +123,7 @@ extension Processor { mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { assert(payload.quantKind == .eager && payload.minTrips == 1 - && payload.extraTrips == nil) + && payload.maxExtraTrips == nil) // Match at least once guard let next = _doQuantifyMatch(payload) else { @@ -140,7 +140,7 @@ extension Processor { /// Specialized quantify instruction interpreter for ? mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool { assert(payload.minTrips == 0 - && payload.extraTrips == 1) + && payload.maxExtraTrips == 1) let next = _doQuantifyMatch(payload) guard let idx = next else { return true // matched zero times diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 483dc3169..8224f791b 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -517,7 +517,7 @@ extension Processor { case .quantify: let quantPayload = payload.quantify let matched: Bool - switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.extraTrips) { + switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.maxExtraTrips) { case (.reluctant, _, _): assertionFailure(".reluctant is not supported by .quantify") return diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index 4b7a6fcf5..7ff44f2a7 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -93,7 +93,7 @@ extension Instruction: CustomStringConvertible { return "\(opcode) \(imm) -> int[\(reg)]" case .quantify: let payload = payload.quantify - return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.extraTrips?.description ?? "unbounded" )" + return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.maxExtraTrips?.description ?? "unbounded" )" case .save: let resumeAddr = payload.addr return "\(opcode) \(resumeAddr)" diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index e5bb574e6..e8de075eb 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2657,13 +2657,13 @@ extension RegexTests { } func testQuantifyOptimization() throws { - // test that the maximum values for minTrips and extraTrips are handled correctly + // test that the maximum values for minTrips and maxExtraTrips are handled correctly let maxStorable = Int(QuantifyPayload.maxStorableTrips) - let maxExtraTrips = "a{,\(maxStorable)}" - expectProgram(for: maxExtraTrips, contains: [.quantify]) - firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) - firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) - XCTAssertNil(try Regex(maxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) + let maxmaxExtraTrips = "a{,\(maxStorable)}" + expectProgram(for: maxmaxExtraTrips, contains: [.quantify]) + firstMatchTest(maxmaxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxmaxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) + XCTAssertNil(try Regex(maxmaxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) let maxMinTrips = "a{\(maxStorable),}" expectProgram(for: maxMinTrips, contains: [.quantify]) From eb1f50519513abc22e4d45cada41fe99de74bd94 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 23 Jul 2023 14:39:51 -0700 Subject: [PATCH 06/10] more refactoring --- .../_StringProcessing/Engine/MEQuantify.swift | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 4b899f25e..8342df12d 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -60,31 +60,36 @@ extension Processor { trips += 1 } - var savePoint = startQuantifierSavePoint( - isScalarSemantics: payload.isScalarSemantics - ) + if maxExtraTrips == 0 { + // We're done + return true + } + + guard let next = _doQuantifyMatch(payload) else { + return true + } + maxExtraTrips = maxExtraTrips.map { $0 - 1 } + + // Remember the range of valid positions in case we can create a quantified + // save point + let rangeStart = currentPosition + var rangeEnd = currentPosition + currentPosition = next + while true { if maxExtraTrips == 0 { break } - maxExtraTrips = maxExtraTrips.map({$0 - 1}) - if payload.quantKind == .eager { - savePoint.updateRange(newEnd: currentPosition) - } - let next = _doQuantifyMatch(payload) - guard let idx = next else { - if savePoint.isQuantified { - // The last save point has saved the current, non-matching position, - // so it's unneeded. - savePoint.shrinkRange(input) - } + guard let next = _doQuantifyMatch(payload) else { break } - currentPosition = idx - trips += 1 + maxExtraTrips = maxExtraTrips.map({$0 - 1}) + rangeEnd = currentPosition + currentPosition = next } - if savePoint.isQuantified { - savePoints.append(savePoint) + if payload.quantKind == .eager { + savePoints.append(makeQuantifiedSavePoint( + rangeStart.. Date: Sun, 23 Jul 2023 14:53:42 -0700 Subject: [PATCH 07/10] more refactoring --- .../Engine/Backtracking.swift | 89 ++++--------------- .../_StringProcessing/Engine/Tracing.swift | 5 +- 2 files changed, 21 insertions(+), 73 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index deaff5b3e..dfd7be3a4 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -15,8 +15,7 @@ extension Processor { var pos: Position? // Quantifiers may store a range of positions to restore to - var rangeStart: Position? - var rangeEnd: Position? + var quantifiedRange: Range? // FIXME: refactor, for now this field is only used for quantifier save // points. We should try to separate out the concerns better. @@ -53,43 +52,28 @@ extension Processor { // Whether this save point is quantified, meaning it has a range of // possible positions to explore. var isQuantified: Bool { - if rangeEnd == nil { - assert(rangeStart == nil) - return false - } - assert(rangeStart != nil) - return true - } - - mutating func updateRange(newEnd: Input.Index) { - if rangeStart == nil { - assert(rangeEnd == nil) - rangeStart = newEnd - } - rangeEnd = newEnd + quantifiedRange != nil } /// Move the next range position into pos, and removing it from the range mutating func takePositionFromRange(_ input: Input) { assert(isQuantified) - pos = rangeEnd! - shrinkRange(input) - } + let range = quantifiedRange! + pos = range.upperBound + if range.isEmpty { + // Becomes a normal save point + quantifiedRange = nil + return + } - /// Shrink the range of the save point by one index, essentially dropping the last index - mutating func shrinkRange(_ input: Input) { - assert(isQuantified) - if rangeEnd == rangeStart { - // The range is now empty - rangeStart = nil - rangeEnd = nil + // Shrink the range + let newUpper: Position + if isScalarSemantics { + newUpper = input.unicodeScalars.index(before: range.upperBound) } else { - if isScalarSemantics { - input.unicodeScalars.formIndex(before: &rangeEnd!) - } else { - input.formIndex(before: &rangeEnd!) - } + newUpper = input.index(before: range.upperBound) } + quantifiedRange = range.lowerBound..? = nil, -// addressOnly: Bool = false, -// isScalarSemantics: Bool = false -// ) -> SavePoint { -// SavePoint( -// pc: pc ?? controller.pc + 1, -// pos: addressOnly ? nil : currentPosition, -// rangeStart: quantifiedRange?.lowerBound, -// rangeEnd: quantifiedRange?.lowerBound, -// isScalarSemantics: false, // FIXME: refactor away -// stackEnd: .init(callStack.count), -// captureEnds: storedCaptures, -// intRegisters: registers.ints, -// posRegisters: registers.positions) -// } - - func startQuantifierSavePoint( - isScalarSemantics: Bool - ) -> SavePoint { - // Restores to the instruction AFTER the current quantifier instruction - SavePoint( - pc: controller.pc + 1, - pos: nil, - rangeStart: nil, - rangeEnd: nil, + quantifiedRange: range, isScalarSemantics: isScalarSemantics, stackEnd: .init(callStack.count), captureEnds: storedCaptures, diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index 7ff44f2a7..90445d5ec 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -121,8 +121,9 @@ extension Processor.SavePoint { if !isQuantified { posStr = "" } else { - let startStr = "\(input.distance(from: input.startIndex, to: rangeStart!))" - let endStr = "\(input.distance(from: input.startIndex, to: rangeEnd!))" + let range = quantifiedRange! + let startStr = "\(input.distance(from: input.startIndex, to: range.lowerBound))" + let endStr = "\(input.distance(from: input.startIndex, to: range.upperBound))" posStr = "\(startStr)...\(endStr)" } } From 36b161275c991dc1b9869e20633a44774a0c1342 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 23 Jul 2023 14:55:58 -0700 Subject: [PATCH 08/10] refactoring --- Sources/_StringProcessing/Engine/Backtracking.swift | 2 +- Sources/_StringProcessing/Engine/Processor.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index dfd7be3a4..11e2db0e4 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -56,7 +56,7 @@ extension Processor { } /// Move the next range position into pos, and removing it from the range - mutating func takePositionFromRange(_ input: Input) { + mutating func takePositionFromQuantifiedRange(_ input: Input) { assert(isQuantified) let range = quantifiedRange! pos = range.upperBound diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 8224f791b..6e0a7774c 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -339,7 +339,7 @@ extension Processor { // If we have a quantifier save point, move the next range position into // pos instead of removing it if savePoints[idx].isQuantified { - savePoints[idx].takePositionFromRange(input) + savePoints[idx].takePositionFromQuantifiedRange(input) (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints[idx].destructure } else { (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure From b9df444285775b6e35a343adfcc89c7f5c7dfed9 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 2 Aug 2023 06:02:20 -0600 Subject: [PATCH 09/10] added assertions --- Sources/_StringProcessing/Engine/MEQuantify.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 8342df12d..3d6c96a44 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -48,6 +48,9 @@ extension Processor { /// - Handles .eager and .posessive /// - Handles arbitrary minTrips and maxExtraTrips mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind != .reluctant) + assert(payload.minTrips >= 2, "Should have hit a specialized path") + var trips = 0 var maxExtraTrips = payload.maxExtraTrips From d7bc19dd793e4a4d087e8de83b8f690623c32fe1 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 21 Aug 2023 09:36:02 -0600 Subject: [PATCH 10/10] wip: assertions --- Sources/_StringProcessing/Engine/MEQuantify.swift | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 3d6c96a44..2d187607c 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -49,7 +49,6 @@ extension Processor { /// - Handles arbitrary minTrips and maxExtraTrips mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { assert(payload.quantKind != .reluctant) - assert(payload.minTrips >= 2, "Should have hit a specialized path") var trips = 0 var maxExtraTrips = payload.maxExtraTrips @@ -93,6 +92,9 @@ extension Processor { if payload.quantKind == .eager { savePoints.append(makeQuantifiedSavePoint( rangeStart..