diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 808a1e498..c19996d44 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -391,9 +391,8 @@ extension DSLTree.CustomCharacterClass.Member { return { input, bounds in let curIdx = bounds.lowerBound - let nextIndex = isCharacterSemantic - ? input.index(after: curIdx) - : input.unicodeScalars.index(after: curIdx) + let nextIndex = input.index( + after: curIdx, isScalarSemantics: !isCharacterSemantic) // Under grapheme semantics, we compare based on single NFC scalars. If // such a character is not single scalar under NFC, the match fails. In @@ -603,9 +602,9 @@ extension AST.Atom.CharacterProperty { if p(input, bounds) != nil { return nil } // TODO: bounds check - return opts.semanticLevel == .graphemeCluster - ? input.index(after: bounds.lowerBound) - : input.unicodeScalars.index(after: bounds.lowerBound) + return input.index( + after: bounds.lowerBound, + isScalarSemantics: opts.semanticLevel == .unicodeScalar) } } diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 78baf9ce1..0476b882b 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -381,7 +381,7 @@ struct QuantifyPayload: RawRepresentable { case asciiBitset = 0 case asciiChar = 1 case any = 2 - case builtin = 4 + case builtinCC = 4 } // TODO: figure out how to better organize this... 
@@ -408,6 +408,14 @@ struct QuantifyPayload: RawRepresentable { var typeMask: UInt64 { 7 } var payloadMask: UInt64 { 0xFF_FF } + // Calculate the maximum number of trips, else UInt64.max if unbounded + var maxTrips: UInt64 { + guard let maxExtraTrips else { + return UInt64.max + } + return minTrips + maxExtraTrips + } + static func packInfoValues( _ kind: AST.Quantification.Kind, _ minTrips: Int, @@ -485,7 +493,7 @@ struct QuantifyPayload: RawRepresentable { + (model.isInverted ? 1 << 9 : 0) + (model.isStrictASCII ? 1 << 10 : 0) self.rawValue = packedModel - + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .builtin, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .builtinCC, isScalarSemantics: isScalarSemantics) } var type: PayloadType { @@ -531,7 +539,7 @@ struct QuantifyPayload: RawRepresentable { (self.rawValue & 1) == 1 } - var builtin: _CharacterClassModel.Representation { + var builtinCC: _CharacterClassModel.Representation { _CharacterClassModel.Representation(rawValue: self.rawValue & 0xFF)! } var builtinIsInverted: Bool { diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index 0dafd6720..33b13178b 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -223,6 +223,25 @@ extension String { else { return nil } return next } + + internal func matchRegexDot( + at currentPosition: Index, + limitedBy end: Index, + anyMatchesNewline: Bool, + isScalarSemantics: Bool + ) -> Index? 
{ + guard currentPosition < end else { return nil } + + if anyMatchesNewline { + return index( + after: currentPosition, isScalarSemantics: isScalarSemantics) + } + + return matchAnyNonNewline( + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) + } } // MARK: - Built-in character class matching diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index a0480cde6..b3d4818b0 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -1,166 +1,516 @@ +private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset + extension Processor { - func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { + internal mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind != .reluctant, ".reluctant is not supported by .quantify") + + let minMatches = payload.minTrips + let maxMatches = payload.maxTrips + let produceSavePointRange = payload.quantKind == .eager let isScalarSemantics = payload.isScalarSemantics + let isZeroOrMore = payload.minTrips == 0 && payload.maxExtraTrips == nil + let isOneOrMore = payload.minTrips == 1 && payload.maxExtraTrips == nil + + let matchResult: (next: String.Index, savePointRange: Range?)? 
switch payload.type { case .asciiBitset: - return input.matchASCIIBitset( - registers[payload.bitset], - at: currentPosition, - limitedBy: end, - isScalarSemantics: isScalarSemantics) + if isZeroOrMore { + matchResult = input.matchZeroOrMoreASCIIBitset( + registers[payload.bitset], + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics) + } else if isOneOrMore { + matchResult = input.matchOneOrMoreASCIIBitset( + registers[payload.bitset], + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics) + } else { + matchResult = input.matchQuantifiedASCIIBitset( + registers[payload.bitset], + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics) + } + case .asciiChar: - return input.matchScalar( - UnicodeScalar.init(_value: UInt32(payload.asciiChar)), - at: currentPosition, - limitedBy: end, - boundaryCheck: !isScalarSemantics, - isCaseInsensitive: false) - case .builtin: - guard currentPosition < end else { return nil } + if isZeroOrMore { + matchResult = input.matchZeroOrMoreScalar( + Unicode.Scalar(payload.asciiChar), + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics) + } else if isOneOrMore { + matchResult = input.matchOneOrMoreScalar( + Unicode.Scalar(payload.asciiChar), + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics) + } else { + matchResult = input.matchQuantifiedScalar( + Unicode.Scalar(payload.asciiChar), + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics) + } - // We only emit .quantify if it consumes a single 
character - return input.matchBuiltinCC( - payload.builtin, - at: currentPosition, - limitedBy: end, - isInverted: payload.builtinIsInverted, - isStrictASCII: payload.builtinIsStrict, - isScalarSemantics: isScalarSemantics) case .any: - guard currentPosition < end else { return nil } + if isZeroOrMore { + matchResult = input.matchZeroOrMoreRegexDot( + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + anyMatchesNewline: payload.anyMatchesNewline, + isScalarSemantics: isScalarSemantics) + } else if isOneOrMore { + matchResult = input.matchOneOrMoreRegexDot( + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + anyMatchesNewline: payload.anyMatchesNewline, + isScalarSemantics: isScalarSemantics) + } else { + matchResult = input.matchQuantifiedRegexDot( + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + anyMatchesNewline: payload.anyMatchesNewline, + isScalarSemantics: isScalarSemantics) + } - if payload.anyMatchesNewline { - if isScalarSemantics { - return input.unicodeScalars.index(after: currentPosition) - } - return input.index(after: currentPosition) + case .builtinCC: + if isZeroOrMore { + matchResult = input.matchZeroOrMoreBuiltinCC( + payload.builtinCC, + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + isInverted: payload.builtinIsInverted, + isStrictASCII: payload.builtinIsStrict, + isScalarSemantics: isScalarSemantics) + } else if isOneOrMore { + matchResult = input.matchOneOrMoreBuiltinCC( + payload.builtinCC, + at: currentPosition, + limitedBy: end, + produceSavePointRange: produceSavePointRange, + isInverted: payload.builtinIsInverted, + isStrictASCII: payload.builtinIsStrict, + isScalarSemantics: isScalarSemantics) + } else { + matchResult = input.matchQuantifiedBuiltinCC( + payload.builtinCC, + at: currentPosition, + limitedBy: end, + minMatches: 
minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isInverted: payload.builtinIsInverted, + isStrictASCII: payload.builtinIsStrict, + isScalarSemantics: isScalarSemantics) } + } - return input.matchAnyNonNewline( - at: currentPosition, - limitedBy: end, - isScalarSemantics: isScalarSemantics) + guard let (next, savePointRange) = matchResult else { + signalFailure() + return false + } + if let savePointRange { + assert(produceSavePointRange) + savePoints.append(makeQuantifiedSavePoint( + savePointRange, isScalarSemantics: payload.isScalarSemantics)) } + currentPosition = next + return true } +} - /// Generic quantify instruction interpreter - /// - Handles .eager and .posessive - /// - Handles arbitrary minTrips and maxExtraTrips - mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { - assert(payload.quantKind != .reluctant) +/// MARK: - Non-reluctant quantification operations on String - var trips = 0 - var maxExtraTrips = payload.maxExtraTrips +extension String { + /// Run the quant loop, using the supplied matching closure + /// + /// NOTE: inline-always to help eliminate the closure overhead, + /// simplify some of the looping structure, etc. + @inline(__always) + fileprivate func _runQuantLoop( + at currentPosition: Index, + limitedBy end: Index, + minMatches: UInt64, + maxMatches: UInt64, + produceSavePointRange: Bool, + isScalarSemantics: Bool, + _ doMatch: ( + _ currentPosition: Index, _ limitedBy: Index, _ isScalarSemantics: Bool + ) -> Index? + ) -> (next: Index, savePointRange: Range?)? { + var currentPosition = currentPosition + + // The range of backtracking positions to try. For zero-or-more, starts + // before any match happens. Always ends before the final match, since + // the final match is what is tried without backtracking. An empty range + // is valid and means a single backtracking position at rangeStart. 
+ var rangeStart = currentPosition + var rangeEnd = currentPosition + + var numMatches = 0 - while trips < payload.minTrips { - guard let next = _doQuantifyMatch(payload) else { - signalFailure() - return false + while numMatches < maxMatches { + guard let next = doMatch( + currentPosition, end, isScalarSemantics + ) else { + break + } + numMatches &+= 1 + if numMatches == minMatches { + // For this loop iteration, rangeEnd will actually trail rangeStart by + // a single match position. Next iteration, they will be equal + // (empty range denoting a single backtracking point). Note that we + // only ever return a range if we have exceeded `minMatches`; if we + // exactly match `minMatches` there are no backtracking positions to + // remember. + rangeStart = next } + rangeEnd = currentPosition currentPosition = next - trips += 1 + assert(currentPosition > rangeEnd) } - if maxExtraTrips == 0 { - // We're done - return true + guard numMatches >= minMatches else { + return nil } - guard let next = _doQuantifyMatch(payload) else { - return true + guard produceSavePointRange && numMatches > minMatches else { + // No backtracking positions to try + return (currentPosition, nil) } - maxExtraTrips = maxExtraTrips.map { $0 - 1 } + assert(rangeStart <= rangeEnd) - // Remember the range of valid positions in case we can create a quantified - // save point - let rangeStart = currentPosition - var rangeEnd = currentPosition - currentPosition = next + // NOTE: We can't assert that rangeEnd trails currentPosition by exactly + // one position, because newline-sequence in scalar semantic mode still + // matches two scalars - while true { - if maxExtraTrips == 0 { break } + return ( + currentPosition, + Range(uncheckedBounds: (lower: rangeStart, upper: rangeEnd)) + ) + } - guard let next = _doQuantifyMatch(payload) else { - break - } - maxExtraTrips = maxExtraTrips.map({$0 - 1}) - rangeEnd = currentPosition - currentPosition = next - } + // NOTE: [Zero|One]OrMore overloads are to 
specialize the inlined run loop, + // which has a perf impact. At the time of writing this, 10% for + // zero-or-more and 5% for one-or-more improvement, which could very well + // be much higher if/when the inner match functions are made faster. - if payload.quantKind == .eager { - savePoints.append(makeQuantifiedSavePoint( - rangeStart.. (next: Index, savePointRange: Range?)? { + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 0, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchASCIIBitset( + asciiBitset, + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) } - return true } - - /// Specialized quantify instruction interpreter for `*`, always succeeds - mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { - assert(payload.quantKind == .eager - && payload.minTrips == 0 - && payload.maxExtraTrips == nil) - _doRunEagerZeroOrMoreQuantify(payload) + fileprivate func matchOneOrMoreASCIIBitset( + _ asciiBitset: ASCIIBitset, + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? 
{ + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 1, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchASCIIBitset( + asciiBitset, + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) + } } - // NOTE: So-as to inline into one-or-more call, which makes a significant - // performance difference - @inline(__always) - mutating func _doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { - guard let next = _doQuantifyMatch(payload) else { - // Consumed no input, no point saved - return + fileprivate func matchQuantifiedASCIIBitset( + _ asciiBitset: ASCIIBitset, + at currentPosition: Index, + limitedBy end: Index, + minMatches: UInt64, + maxMatches: UInt64, + produceSavePointRange: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? { + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchASCIIBitset( + asciiBitset, + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) } + } - // Create a quantified save point for every part of the input matched up - // to the final position. - let rangeStart = currentPosition - var rangeEnd = currentPosition - currentPosition = next - while true { - guard let next = _doQuantifyMatch(payload) else { break } - rangeEnd = currentPosition - currentPosition = next + fileprivate func matchZeroOrMoreScalar( + _ scalar: Unicode.Scalar, + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? 
{ + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 0, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchScalar( + scalar, + at: currentPosition, + limitedBy: end, + boundaryCheck: !isScalarSemantics, + isCaseInsensitive: false) } + } + fileprivate func matchOneOrMoreScalar( + _ scalar: Unicode.Scalar, + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? { + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 1, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchScalar( + scalar, + at: currentPosition, + limitedBy: end, + boundaryCheck: !isScalarSemantics, + isCaseInsensitive: false) - savePoints.append(makeQuantifiedSavePoint(rangeStart.. Bool { - assert(payload.quantKind == .eager - && payload.minTrips == 1 - && payload.maxExtraTrips == nil) + fileprivate func matchQuantifiedScalar( + _ scalar: Unicode.Scalar, + at currentPosition: Index, + limitedBy end: Index, + minMatches: UInt64, + maxMatches: UInt64, + produceSavePointRange: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? 
{ + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchScalar( + scalar, + at: currentPosition, + limitedBy: end, + boundaryCheck: !isScalarSemantics, + isCaseInsensitive: false) - // Match at least once - guard let next = _doQuantifyMatch(payload) else { - signalFailure() - return false } + } - // Run `a+` as `aa*` - currentPosition = next - _doRunEagerZeroOrMoreQuantify(payload) - return true + fileprivate func matchZeroOrMoreBuiltinCC( + _ builtinCC: _CharacterClassModel.Representation, + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + isInverted: Bool, + isStrictASCII: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? { + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 0, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchBuiltinCC( + builtinCC, + at: currentPosition, + limitedBy: end, + isInverted: isInverted, + isStrictASCII: isStrictASCII, + isScalarSemantics: isScalarSemantics) + } + } + fileprivate func matchOneOrMoreBuiltinCC( + _ builtinCC: _CharacterClassModel.Representation, + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + isInverted: Bool, + isStrictASCII: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? 
{ + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 1, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchBuiltinCC( + builtinCC, + at: currentPosition, + limitedBy: end, + isInverted: isInverted, + isStrictASCII: isStrictASCII, + isScalarSemantics: isScalarSemantics) + } + } + + fileprivate func matchQuantifiedBuiltinCC( + _ builtinCC: _CharacterClassModel.Representation, + at currentPosition: Index, + limitedBy end: Index, + minMatches: UInt64, + maxMatches: UInt64, + produceSavePointRange: Bool, + isInverted: Bool, + isStrictASCII: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? { + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchBuiltinCC( + builtinCC, + at: currentPosition, + limitedBy: end, + isInverted: isInverted, + isStrictASCII: isStrictASCII, + isScalarSemantics: isScalarSemantics) + } } - /// Specialized quantify instruction interpreter for ? - mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool { - assert(payload.minTrips == 0 - && payload.maxExtraTrips == 1) - let next = _doQuantifyMatch(payload) - guard let idx = next else { - return true // matched zero times + fileprivate func matchZeroOrMoreRegexDot( + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + anyMatchesNewline: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? 
{ + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 0, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchRegexDot( + at: currentPosition, + limitedBy: end, + anyMatchesNewline: anyMatchesNewline, + isScalarSemantics: isScalarSemantics) } - if payload.quantKind != .possessive { - // Save the zero match - savePoints.append(makeSavePoint(resumingAt: currentPC+1)) + } + fileprivate func matchOneOrMoreRegexDot( + at currentPosition: Index, + limitedBy end: Index, + produceSavePointRange: Bool, + anyMatchesNewline: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? { + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: 1, + maxMatches: UInt64.max, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchRegexDot( + at: currentPosition, + limitedBy: end, + anyMatchesNewline: anyMatchesNewline, + isScalarSemantics: isScalarSemantics) + } + } + + fileprivate func matchQuantifiedRegexDot( + at currentPosition: Index, + limitedBy end: Index, + minMatches: UInt64, + maxMatches: UInt64, + produceSavePointRange: Bool, + anyMatchesNewline: Bool, + isScalarSemantics: Bool + ) -> (next: Index, savePointRange: Range?)? 
{ + _runQuantLoop( + at: currentPosition, + limitedBy: end, + minMatches: minMatches, + maxMatches: maxMatches, + produceSavePointRange: produceSavePointRange, + isScalarSemantics: isScalarSemantics + ) { currentPosition, end, isScalarSemantics in + matchRegexDot( + at: currentPosition, + limitedBy: end, + anyMatchesNewline: anyMatchesNewline, + isScalarSemantics: isScalarSemantics) } - currentPosition = idx - return true } } + + diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 86365322b..310b5d932 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -515,23 +515,7 @@ extension Processor { controller.step() } case .quantify: - let quantPayload = payload.quantify - let matched: Bool - switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.maxExtraTrips) { - case (.reluctant, _, _): - assertionFailure(".reluctant is not supported by .quantify") - return - case (.eager, 0, nil): - runEagerZeroOrMoreQuantify(quantPayload) - matched = true - case (.eager, 1, nil): - matched = runEagerOneOrMoreQuantify(quantPayload) - case (_, 0, 1): - matched = runZeroOrOneQuantify(quantPayload) - default: - matched = runQuantify(quantPayload) - } - if matched { + if runQuantify(payload.quantify) { controller.step() } diff --git a/Sources/_StringProcessing/Utility/Misc.swift b/Sources/_StringProcessing/Utility/Misc.swift index 8555ec85c..d63370b55 100644 --- a/Sources/_StringProcessing/Utility/Misc.swift +++ b/Sources/_StringProcessing/Utility/Misc.swift @@ -65,3 +65,15 @@ enum QuickResult { case unknown } +extension String { + /// Index after in either grapheme or scalar view + func index(after idx: Index, isScalarSemantics: Bool) -> Index { + if isScalarSemantics { + return unicodeScalars.index(after: idx) + } else { + return index(after: idx) + } + } +} + + diff --git a/Tests/RegexTests/MatchTests.swift 
b/Tests/RegexTests/MatchTests.swift index ea59cbc5c..47f8f4f9a 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2659,11 +2659,11 @@ extension RegexTests { func testQuantifyOptimization() throws { // test that the maximum values for minTrips and maxExtraTrips are handled correctly let maxStorable = Int(QuantifyPayload.maxStorableTrips) - let maxmaxExtraTrips = "a{,\(maxStorable)}" - expectProgram(for: maxmaxExtraTrips, contains: [.quantify]) - firstMatchTest(maxmaxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) - firstMatchTest(maxmaxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) - XCTAssertNil(try Regex(maxmaxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) + let maxExtraTrips = "a{,\(maxStorable)}" + expectProgram(for: maxExtraTrips, contains: [.quantify]) + firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) + XCTAssertNil(try Regex(maxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) let maxMinTrips = "a{\(maxStorable),}" expectProgram(for: maxMinTrips, contains: [.quantify])