diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift index fdc718ce4..03e2c53ee 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift @@ -40,7 +40,7 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func contains(_ regex: Regex) -> Bool { + public func contains(_ regex: R) -> Bool { contains(RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift index 97f7248cd..64a5eb943 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift @@ -56,11 +56,11 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func firstRange(of regex: Regex) -> Range? { + public func firstRange(of regex: R) -> Range? { firstRange(of: RegexConsumer(regex)) } - public func lastRange(of regex: Regex) -> Range? { + public func lastRange(of regex: R) -> Range? { lastRange(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index d4b407202..aefdbce3f 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -33,16 +33,6 @@ public struct RangesCollection { } } -extension RangesCollection where Searcher: BidirectionalCollectionSearcher { - public func reversed() -> ReversedRangesCollection { - ReversedRangesCollection(base: base, searcher: searcher) - } - - public var last: Range? { - base.lastRange(of: searcher) - } -} - public struct RangesIterator: IteratorProtocol { public typealias Base = Searcher.Searched @@ -139,18 +129,6 @@ public struct ReversedRangesCollection { } } -extension ReversedRangesCollection - where Searcher: BidirectionalCollectionSearcher -{ - public func reversed() -> RangesCollection { - RangesCollection(base: base, searcher: searcher) - } - - public var last: Range? { - base.firstRange(of: searcher) - } -} - extension ReversedRangesCollection: Sequence { public struct Iterator: IteratorProtocol { let base: Base @@ -238,15 +216,15 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func ranges( - of regex: Regex - ) -> RangesCollection> { + public func ranges( + of regex: R + ) -> RangesCollection> { ranges(of: RegexConsumer(regex)) } - public func rangesFromBack( - of regex: Regex - ) -> ReversedRangesCollection> { + public func rangesFromBack( + of regex: R + ) -> ReversedRangesCollection> { rangesFromBack(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index 120378cec..36a28b381 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -50,11 +50,13 @@ extension RangeReplaceableCollection { maxReplacements: maxReplacements) } - public mutating func replace( + public mutating func replace< + Searcher: CollectionSearcher, Replacement: Collection + >( _ searcher: Searcher, - with replacement: R, + with replacement: Replacement, maxReplacements: Int = .max - ) where Searcher.Searched == SubSequence, R.Element == Element { + ) where Searcher.Searched == SubSequence, Replacement.Element == Element { self = replacing( searcher, with: replacement, @@ -147,8 +149,8 @@ extension RangeReplaceableCollection // MARK: Regex algorithms extension RangeReplaceableCollection where SubSequence == Substring { - public func replacing( - _ regex: Regex, + public func replacing( + _ regex: R, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max @@ -160,8 +162,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: maxReplacements) } - public func replacing( - _ regex: Regex, + public func replacing( + _ regex: R, with replacement: Replacement, maxReplacements: Int = .max ) -> Self where Replacement.Element == Element { @@ -172,8 +174,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: maxReplacements) } - public mutating func replace( - _ regex: Regex, + public mutating func replace( + _ regex: R, with replacement: Replacement, maxReplacements: Int = .max ) where Replacement.Element == Element { diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index cf64af478..ba2cda30b 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -25,12 +25,6 @@ public struct SplitCollection { } } -extension SplitCollection where Searcher: BidirectionalCollectionSearcher { - public func reversed() -> ReversedSplitCollection { - ReversedSplitCollection(ranges: ranges.reversed()) - } -} - extension SplitCollection: Sequence { public struct Iterator: IteratorProtocol { let base: Base @@ -144,14 +138,6 @@ public struct ReversedSplitCollection { } } -extension ReversedSplitCollection - where Searcher: BidirectionalCollectionSearcher -{ - public func reversed() -> SplitCollection { - SplitCollection(ranges: ranges.reversed()) - } -} - extension ReversedSplitCollection: Sequence { public struct Iterator: IteratorProtocol { let base: Base @@ -289,15 +275,15 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func split( - by separator: Regex - ) -> SplitCollection> { + public func split( + by separator: R + ) -> SplitCollection> { split(by: RegexConsumer(separator)) } - public func splitFromBack( - by separator: Regex - ) -> ReversedSplitCollection> { + public func splitFromBack( + by separator: R + ) -> ReversedSplitCollection> { splitFromBack(by: RegexConsumer(separator)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift b/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift index 0ed6faf2c..ee9432fb4 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift @@ -48,11 +48,11 @@ extension BidirectionalCollection where Element: Equatable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func starts(with regex: Regex) -> Bool { + public func starts(with regex: R) -> Bool { starts(with: RegexConsumer(regex)) } - public func ends(with regex: Regex) -> Bool { + public func ends(with regex: R) -> Bool { ends(with: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 9a5afc582..b2438bb3b 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -257,15 +257,15 @@ extension RangeReplaceableCollection // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func trimmingPrefix(_ regex: Regex) -> SubSequence { + public func trimmingPrefix(_ regex: R) -> SubSequence { trimmingPrefix(RegexConsumer(regex)) } - public func trimmingSuffix(_ regex: Regex) -> SubSequence { + public func trimmingSuffix(_ regex: R) -> SubSequence { trimmingSuffix(RegexConsumer(regex)) } - public func trimming(_ regex: Regex) -> SubSequence { + public func trimming(_ regex: R) -> SubSequence { trimming(RegexConsumer(regex)) } } @@ -273,32 +273,32 @@ extension BidirectionalCollection where SubSequence == Substring { extension RangeReplaceableCollection where Self: BidirectionalCollection, SubSequence == Substring { - public mutating func trimPrefix(_ regex: Regex) { + public mutating func trimPrefix(_ regex: R) { trimPrefix(RegexConsumer(regex)) } - public mutating func trimSuffix(_ regex: Regex) { + public mutating func trimSuffix(_ regex: R) { trimSuffix(RegexConsumer(regex)) } - public mutating func trim(_ regex: Regex) { - let consumer = RegexConsumer(regex) + public mutating func trim(_ regex: R) { + let consumer = RegexConsumer(regex) trimPrefix(consumer) trimSuffix(consumer) } } extension Substring { - public mutating func trimPrefix(_ regex: Regex) { + public mutating func trimPrefix(_ regex: R) { trimPrefix(RegexConsumer(regex)) } - public mutating func trimSuffix(_ regex: Regex) { + public mutating func trimSuffix(_ regex: R) { trimSuffix(RegexConsumer(regex)) } - public mutating func trim(_ regex: Regex) { - let consumer = RegexConsumer(regex) + public mutating func trim(_ regex: R) { + let consumer = RegexConsumer(regex) trimPrefix(consumer) trimSuffix(consumer) } diff --git a/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift index 52b4802d9..c9b92b9ec 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift @@ -56,7 +56,7 @@ extension PredicateConsumer: StatelessCollectionSearcher { } extension PredicateConsumer: BackwardCollectionSearcher, - StatelessBackwardCollectionSearcher + BackwardStatelessCollectionSearcher where Searched: BidirectionalCollection { public typealias BackwardSearched = Consumed diff --git a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift index 12ceaa155..5af689cbc 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift @@ -9,56 +9,54 @@ // //===----------------------------------------------------------------------===// -import _MatchingEngine +public struct RegexConsumer< + R: RegexProtocol, Consumed: BidirectionalCollection +> where Consumed.SubSequence == Substring { + // TODO: Should `Regex` itself implement these protocols? + let regex: R -public struct RegexConsumer - where Consumed.SubSequence == Substring -{ - // TODO: consider let, for now lets us toggle tracing - var vm: Executor - - // FIXME: Possibility of fatal error isn't user friendly - public init(_ regex: Regex) { - do { - self.vm = .init( - program: try Compiler(ast: regex.ast).emit()) - } catch { - fatalError("error: \(error)") - } + public init(_ regex: R) { + self.regex = regex } +} - public init(parsing regex: String) throws { - self.vm = try _compileRegex(regex) - } - - func _consuming( +extension RegexConsumer { + func _matchingConsuming( _ consumed: Substring, in range: Range - ) -> String.Index? { - let result = vm.execute( - input: consumed.base, - in: range, - mode: .partialFromFront) - return result?.range.upperBound + ) -> (upperBound: String.Index, match: Match)? { + guard let result = regex._match( + consumed.base, + in: range, mode: .partialFromFront + ) else { return nil } + return (result.range.upperBound, result.match) } +} + +// TODO: Explicitly implement the non-matching consumer/searcher protocols as +// well, taking advantage of the fact that the captures can be ignored + +extension RegexConsumer: MatchingCollectionConsumer { + public typealias Match = R.Match - public func consuming( + public func matchingConsuming( _ consumed: Consumed, in range: Range - ) -> String.Index? { - _consuming(consumed[...], in: range) + ) -> (upperBound: String.Index, match: Match)? { + _matchingConsuming(consumed[...], in: range) } } // TODO: We'll want to bake backwards into the engine -extension RegexConsumer: BidirectionalCollectionConsumer { - public func consumingBack( +extension RegexConsumer: BidirectionalMatchingCollectionConsumer { + public func matchingConsumingBack( _ consumed: Consumed, in range: Range - ) -> String.Index? { + ) -> (lowerBound: String.Index, match: Match)? { var i = range.lowerBound while true { - if let end = _consuming(consumed[...], in: i.. - ) -> Range? { - ConsumerSearcher(consumer: self).search(searched, in: range) + ) -> (range: Range, match: Match)? { + ConsumerSearcher(consumer: self).matchingSearch(searched, in: range) } } // TODO: Bake in search-back to engine too -extension RegexConsumer: StatelessBackwardCollectionSearcher { +extension RegexConsumer: BackwardMatchingStatelessCollectionSearcher { public typealias BackwardSearched = Consumed - public func searchBack( + public func matchingSearchBack( _ searched: BackwardSearched, in range: Range - ) -> Range? { - ConsumerSearcher(consumer: self).searchBack(searched, in: range) + ) -> (range: Range, match: Match)? { + ConsumerSearcher(consumer: self).matchingSearchBack(searched, in: range) } } diff --git a/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift new file mode 100644 index 000000000..91d33e123 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// MARK: `CollectionSearcher` algorithms + +extension Collection { + public func firstMatch( + of searcher: S + ) -> _MatchResult? where S.Searched == Self { + var state = searcher.state(for: self, in: startIndex..( + of searcher: S + ) -> _BackwardMatchResult? + where S.BackwardSearched == Self + { + var state = searcher.backwardState(for: self, in: startIndex..( + of regex: R + ) -> _MatchResult>? { + firstMatch(of: RegexConsumer(regex)) + } + + public func lastMatch( + of regex: R + ) -> _BackwardMatchResult>? { + lastMatch(of: RegexConsumer(regex)) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift new file mode 100644 index 000000000..2feb09df0 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift @@ -0,0 +1,113 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// MARK: `MatchingCollectionSearcher` algorithms + +extension RangeReplaceableCollection { + public func replacing< + Searcher: MatchingCollectionSearcher, Replacement: Collection + >( + _ searcher: Searcher, + with replacement: (_MatchResult) throws -> Replacement, + subrange: Range, + maxReplacements: Int = .max + ) rethrows -> Self where Searcher.Searched == SubSequence, + Replacement.Element == Element + { + precondition(maxReplacements >= 0) + + var index = subrange.lowerBound + var result = Self() + result.append(contentsOf: self[..( + _ searcher: Searcher, + with replacement: (_MatchResult) throws -> Replacement, + maxReplacements: Int = .max + ) rethrows -> Self where Searcher.Searched == SubSequence, + Replacement.Element == Element + { + try replacing( + searcher, + with: replacement, + subrange: startIndex..( + _ searcher: Searcher, + with replacement: (_MatchResult) throws -> Replacement, + maxReplacements: Int = .max + ) rethrows where Searcher.Searched == SubSequence, + Replacement.Element == Element + { + self = try replacing( + searcher, + with: replacement, + maxReplacements: maxReplacements) + } +} + +// MARK: Regex algorithms + +extension RangeReplaceableCollection where SubSequence == Substring { + public func replacing( + _ regex: R, + with replacement: (_MatchResult>) throws -> Replacement, + subrange: Range, + maxReplacements: Int = .max + ) rethrows -> Self where Replacement.Element == Element { + try replacing( + RegexConsumer(regex), + with: replacement, + subrange: subrange, + maxReplacements: maxReplacements) + } + + public func replacing( + _ regex: R, + with replacement: (_MatchResult>) throws -> Replacement, + maxReplacements: Int = .max + ) rethrows -> Self where Replacement.Element == Element { + try replacing( + regex, + with: replacement, + subrange: startIndex..( + _ regex: R, + with replacement: (_MatchResult>) throws -> Replacement, + maxReplacements: Int = .max + ) rethrows where Replacement.Element == Element { + self = try replacing( + regex, + with: replacement, + maxReplacements: maxReplacements) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift new file mode 100644 index 000000000..2b1f19093 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public struct _MatchResult { + public let match: S.Searched.SubSequence + public let result: S.Match + + public var range: Range { + match.startIndex.. { + public let match: S.BackwardSearched.SubSequence + public let result: S.Match + + public var range: Range { + match.startIndex.. { + public typealias Base = Searcher.Searched + + let base: Base + let searcher: Searcher + private(set) public var startIndex: Index + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + + var state = searcher.state(for: base, in: base.startIndex..: IteratorProtocol { + public typealias Base = Searcher.Searched + + let base: Base + let searcher: Searcher + var state: Searcher.State + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + self.state = searcher.state(for: base, in: base.startIndex.. _MatchResult? { + searcher.matchingSearch(base, &state).map { range, result in + _MatchResult(match: base[range], result: result) + } + } +} + +extension MatchesCollection: Sequence { + public func makeIterator() -> MatchesIterator { + Iterator(base: base, searcher: searcher) + } +} + +extension MatchesCollection: Collection { + // TODO: Custom `SubSequence` for the sake of more efficient slice iteration + + public struct Index { + var match: (range: Range, match: Searcher.Match)? + var state: Searcher.State + } + + public var endIndex: Index { + // TODO: Avoid calling `state(for:startingAt)` here + Index( + match: nil, + state: searcher.state(for: base, in: base.startIndex.. Index { + var index = index + formIndex(after: &index) + return index + } + + public subscript(index: Index) -> _MatchResult { + guard let (range, result) = index.match else { + fatalError("Cannot subscript using endIndex") + } + return _MatchResult(match: base[range], result: result) + } +} + +extension MatchesCollection.Index: Comparable { + public static func == (lhs: Self, rhs: Self) -> Bool { + switch (lhs.match?.range, rhs.match?.range) { + case (nil, nil): + return true + case (nil, _?), (_?, nil): + return false + case (let lhs?, let rhs?): + return lhs.lowerBound == rhs.lowerBound + } + } + + public static func < (lhs: Self, rhs: Self) -> Bool { + switch (lhs.match?.range, rhs.match?.range) { + case (nil, _): + return false + case (_, nil): + return true + case (let lhs?, let rhs?): + return lhs.lowerBound < rhs.lowerBound + } + } +} + +// MARK: `ReversedMatchesCollection` +// TODO: reversed matches + +public struct ReversedMatchesCollection< + Searcher: BackwardMatchingCollectionSearcher +> { + public typealias Base = Searcher.BackwardSearched + + let base: Base + let searcher: Searcher + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + } +} + +extension ReversedMatchesCollection: Sequence { + public struct Iterator: IteratorProtocol { + let base: Base + let searcher: Searcher + var state: Searcher.BackwardState + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + self.state = searcher.backwardState( + for: base, in: base.startIndex.. _BackwardMatchResult? { + searcher.matchingSearchBack(base, &state).map { range, result in + _BackwardMatchResult(match: base[range], result: result) + } + } + } + + public func makeIterator() -> Iterator { + Iterator(base: base, searcher: searcher) + } +} + +// TODO: `Collection` conformance + +// MARK: `CollectionSearcher` algorithms + +extension Collection { + public func matches( + of searcher: S + ) -> MatchesCollection where S.Searched == Self { + MatchesCollection(base: self, searcher: searcher) + } +} + +extension BidirectionalCollection { + public func matchesFromBack( + of searcher: S + ) -> ReversedMatchesCollection where S.BackwardSearched == Self { + ReversedMatchesCollection(base: self, searcher: searcher) + } +} + +// MARK: Regex algorithms + +extension BidirectionalCollection where SubSequence == Substring { + public func matches( + of regex: R + ) -> MatchesCollection> { + matches(of: RegexConsumer(regex)) + } + + public func matchesFromBack( + of regex: R + ) -> ReversedMatchesCollection> { + matchesFromBack(of: RegexConsumer(regex)) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift new file mode 100644 index 000000000..0972752aa --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public protocol MatchingCollectionConsumer: CollectionConsumer { + associatedtype Match + func matchingConsuming( + _ consumed: Consumed, + in range: Range + ) -> (upperBound: Consumed.Index, match: Match)? +} + +extension MatchingCollectionConsumer { + public func consuming( + _ consumed: Consumed, + in range: Range + ) -> Consumed.Index? { + matchingConsuming(consumed, in: range)?.upperBound + } +} + +// MARK: Consuming from the back + +public protocol BidirectionalMatchingCollectionConsumer: + MatchingCollectionConsumer, BidirectionalCollectionConsumer +{ + func matchingConsumingBack( + _ consumed: Consumed, + in range: Range + ) -> (lowerBound: Consumed.Index, match: Match)? +} + +extension BidirectionalMatchingCollectionConsumer { + public func consumingBack( + _ consumed: Consumed, + in range: Range + ) -> Consumed.Index? { + matchingConsumingBack(consumed, in: range)?.lowerBound + } +} + diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift new file mode 100644 index 000000000..eadb46f9e --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift @@ -0,0 +1,131 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public protocol MatchingCollectionSearcher: CollectionSearcher { + associatedtype Match + func matchingSearch( + _ searched: Searched, + _ state: inout State + ) -> (range: Range, match: Match)? +} + +extension MatchingCollectionSearcher { + public func search( + _ searched: Searched, + _ state: inout State + ) -> Range? { + matchingSearch(searched, &state)?.range + } +} + +public protocol MatchingStatelessCollectionSearcher: + MatchingCollectionSearcher, StatelessCollectionSearcher +{ + func matchingSearch( + _ searched: Searched, + in range: Range + ) -> (range: Range, match: Match)? +} + +extension MatchingStatelessCollectionSearcher { + // for disambiguation between the `MatchingCollectionSearcher` and + // `StatelessCollectionSearcher` overloads + public func search( + _ searched: Searched, + _ state: inout State + ) -> Range? { + matchingSearch(searched, &state)?.range + } + + public func matchingSearch( + _ searched: Searched, + _ state: inout State + ) -> (range: Range, match: Match)? { + // TODO: deduplicate this logic with `StatelessCollectionSearcher`? + + guard + case .index(let index) = state.position, + let (range, value) = matchingSearch(searched, in: index.. + ) -> Range? { + matchingSearch(searched, in: range)?.range + } +} + +// MARK: Searching from the back + +public protocol BackwardMatchingCollectionSearcher: BackwardCollectionSearcher { + associatedtype Match + func matchingSearchBack( + _ searched: BackwardSearched, + _ state: inout BackwardState + ) -> (range: Range, match: Match)? +} + +public protocol BackwardMatchingStatelessCollectionSearcher: + BackwardMatchingCollectionSearcher, BackwardStatelessCollectionSearcher +{ + func matchingSearchBack( + _ searched: BackwardSearched, + in range: Range + ) -> (range: Range, match: Match)? +} + +extension BackwardMatchingStatelessCollectionSearcher { + public func searchBack( + _ searched: BackwardSearched, + in range: Range + ) -> Range? { + matchingSearchBack(searched, in: range)?.range + } + + public func matchingSearchBack( + _ searched: BackwardSearched, + _ state: inout BackwardState) -> (range: Range, match: Match)? + { + // TODO: deduplicate this logic with `StatelessBackwardCollectionSearcher`? + + guard + case .index(let index) = state.position, + let (range, value) = matchingSearchBack(searched, in: state.end.. Range? } -public protocol StatelessBackwardCollectionSearcher: BackwardCollectionSearcher +public protocol BackwardStatelessCollectionSearcher: BackwardCollectionSearcher where BackwardState == DefaultSearcherState { func searchBack( @@ -94,7 +93,7 @@ public protocol StatelessBackwardCollectionSearcher: BackwardCollectionSearcher ) -> Range? } -extension StatelessBackwardCollectionSearcher { +extension BackwardStatelessCollectionSearcher { public func backwardState( for searched: BackwardSearched, in range: Range @@ -124,7 +123,3 @@ extension StatelessBackwardCollectionSearcher { return range } } - -public protocol BidirectionalCollectionSearcher: CollectionSearcher, - BackwardCollectionSearcher - where Searched == BackwardSearched {} diff --git a/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift b/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift index b42cfbc52..0670daa44 100644 --- a/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift @@ -35,7 +35,7 @@ extension ConsumerSearcher: StatelessCollectionSearcher { } extension ConsumerSearcher: BackwardCollectionSearcher, - StatelessBackwardCollectionSearcher + BackwardStatelessCollectionSearcher where Consumer: BidirectionalCollectionConsumer { typealias BackwardSearched = Consumer.Consumed @@ -57,3 +57,53 @@ extension ConsumerSearcher: BackwardCollectionSearcher, } } } + +extension ConsumerSearcher: MatchingCollectionSearcher, + MatchingStatelessCollectionSearcher + where Consumer: MatchingCollectionConsumer +{ + typealias Match = Consumer.Match + + func matchingSearch( + _ searched: Searched, + in range: Range + ) -> (range: Range, match: Consumer.Match)? { + var start = range.lowerBound + while true { + if let (end, value) = consumer.matchingConsuming( + searched, + in: start.. + ) -> (range: Range, match: Match)? { + var end = range.upperBound + while true { + if let (start, value) = consumer.matchingConsumingBack( + searched, in: range.lowerBound.. + in inputRange: Range, + mode: MatchMode = .wholeString ) -> RegexMatch? { // Casts a Swift tuple to the custom `Tuple`, assuming their memory // layout is compatible. @@ -138,7 +139,7 @@ extension RegexProtocol { if regex.ast.hasCapture { let vm = HareVM(program: regex.program.legacyLoweredProgram) guard let (range, captures) = vm.execute( - input: input, in: inputRange, mode: .wholeString + input: input, in: inputRange, mode: mode )?.destructure else { return nil } @@ -157,7 +158,7 @@ extension RegexProtocol { } let executor = Executor(program: regex.program.loweredProgram) guard let result = executor.execute( - input: input, in: inputRange, mode: .wholeString + input: input, in: inputRange, mode: mode ) else { return nil } diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index b51f12100..5a848a6e4 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -114,6 +114,51 @@ class RegexConsumerTests: XCTestCase { expectReplace("aab", "a+", "X", "Xb") expectReplace("aab", "a*", "X", "XXbX") } + + func testMatches() { + let regex = Regex(OneOrMore(.digit).capture { 2 * Int($0)! }) + let str = "foo 160 bar 99 baz" + XCTAssertEqual(str.matches(of: regex).map(\.result.1), [320, 198]) + } + + func testMatchReplace() { + func replaceTest( + _ regex: R, + input: String, + result: String, + _ replace: (_MatchResult>) -> String, + file: StaticString = #file, + line: UInt = #line + ) { + XCTAssertEqual(input.replacing(regex, with: replace), result) + } + + let int = OneOrMore(.digit).capture { Int($0)! } + + replaceTest( + int, + input: "foo 160 bar 99 baz", + result: "foo 240 bar 143 baz", + { match in String(match.result.1, radix: 8) }) + + replaceTest( + Regex { int; "+"; int }, + input: "9+16, 0+3, 5+5, 99+1", + result: "25, 3, 10, 100", + { match in "\(match.result.1 + match.result.2)" }) + + replaceTest( + OneOrMore { int; "," }, + input: "3,5,8,0, 1,0,2,-5,x8,8,", + result: "16 3-5x16", + { match in "\(match.result.1.reduce(0, +))" }) + + replaceTest( + Regex { int; "x"; int; Optionally { "x"; int } }, + input: "2x3 5x4x3 6x0 1x2x3x4", + result: "6 60 0 6x4", + { match in "\(match.result.1 * match.result.2 * (match.result.3 ?? 1))" }) + } func testAdHoc() { let r = try! Regex("a|b+")