From 1724b2ce569c88ae9bc5c9bd587ea5afbef967b8 Mon Sep 17 00:00:00 2001 From: Tim Vermeulen Date: Thu, 20 Jan 2022 22:46:57 +0100 Subject: [PATCH 1/4] Initial matchers support --- .../Algorithms/Algorithms/Ranges.swift | 22 --- .../Algorithms/Algorithms/Split.swift | 14 -- .../Consumers/PredicateConsumer.swift | 2 +- .../Algorithms/Consumers/RegexConsumer.swift | 2 +- .../Algorithms/Matching/Matches.swift | 160 ++++++++++++++++++ .../Matching/MatchingCollectionConsumer.swift | 48 ++++++ .../Matching/MatchingCollectionSearcher.swift | 123 ++++++++++++++ .../Searchers/CollectionSearcher.swift | 9 +- .../Searchers/ConsumerSearcher.swift | 52 +++++- .../Searchers/NaivePatternSearcher.swift | 2 +- .../Searchers/PredicateSearcher.swift | 4 +- 11 files changed, 388 insertions(+), 50 deletions(-) create mode 100644 Sources/_StringProcessing/Algorithms/Matching/Matches.swift create mode 100644 Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift create mode 100644 Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index d4b407202..abe4de79f 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -33,16 +33,6 @@ public struct RangesCollection { } } -extension RangesCollection where Searcher: BidirectionalCollectionSearcher { - public func reversed() -> ReversedRangesCollection { - ReversedRangesCollection(base: base, searcher: searcher) - } - - public var last: Range? { - base.lastRange(of: searcher) - } -} - public struct RangesIterator: IteratorProtocol { public typealias Base = Searcher.Searched @@ -139,18 +129,6 @@ public struct ReversedRangesCollection { } } -extension ReversedRangesCollection - where Searcher: BidirectionalCollectionSearcher -{ - public func reversed() -> RangesCollection { - RangesCollection(base: base, searcher: searcher) - } - - public var last: Range? { - base.firstRange(of: searcher) - } -} - extension ReversedRangesCollection: Sequence { public struct Iterator: IteratorProtocol { let base: Base diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index cf64af478..87f7e5047 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -25,12 +25,6 @@ public struct SplitCollection { } } -extension SplitCollection where Searcher: BidirectionalCollectionSearcher { - public func reversed() -> ReversedSplitCollection { - ReversedSplitCollection(ranges: ranges.reversed()) - } -} - extension SplitCollection: Sequence { public struct Iterator: IteratorProtocol { let base: Base @@ -144,14 +138,6 @@ public struct ReversedSplitCollection { } } -extension ReversedSplitCollection - where Searcher: BidirectionalCollectionSearcher -{ - public func reversed() -> SplitCollection { - SplitCollection(ranges: ranges.reversed()) - } -} - extension ReversedSplitCollection: Sequence { public struct Iterator: IteratorProtocol { let base: Base diff --git a/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift index 52b4802d9..c9b92b9ec 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift @@ -56,7 +56,7 @@ extension PredicateConsumer: StatelessCollectionSearcher { } extension PredicateConsumer: BackwardCollectionSearcher, - StatelessBackwardCollectionSearcher + BackwardStatelessCollectionSearcher where Searched: BidirectionalCollection { public typealias BackwardSearched = Consumed diff --git a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift index 12ceaa155..0c5ee9dfd 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift @@ -82,7 +82,7 @@ extension RegexConsumer: StatelessCollectionSearcher { } // TODO: Bake in search-back to engine too -extension RegexConsumer: StatelessBackwardCollectionSearcher { +extension RegexConsumer: BackwardStatelessCollectionSearcher { public typealias BackwardSearched = Consumed public func searchBack( diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift new file mode 100644 index 000000000..30f013b9e --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -0,0 +1,160 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// MARK: `MatchesCollection` + +public struct MatchesCollection { + public typealias Base = Searcher.Searched + + let base: Base + let searcher: Searcher + private(set) public var startIndex: Index + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + + var state = searcher.state(for: base, in: base.startIndex..: IteratorProtocol { + public typealias Base = Searcher.Searched + + let base: Base + let searcher: Searcher + var state: Searcher.State + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + self.state = searcher.state(for: base, in: base.startIndex.. (Searcher.Match, Range)? { + searcher.matchingSearch(base, &state) + } +} + +extension MatchesCollection: Sequence { + public func makeIterator() -> MatchesIterator { + Iterator(base: base, searcher: searcher) + } +} + +extension MatchesCollection: Collection { + // TODO: Custom `SubSequence` for the sake of more efficient slice iteration + + public struct Index { + var match: (value: Searcher.Match, range: Range)? + var state: Searcher.State + } + + public var endIndex: Index { + // TODO: Avoid calling `state(for:startingAt)` here + Index( + match: nil, + state: searcher.state(for: base, in: base.startIndex.. Index { + var index = index + formIndex(after: &index) + return index + } + + public subscript(index: Index) -> (Searcher.Match, Range) { + guard let match = index.match else { + fatalError("Cannot subscript using endIndex") + } + return match + } +} + +extension MatchesCollection.Index: Comparable { + public static func == (lhs: Self, rhs: Self) -> Bool { + switch (lhs.match?.range, rhs.match?.range) { + case (nil, nil): + return true + case (nil, _?), (_?, nil): + return false + case (let lhs?, let rhs?): + return lhs.lowerBound == rhs.lowerBound + } + } + + public static func < (lhs: Self, rhs: Self) -> Bool { + switch (lhs.match?.range, rhs.match?.range) { + case (nil, _): + return false + case (_, nil): + return true + case (let lhs?, let rhs?): + return lhs.lowerBound < rhs.lowerBound + } + } +} + +// MARK: `ReversedMatchesCollection` +// TODO: reversed matches + +public struct ReversedMatchesCollection< + Searcher: BackwardMatchingCollectionSearcher +> { + public typealias Base = Searcher.BackwardSearched + + let base: Base + let searcher: Searcher + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + } +} + +extension ReversedMatchesCollection: Sequence { + public struct Iterator: IteratorProtocol { + let base: Base + let searcher: Searcher + var state: Searcher.BackwardState + + init(base: Base, searcher: Searcher) { + self.base = base + self.searcher = searcher + self.state = searcher.backwardState( + for: base, in: base.startIndex.. (Searcher.Match, Range)? { + searcher.matchingSearchBack(base, &state) + } + } + + public func makeIterator() -> Iterator { + Iterator(base: base, searcher: searcher) + } +} + +//// TODO: `Collection` conformance diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift new file mode 100644 index 000000000..1f20ab5f9 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public protocol MatchingCollectionConsumer: CollectionConsumer { + associatedtype Match + func matchingConsuming( + _ consumed: Consumed, + in range: Range + ) -> (Match, Consumed.Index)? +} + +extension MatchingCollectionConsumer { + public func consuming( + _ consumed: Consumed, + in range: Range + ) -> Consumed.Index? { + matchingConsuming(consumed, in: range)?.1 + } +} + +// MARK: Consuming from the back + +public protocol BidirectionalMatchingCollectionConsumer: + MatchingCollectionConsumer, BidirectionalCollectionConsumer +{ + func matchingConsumingBack( + _ consumed: Consumed, + in range: Range + ) -> (Match, Consumed.Index)? +} + +extension BidirectionalMatchingCollectionConsumer { + public func consumingBack( + _ consumed: Consumed, + in range: Range + ) -> Consumed.Index? { + matchingConsumingBack(consumed, in: range)?.1 + } +} + diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift new file mode 100644 index 000000000..dae5ae4bd --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift @@ -0,0 +1,123 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public protocol MatchingCollectionSearcher: CollectionSearcher { + associatedtype Match + func matchingSearch( + _ searched: Searched, + _ state: inout State + ) -> (Match, Range)? +} + +extension MatchingCollectionSearcher { + public func search( + _ searched: Searched, + _ state: inout State + ) -> Range? { + matchingSearch(searched, &state)?.1 + } +} + +public protocol MatchingStatelessCollectionSearcher: + MatchingCollectionSearcher, StatelessCollectionSearcher +{ + func matchingSearch( + _ searched: Searched, + in range: Range + ) -> (Match, Range)? +} + +extension MatchingStatelessCollectionSearcher { + // for disambiguation + public func search( + _ searched: Searched, + _ state: inout State + ) -> Range? { + matchingSearch(searched, &state)?.1 + } + + public func matchingSearch( + _ searched: Searched, + _ state: inout State + ) -> (Match, Range)? { + // TODO: deduplicate this logic with `StatelessCollectionSearcher`? + + guard + case .index(let index) = state.position, + let (value, range) = matchingSearch(searched, in: index.. + ) -> Range? { + matchingSearch(searched, in: range)?.1 + } +} + +// MARK: Searching from the back + +public protocol BackwardMatchingCollectionSearcher: BackwardCollectionSearcher { + associatedtype Match + func matchingSearchBack( + _ searched: BackwardSearched, + _ state: inout BackwardState + ) -> (Match, Range)? +} + +public protocol BackwardMatchingStatelessCollectionSearcher: + BackwardMatchingCollectionSearcher, BackwardStatelessCollectionSearcher +{ + func matchingSearchBack( + _ searched: BackwardSearched, + in range: Range + ) -> (Match, Range)? +} + +extension BackwardMatchingStatelessCollectionSearcher { + public func matchingSearchBack( + _ searched: BackwardSearched, + _ state: inout BackwardState) -> (Match, Range)? + { + // TODO: deduplicate this logic with `StatelessBackwardCollectionSearcher`? + + guard + case .index(let index) = state.position, + let (value, range) = matchingSearchBack(searched, in: state.end.. Range? } -public protocol StatelessBackwardCollectionSearcher: BackwardCollectionSearcher +public protocol BackwardStatelessCollectionSearcher: BackwardCollectionSearcher where BackwardState == DefaultSearcherState { func searchBack( @@ -94,7 +93,7 @@ public protocol StatelessBackwardCollectionSearcher: BackwardCollectionSearcher ) -> Range? } -extension StatelessBackwardCollectionSearcher { +extension BackwardStatelessCollectionSearcher { public func backwardState( for searched: BackwardSearched, in range: Range @@ -124,7 +123,3 @@ extension StatelessBackwardCollectionSearcher { return range } } - -public protocol BidirectionalCollectionSearcher: CollectionSearcher, - BackwardCollectionSearcher - where Searched == BackwardSearched {} diff --git a/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift b/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift index b42cfbc52..624f589f7 100644 --- a/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift @@ -35,7 +35,7 @@ extension ConsumerSearcher: StatelessCollectionSearcher { } extension ConsumerSearcher: BackwardCollectionSearcher, - StatelessBackwardCollectionSearcher + BackwardStatelessCollectionSearcher where Consumer: BidirectionalCollectionConsumer { typealias BackwardSearched = Consumer.Consumed @@ -57,3 +57,53 @@ extension ConsumerSearcher: BackwardCollectionSearcher, } } } + +extension ConsumerSearcher: MatchingCollectionSearcher, + MatchingStatelessCollectionSearcher + where Consumer: MatchingCollectionConsumer +{ + typealias Match = Consumer.Match + + func matchingSearch( + _ searched: Searched, + in range: Range + ) -> (Consumer.Match, Range)? { + var start = range.lowerBound + while true { + if let (value, end) = consumer.matchingConsuming( + searched, + in: start.. + ) -> (Match, Range)? { + var end = range.upperBound + while true { + if let (value, start) = consumer.matchingConsumingBack( + searched, in: range.lowerBound.. Date: Thu, 20 Jan 2022 23:15:38 +0100 Subject: [PATCH 2/4] Conform `RegexConsumer` to the matcher protocols --- .../Algorithms/Algorithms/Ranges.swift | 4 +- .../Algorithms/Algorithms/Split.swift | 4 +- .../Algorithms/Algorithms/Trim.swift | 4 +- .../Algorithms/Consumers/RegexConsumer.swift | 76 +++++++++---------- Sources/_StringProcessing/RegexDSL/Core.swift | 7 +- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index abe4de79f..52cbe919b 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -218,13 +218,13 @@ extension BidirectionalCollection where Element: Comparable { extension BidirectionalCollection where SubSequence == Substring { public func ranges( of regex: Regex - ) -> RangesCollection> { + ) -> RangesCollection> { ranges(of: RegexConsumer(regex)) } public func rangesFromBack( of regex: Regex - ) -> ReversedRangesCollection> { + ) -> ReversedRangesCollection> { rangesFromBack(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 87f7e5047..2d7266aa7 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -277,13 +277,13 @@ extension BidirectionalCollection where Element: Comparable { extension BidirectionalCollection where SubSequence == Substring { public func split( by separator: Regex - ) -> SplitCollection> { + ) -> SplitCollection> { split(by: RegexConsumer(separator)) } public func splitFromBack( by separator: Regex - ) -> ReversedSplitCollection> { + ) -> ReversedSplitCollection> { splitFromBack(by: RegexConsumer(separator)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 9a5afc582..b47347235 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -282,7 +282,7 @@ extension RangeReplaceableCollection } public mutating func trim(_ regex: Regex) { - let consumer = RegexConsumer(regex) + let consumer = RegexConsumer(regex) trimPrefix(consumer) trimSuffix(consumer) } @@ -298,7 +298,7 @@ extension Substring { } public mutating func trim(_ regex: Regex) { - let consumer = RegexConsumer(regex) + let consumer = RegexConsumer(regex) trimPrefix(consumer) trimSuffix(consumer) } diff --git a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift index 0c5ee9dfd..3f1a25c73 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift @@ -9,56 +9,50 @@ // //===----------------------------------------------------------------------===// -import _MatchingEngine +public struct RegexConsumer< + Consumed: BidirectionalCollection, Capture: MatchProtocol +> where Consumed.SubSequence == Substring { + // TODO: Should `Regex` itself implement these protocols? + let regex: Regex -public struct RegexConsumer - where Consumed.SubSequence == Substring -{ - // TODO: consider let, for now lets us toggle tracing - var vm: Executor - - // FIXME: Possibility of fatal error isn't user friendly - public init(_ regex: Regex) { - do { - self.vm = .init( - program: try Compiler(ast: regex.ast).emit()) - } catch { - fatalError("error: \(error)") - } - } - - public init(parsing regex: String) throws { - self.vm = try _compileRegex(regex) + public init(_ regex: Regex) { + self.regex = regex } - func _consuming( + func _matchingConsuming( _ consumed: Substring, in range: Range - ) -> String.Index? { - let result = vm.execute( - input: consumed.base, - in: range, - mode: .partialFromFront) - return result?.range.upperBound + ) -> (Capture, String.Index)? { + guard let result = regex._match( + consumed.base, + in: range, mode: .partialFromFront + ) else { return nil } + return (result.match, result.range.upperBound) } - - public func consuming( +} + +// TODO: Explicitly implement the non-matching consumer/searcher protocols as +// well, taking advantage of the fact that the captures can be ignored + +extension RegexConsumer: MatchingCollectionConsumer { + public func matchingConsuming( _ consumed: Consumed, in range: Range - ) -> String.Index? { - _consuming(consumed[...], in: range) + ) -> (Capture, String.Index)? { + _matchingConsuming(consumed[...], in: range) } } // TODO: We'll want to bake backwards into the engine -extension RegexConsumer: BidirectionalCollectionConsumer { - public func consumingBack( +extension RegexConsumer: BidirectionalMatchingCollectionConsumer { + public func matchingConsumingBack( _ consumed: Consumed, in range: Range - ) -> String.Index? { + ) -> (Capture, String.Index)? { var i = range.lowerBound while true { - if let end = _consuming(consumed[...], in: i.. - ) -> Range? { - ConsumerSearcher(consumer: self).search(searched, in: range) + ) -> (Capture, Range)? { + ConsumerSearcher(consumer: self).matchingSearch(searched, in: range) } } diff --git a/Sources/_StringProcessing/RegexDSL/Core.swift b/Sources/_StringProcessing/RegexDSL/Core.swift index 73b6fc133..988a22d71 100644 --- a/Sources/_StringProcessing/RegexDSL/Core.swift +++ b/Sources/_StringProcessing/RegexDSL/Core.swift @@ -126,7 +126,8 @@ extension RegexProtocol { // legacy virtual machines. func _match( _ input: String, - in inputRange: Range + in inputRange: Range, + mode: MatchMode = .wholeString ) -> RegexMatch? { // Casts a Swift tuple to the custom `Tuple`, assuming their memory // layout is compatible. @@ -138,7 +139,7 @@ extension RegexProtocol { if regex.ast.hasCapture { let vm = HareVM(program: regex.program.legacyLoweredProgram) guard let (range, captures) = vm.execute( - input: input, in: inputRange, mode: .wholeString + input: input, in: inputRange, mode: mode )?.destructure else { return nil } @@ -157,7 +158,7 @@ extension RegexProtocol { } let executor = Executor(program: regex.program.loweredProgram) guard let result = executor.execute( - input: input, in: inputRange, mode: .wholeString + input: input, in: inputRange, mode: mode ) else { return nil } From d549b4edb1470633128d2a6d747a375cd41cfb98 Mon Sep 17 00:00:00 2001 From: Tim Vermeulen Date: Fri, 21 Jan 2022 00:13:33 +0100 Subject: [PATCH 3/4] Add `firstMatch`/`lastMatch` and match find/replace --- .../Algorithms/Algorithms/Replace.swift | 8 +- .../Algorithms/Consumers/RegexConsumer.swift | 10 +- .../Algorithms/Matching/FirstMatch.swift | 48 ++++++++ .../Algorithms/Matching/MatchReplace.swift | 114 ++++++++++++++++++ .../Algorithms/Matching/Matches.swift | 38 +++++- .../Matching/MatchingCollectionSearcher.swift | 10 +- Tests/RegexTests/AlgorithmsTests.swift | 14 +++ 7 files changed, 232 insertions(+), 10 deletions(-) create mode 100644 Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift create mode 100644 Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index 120378cec..70bfdd6d9 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -50,11 +50,13 @@ extension RangeReplaceableCollection { maxReplacements: maxReplacements) } - public mutating func replace( + public mutating func replace< + Searcher: CollectionSearcher, Replacement: Collection + >( _ searcher: Searcher, - with replacement: R, + with replacement: Replacement, maxReplacements: Int = .max - ) where Searcher.Searched == SubSequence, R.Element == Element { + ) where Searcher.Searched == SubSequence, Replacement.Element == Element { self = replacing( searcher, with: replacement, diff --git a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift index 3f1a25c73..d2e7bcb27 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift @@ -34,6 +34,8 @@ public struct RegexConsumer< // well, taking advantage of the fact that the captures can be ignored extension RegexConsumer: MatchingCollectionConsumer { + public typealias Match = Capture + public func matchingConsuming( _ consumed: Consumed, in range: Range ) -> (Capture, String.Index)? { @@ -76,12 +78,12 @@ extension RegexConsumer: MatchingStatelessCollectionSearcher { } // TODO: Bake in search-back to engine too -extension RegexConsumer: BackwardStatelessCollectionSearcher { +extension RegexConsumer: BackwardMatchingStatelessCollectionSearcher { public typealias BackwardSearched = Consumed - public func searchBack( + public func matchingSearchBack( _ searched: BackwardSearched, in range: Range - ) -> Range? { - ConsumerSearcher(consumer: self).searchBack(searched, in: range) + ) -> (Capture, Range)? { + ConsumerSearcher(consumer: self).matchingSearchBack(searched, in: range) } } diff --git a/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift new file mode 100644 index 000000000..8d01d4394 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// MARK: `CollectionSearcher` algorithms + +extension Collection { + public func firstMatch( + of searcher: S + ) -> (S.Match, Range)? where S.Searched == Self { + var state = searcher.state(for: self, in: startIndex..( + of searcher: S + ) -> (S.Match, Range)? + where S.BackwardSearched == Self + { + var state = searcher.backwardState(for: self, in: startIndex..( + of regex: Regex + ) -> (Capture, Range)? { + firstMatch(of: RegexConsumer(regex)) + } + + public func lastMatch( + of regex: Regex + ) -> (Capture, Range)? { + lastMatch(of: RegexConsumer(regex)) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift new file mode 100644 index 000000000..454bb01e1 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift @@ -0,0 +1,114 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// MARK: `MatchingCollectionSearcher` algorithms + +extension RangeReplaceableCollection { + public func replacing< + Searcher: MatchingCollectionSearcher, Replacement: Collection + >( + _ searcher: Searcher, + with replacement: (Searcher.Match, + Range) -> Replacement, + subrange: Range, + maxReplacements: Int = .max + ) -> Self where Searcher.Searched == SubSequence, + Replacement.Element == Element + { + precondition(maxReplacements >= 0) + + var index = subrange.lowerBound + var result = Self() + result.append(contentsOf: self[..( + _ searcher: Searcher, + with replacement: (Searcher.Match, + Range) -> Replacement, + maxReplacements: Int = .max + ) -> Self where Searcher.Searched == SubSequence, + Replacement.Element == Element + { + replacing( + searcher, + with: replacement, + subrange: startIndex..( + _ searcher: Searcher, + with replacement: (Searcher.Match, + Range) -> Replacement, + maxReplacements: Int = .max + ) where Searcher.Searched == SubSequence, Replacement.Element == Element { + self = replacing( + searcher, + with: replacement, + maxReplacements: maxReplacements) + } +} + +// MARK: Regex algorithms + +extension RangeReplaceableCollection where SubSequence == Substring { + public func replacing( + _ regex: Regex, + with replacement: (Capture, Range) -> Replacement, + subrange: Range, + maxReplacements: Int = .max + ) -> Self where Replacement.Element == Element { + replacing( + RegexConsumer(regex), + with: replacement, + subrange: subrange, + maxReplacements: maxReplacements) + } + + public func replacing( + _ regex: Regex, + with replacement: (Capture, Range) -> Replacement, + maxReplacements: Int = .max + ) -> Self where Replacement.Element == Element { + replacing( + regex, + with: replacement, + subrange: startIndex..( + _ regex: Regex, + with replacement: (Capture, Range) -> Replacement, + maxReplacements: Int = .max + ) where Replacement.Element == Element { + self = replacing( + regex, + with: replacement, + maxReplacements: maxReplacements) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift index 30f013b9e..9c528784d 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/Matches.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/Matches.swift @@ -63,7 +63,7 @@ extension MatchesCollection: Collection { // TODO: Custom `SubSequence` for the sake of more efficient slice iteration public struct Index { - var match: (value: Searcher.Match, range: Range)? + var match: (value: Searcher.Match, range: Range)? var state: Searcher.State } @@ -157,4 +157,38 @@ extension ReversedMatchesCollection: Sequence { } } -//// TODO: `Collection` conformance +// TODO: `Collection` conformance + +// MARK: `CollectionSearcher` algorithms + +extension Collection { + public func matches( + of searcher: S + ) -> MatchesCollection where S.Searched == Self { + MatchesCollection(base: self, searcher: searcher) + } +} + +extension BidirectionalCollection { + public func matchesFromBack( + of searcher: S + ) -> ReversedMatchesCollection where S.BackwardSearched == Self { + ReversedMatchesCollection(base: self, searcher: searcher) + } +} + +// MARK: Regex algorithms + +extension BidirectionalCollection where SubSequence == Substring { + public func matches( + of regex: Regex + ) -> MatchesCollection> { + matches(of: RegexConsumer(regex)) + } + + public func matchesFromBack( + of regex: Regex + ) -> ReversedMatchesCollection> { + matchesFromBack(of: RegexConsumer(regex)) + } +} diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift index dae5ae4bd..29a791475 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift @@ -36,7 +36,8 @@ public protocol MatchingStatelessCollectionSearcher: } extension MatchingStatelessCollectionSearcher { - // for disambiguation + // for disambiguation between the `MatchingCollectionSearcher` and + // `StatelessCollectionSearcher` overloads public func search( _ searched: Searched, _ state: inout State @@ -96,6 +97,13 @@ public protocol BackwardMatchingStatelessCollectionSearcher: } extension BackwardMatchingStatelessCollectionSearcher { + public func searchBack( + _ searched: BackwardSearched, + in range: Range + ) -> Range? { + matchingSearchBack(searched, in: range)?.1 + } + public func matchingSearchBack( _ searched: BackwardSearched, _ state: inout BackwardState) -> (Match, Range)? diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index b51f12100..b0318d766 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -114,6 +114,20 @@ class RegexConsumerTests: XCTestCase { expectReplace("aab", "a+", "X", "Xb") expectReplace("aab", "a*", "X", "XXbX") } + + func testMatches() { + let regex = Regex(OneOrMore(.digit).capture { 2 * Int($0)! }) + let str = "foo 160 bar 99 baz" + XCTAssertEqual(str.matches(of: regex).map(\.0.1), [320, 198]) + } + + func testMatchReplace() { + let regex = Regex(OneOrMore(.digit).capture { Int($0)! }) + let str = "foo 160 bar 99 baz" + XCTAssertEqual( + str.replacing(regex, with: { match, _ in String(match.1, radix: 8) }), + "foo 240 bar 143 baz") + } func testAdHoc() { let r = try! Regex("a|b+") From e89dcbb08710dfb8043ec87214387659d4bd234e Mon Sep 17 00:00:00 2001 From: Tim Vermeulen Date: Thu, 27 Jan 2022 20:07:21 +0100 Subject: [PATCH 4/4] Add `_MatchResult` and allow `RegexProtocol` in algorithms --- .../Algorithms/Algorithms/Contains.swift | 2 +- .../Algorithms/Algorithms/FirstRange.swift | 4 +- .../Algorithms/Algorithms/Ranges.swift | 12 ++-- .../Algorithms/Algorithms/Replace.swift | 12 ++-- .../Algorithms/Algorithms/Split.swift | 12 ++-- .../Algorithms/Algorithms/StartsWith.swift | 4 +- .../Algorithms/Algorithms/Trim.swift | 22 +++---- .../Algorithms/Consumers/RegexConsumer.swift | 28 +++++---- .../Algorithms/Matching/FirstMatch.swift | 24 +++++--- .../Algorithms/Matching/MatchReplace.swift | 61 +++++++++---------- .../Algorithms/Matching/MatchResult.swift | 28 +++++++++ .../Algorithms/Matching/Matches.swift | 32 +++++----- .../Matching/MatchingCollectionConsumer.swift | 8 +-- .../Matching/MatchingCollectionSearcher.swift | 28 ++++----- .../Searchers/ConsumerSearcher.swift | 12 ++-- Tests/RegexTests/AlgorithmsTests.swift | 43 +++++++++++-- 16 files changed, 200 insertions(+), 132 deletions(-) create mode 100644 Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift index fdc718ce4..03e2c53ee 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift @@ -40,7 +40,7 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func contains(_ regex: Regex) -> Bool { + public func contains(_ regex: R) -> Bool { contains(RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift index 97f7248cd..64a5eb943 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift @@ -56,11 +56,11 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func firstRange(of regex: Regex) -> Range? { + public func firstRange(of regex: R) -> Range? { firstRange(of: RegexConsumer(regex)) } - public func lastRange(of regex: Regex) -> Range? { + public func lastRange(of regex: R) -> Range? { lastRange(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 52cbe919b..aefdbce3f 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -216,15 +216,15 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func ranges( - of regex: Regex - ) -> RangesCollection> { + public func ranges( + of regex: R + ) -> RangesCollection> { ranges(of: RegexConsumer(regex)) } - public func rangesFromBack( - of regex: Regex - ) -> ReversedRangesCollection> { + public func rangesFromBack( + of regex: R + ) -> ReversedRangesCollection> { rangesFromBack(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index 70bfdd6d9..36a28b381 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -149,8 +149,8 @@ extension RangeReplaceableCollection // MARK: Regex algorithms extension RangeReplaceableCollection where SubSequence == Substring { - public func replacing( - _ regex: Regex, + public func replacing( + _ regex: R, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max @@ -162,8 +162,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: maxReplacements) } - public func replacing( - _ regex: Regex, + public func replacing( + _ regex: R, with replacement: Replacement, maxReplacements: Int = .max ) -> Self where Replacement.Element == Element { @@ -174,8 +174,8 @@ extension RangeReplaceableCollection where SubSequence == Substring { maxReplacements: maxReplacements) } - public mutating func replace( - _ regex: Regex, + public mutating func replace( + _ regex: R, with replacement: Replacement, maxReplacements: Int = .max ) where Replacement.Element == Element { diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 2d7266aa7..ba2cda30b 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -275,15 +275,15 @@ extension BidirectionalCollection where Element: Comparable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func split( - by separator: Regex - ) -> SplitCollection> { + public func split( + by separator: R + ) -> SplitCollection> { split(by: RegexConsumer(separator)) } - public func splitFromBack( - by separator: Regex - ) -> ReversedSplitCollection> { + public func splitFromBack( + by separator: R + ) -> ReversedSplitCollection> { splitFromBack(by: RegexConsumer(separator)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift b/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift index 0ed6faf2c..ee9432fb4 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift @@ -48,11 +48,11 @@ extension BidirectionalCollection where Element: Equatable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func starts(with regex: Regex) -> Bool { + public func starts(with regex: R) -> Bool { starts(with: RegexConsumer(regex)) } - public func ends(with regex: Regex) -> Bool { + public func ends(with regex: R) -> Bool { ends(with: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index b47347235..b2438bb3b 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -257,15 +257,15 @@ extension RangeReplaceableCollection // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func trimmingPrefix(_ regex: Regex) -> SubSequence { + public func trimmingPrefix(_ regex: R) -> SubSequence { trimmingPrefix(RegexConsumer(regex)) } - public func trimmingSuffix(_ regex: Regex) -> SubSequence { + public func trimmingSuffix(_ regex: R) -> SubSequence { trimmingSuffix(RegexConsumer(regex)) } - public func trimming(_ regex: Regex) -> SubSequence { + public func trimming(_ regex: R) -> SubSequence { trimming(RegexConsumer(regex)) } } @@ -273,32 +273,32 @@ extension BidirectionalCollection where SubSequence == Substring { extension RangeReplaceableCollection where Self: BidirectionalCollection, SubSequence == Substring { - public mutating func trimPrefix(_ regex: Regex) { + public mutating func trimPrefix(_ regex: R) { trimPrefix(RegexConsumer(regex)) } - public mutating func trimSuffix(_ regex: Regex) { + public mutating func trimSuffix(_ regex: R) { trimSuffix(RegexConsumer(regex)) } - public mutating func trim(_ regex: Regex) { - let consumer = RegexConsumer(regex) + public mutating func trim(_ regex: R) { + let consumer = RegexConsumer(regex) trimPrefix(consumer) trimSuffix(consumer) } } extension Substring { - public mutating func trimPrefix(_ regex: Regex) { + public mutating func trimPrefix(_ regex: R) { trimPrefix(RegexConsumer(regex)) } - public mutating func trimSuffix(_ regex: Regex) { + public mutating func trimSuffix(_ regex: R) { trimSuffix(RegexConsumer(regex)) } - public mutating func trim(_ regex: Regex) { - let consumer = RegexConsumer(regex) + public mutating func trim(_ regex: R) { + let consumer = RegexConsumer(regex) trimPrefix(consumer) trimSuffix(consumer) } diff --git a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift index d2e7bcb27..5af689cbc 100644 --- a/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift @@ -10,23 +10,25 @@ //===----------------------------------------------------------------------===// public struct RegexConsumer< - Consumed: BidirectionalCollection, Capture: MatchProtocol + R: RegexProtocol, Consumed: BidirectionalCollection > where Consumed.SubSequence == Substring { // TODO: Should `Regex` itself implement these protocols? - let regex: Regex + let regex: R - public init(_ regex: Regex) { + public init(_ regex: R) { self.regex = regex } - +} + +extension RegexConsumer { func _matchingConsuming( _ consumed: Substring, in range: Range - ) -> (Capture, String.Index)? { + ) -> (upperBound: String.Index, match: Match)? { guard let result = regex._match( consumed.base, in: range, mode: .partialFromFront ) else { return nil } - return (result.match, result.range.upperBound) + return (result.range.upperBound, result.match) } } @@ -34,11 +36,11 @@ public struct RegexConsumer< // well, taking advantage of the fact that the captures can be ignored extension RegexConsumer: MatchingCollectionConsumer { - public typealias Match = Capture + public typealias Match = R.Match public func matchingConsuming( _ consumed: Consumed, in range: Range - ) -> (Capture, String.Index)? { + ) -> (upperBound: String.Index, match: Match)? { _matchingConsuming(consumed[...], in: range) } } @@ -47,14 +49,14 @@ extension RegexConsumer: MatchingCollectionConsumer { extension RegexConsumer: BidirectionalMatchingCollectionConsumer { public func matchingConsumingBack( _ consumed: Consumed, in range: Range - ) -> (Capture, String.Index)? { + ) -> (lowerBound: String.Index, match: Match)? { var i = range.lowerBound while true { - if let (capture, end) = _matchingConsuming( + if let (end, capture) = _matchingConsuming( consumed[...], in: i.. - ) -> (Capture, Range)? { + ) -> (range: Range, match: Match)? { ConsumerSearcher(consumer: self).matchingSearch(searched, in: range) } } @@ -83,7 +85,7 @@ extension RegexConsumer: BackwardMatchingStatelessCollectionSearcher { public func matchingSearchBack( _ searched: BackwardSearched, in range: Range - ) -> (Capture, Range)? { + ) -> (range: Range, match: Match)? { ConsumerSearcher(consumer: self).matchingSearchBack(searched, in: range) } } diff --git a/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift index 8d01d4394..91d33e123 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift @@ -14,35 +14,39 @@ extension Collection { public func firstMatch( of searcher: S - ) -> (S.Match, Range)? where S.Searched == Self { + ) -> _MatchResult? where S.Searched == Self { var state = searcher.state(for: self, in: startIndex..( of searcher: S - ) -> (S.Match, Range)? + ) -> _BackwardMatchResult? where S.BackwardSearched == Self { var state = searcher.backwardState(for: self, in: startIndex..( - of regex: Regex - ) -> (Capture, Range)? { + public func firstMatch( + of regex: R + ) -> _MatchResult>? { firstMatch(of: RegexConsumer(regex)) } - public func lastMatch( - of regex: Regex - ) -> (Capture, Range)? { + public func lastMatch( + of regex: R + ) -> _BackwardMatchResult>? { lastMatch(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift index 454bb01e1..2feb09df0 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchReplace.swift @@ -16,11 +16,10 @@ extension RangeReplaceableCollection { Searcher: MatchingCollectionSearcher, Replacement: Collection >( _ searcher: Searcher, - with replacement: (Searcher.Match, - Range) -> Replacement, + with replacement: (_MatchResult) throws -> Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where Searcher.Searched == SubSequence, + ) rethrows -> Self where Searcher.Searched == SubSequence, Replacement.Element == Element { precondition(maxReplacements >= 0) @@ -29,12 +28,12 @@ extension RangeReplaceableCollection { var result = Self() result.append(contentsOf: self[..( _ searcher: Searcher, - with replacement: (Searcher.Match, - Range) -> Replacement, + with replacement: (_MatchResult) throws -> Replacement, maxReplacements: Int = .max - ) -> Self where Searcher.Searched == SubSequence, - Replacement.Element == Element + ) rethrows -> Self where Searcher.Searched == SubSequence, + Replacement.Element == Element { - replacing( + try replacing( searcher, with: replacement, subrange: startIndex..( _ searcher: Searcher, - with replacement: (Searcher.Match, - Range) -> Replacement, + with replacement: (_MatchResult) throws -> Replacement, maxReplacements: Int = .max - ) where Searcher.Searched == SubSequence, Replacement.Element == Element { - self = replacing( + ) rethrows where Searcher.Searched == SubSequence, + Replacement.Element == Element + { + self = try replacing( searcher, with: replacement, maxReplacements: maxReplacements) @@ -76,37 +75,37 @@ extension RangeReplaceableCollection { // MARK: Regex algorithms extension RangeReplaceableCollection where SubSequence == Substring { - public func replacing( - _ regex: Regex, - with replacement: (Capture, Range) -> Replacement, + public func replacing( + _ regex: R, + with replacement: (_MatchResult>) throws -> Replacement, subrange: Range, maxReplacements: Int = .max - ) -> Self where Replacement.Element == Element { - replacing( + ) rethrows -> Self where Replacement.Element == Element { + try replacing( RegexConsumer(regex), with: replacement, subrange: subrange, maxReplacements: maxReplacements) } - public func replacing( - _ regex: Regex, - with replacement: (Capture, Range) -> Replacement, + public func replacing( + _ regex: R, + with replacement: (_MatchResult>) throws -> Replacement, maxReplacements: Int = .max - ) -> Self where Replacement.Element == Element { - replacing( + ) rethrows -> Self where Replacement.Element == Element { + try replacing( regex, with: replacement, subrange: startIndex..( - _ regex: Regex, - with replacement: (Capture, Range) -> Replacement, + public mutating func replace( + _ regex: R, + with replacement: (_MatchResult>) throws -> Replacement, maxReplacements: Int = .max - ) where Replacement.Element == Element { - self = replacing( + ) rethrows where Replacement.Element == Element { + self = try replacing( regex, with: replacement, maxReplacements: maxReplacements) diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift new file mode 100644 index 000000000..2b1f19093 --- /dev/null +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public struct _MatchResult { + public let match: S.Searched.SubSequence + public let result: S.Match + + public var range: Range { + match.startIndex.. { + public let match: S.BackwardSearched.SubSequence + public let result: S.Match + + public var range: Range { + match.startIndex.. (Searcher.Match, Range)? { - searcher.matchingSearch(base, &state) + public mutating func next() -> _MatchResult? { + searcher.matchingSearch(base, &state).map { range, result in + _MatchResult(match: base[range], result: result) + } } } @@ -63,7 +65,7 @@ extension MatchesCollection: Collection { // TODO: Custom `SubSequence` for the sake of more efficient slice iteration public struct Index { - var match: (value: Searcher.Match, range: Range)? + var match: (range: Range, match: Searcher.Match)? var state: Searcher.State } @@ -85,11 +87,11 @@ extension MatchesCollection: Collection { return index } - public subscript(index: Index) -> (Searcher.Match, Range) { - guard let match = index.match else { + public subscript(index: Index) -> _MatchResult { + guard let (range, result) = index.match else { fatalError("Cannot subscript using endIndex") } - return match + return _MatchResult(match: base[range], result: result) } } @@ -147,8 +149,10 @@ extension ReversedMatchesCollection: Sequence { for: base, in: base.startIndex.. (Searcher.Match, Range)? { - searcher.matchingSearchBack(base, &state) + public mutating func next() -> _BackwardMatchResult? { + searcher.matchingSearchBack(base, &state).map { range, result in + _BackwardMatchResult(match: base[range], result: result) + } } } @@ -180,15 +184,15 @@ extension BidirectionalCollection { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - public func matches( - of regex: Regex - ) -> MatchesCollection> { + public func matches( + of regex: R + ) -> MatchesCollection> { matches(of: RegexConsumer(regex)) } - public func matchesFromBack( - of regex: Regex - ) -> ReversedMatchesCollection> { + public func matchesFromBack( + of regex: R + ) -> ReversedMatchesCollection> { matchesFromBack(of: RegexConsumer(regex)) } } diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift index 1f20ab5f9..0972752aa 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift @@ -14,7 +14,7 @@ public protocol MatchingCollectionConsumer: CollectionConsumer { func matchingConsuming( _ consumed: Consumed, in range: Range - ) -> (Match, Consumed.Index)? + ) -> (upperBound: Consumed.Index, match: Match)? } extension MatchingCollectionConsumer { @@ -22,7 +22,7 @@ extension MatchingCollectionConsumer { _ consumed: Consumed, in range: Range ) -> Consumed.Index? { - matchingConsuming(consumed, in: range)?.1 + matchingConsuming(consumed, in: range)?.upperBound } } @@ -34,7 +34,7 @@ public protocol BidirectionalMatchingCollectionConsumer: func matchingConsumingBack( _ consumed: Consumed, in range: Range - ) -> (Match, Consumed.Index)? + ) -> (lowerBound: Consumed.Index, match: Match)? } extension BidirectionalMatchingCollectionConsumer { @@ -42,7 +42,7 @@ extension BidirectionalMatchingCollectionConsumer { _ consumed: Consumed, in range: Range ) -> Consumed.Index? { - matchingConsumingBack(consumed, in: range)?.1 + matchingConsumingBack(consumed, in: range)?.lowerBound } } diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift index 29a791475..eadb46f9e 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift @@ -14,7 +14,7 @@ public protocol MatchingCollectionSearcher: CollectionSearcher { func matchingSearch( _ searched: Searched, _ state: inout State - ) -> (Match, Range)? + ) -> (range: Range, match: Match)? } extension MatchingCollectionSearcher { @@ -22,7 +22,7 @@ extension MatchingCollectionSearcher { _ searched: Searched, _ state: inout State ) -> Range? { - matchingSearch(searched, &state)?.1 + matchingSearch(searched, &state)?.range } } @@ -32,7 +32,7 @@ public protocol MatchingStatelessCollectionSearcher: func matchingSearch( _ searched: Searched, in range: Range - ) -> (Match, Range)? + ) -> (range: Range, match: Match)? } extension MatchingStatelessCollectionSearcher { @@ -42,18 +42,18 @@ extension MatchingStatelessCollectionSearcher { _ searched: Searched, _ state: inout State ) -> Range? { - matchingSearch(searched, &state)?.1 + matchingSearch(searched, &state)?.range } public func matchingSearch( _ searched: Searched, _ state: inout State - ) -> (Match, Range)? { + ) -> (range: Range, match: Match)? { // TODO: deduplicate this logic with `StatelessCollectionSearcher`? guard case .index(let index) = state.position, - let (value, range) = matchingSearch(searched, in: index.. ) -> Range? { - matchingSearch(searched, in: range)?.1 + matchingSearch(searched, in: range)?.range } } @@ -84,7 +84,7 @@ public protocol BackwardMatchingCollectionSearcher: BackwardCollectionSearcher { func matchingSearchBack( _ searched: BackwardSearched, _ state: inout BackwardState - ) -> (Match, Range)? + ) -> (range: Range, match: Match)? } public protocol BackwardMatchingStatelessCollectionSearcher: @@ -93,7 +93,7 @@ public protocol BackwardMatchingStatelessCollectionSearcher: func matchingSearchBack( _ searched: BackwardSearched, in range: Range - ) -> (Match, Range)? + ) -> (range: Range, match: Match)? } extension BackwardMatchingStatelessCollectionSearcher { @@ -101,18 +101,18 @@ extension BackwardMatchingStatelessCollectionSearcher { _ searched: BackwardSearched, in range: Range ) -> Range? { - matchingSearchBack(searched, in: range)?.1 + matchingSearchBack(searched, in: range)?.range } public func matchingSearchBack( _ searched: BackwardSearched, - _ state: inout BackwardState) -> (Match, Range)? + _ state: inout BackwardState) -> (range: Range, match: Match)? { // TODO: deduplicate this logic with `StatelessBackwardCollectionSearcher`? guard case .index(let index) = state.position, - let (value, range) = matchingSearchBack(searched, in: state.end.. - ) -> (Consumer.Match, Range)? { + ) -> (range: Range, match: Consumer.Match)? { var start = range.lowerBound while true { - if let (value, end) = consumer.matchingConsuming( + if let (end, value) = consumer.matchingConsuming( searched, in: start.. - ) -> (Match, Range)? { + ) -> (range: Range, match: Match)? { var end = range.upperBound while true { - if let (value, start) = consumer.matchingConsumingBack( + if let (start, value) = consumer.matchingConsumingBack( searched, in: range.lowerBound..( + _ regex: R, + input: String, + result: String, + _ replace: (_MatchResult>) -> String, + file: StaticString = #file, + line: UInt = #line + ) { + XCTAssertEqual(input.replacing(regex, with: replace), result) + } + + let int = OneOrMore(.digit).capture { Int($0)! } + + replaceTest( + int, + input: "foo 160 bar 99 baz", + result: "foo 240 bar 143 baz", + { match in String(match.result.1, radix: 8) }) + + replaceTest( + Regex { int; "+"; int }, + input: "9+16, 0+3, 5+5, 99+1", + result: "25, 3, 10, 100", + { match in "\(match.result.1 + match.result.2)" }) + + replaceTest( + OneOrMore { int; "," }, + input: "3,5,8,0, 1,0,2,-5,x8,8,", + result: "16 3-5x16", + { match in "\(match.result.1.reduce(0, +))" }) + + replaceTest( + Regex { int; "x"; int; Optionally { "x"; int } }, + input: "2x3 5x4x3 6x0 1x2x3x4", + result: "6 60 0 6x4", + { match in "\(match.result.1 * match.result.2 * (match.result.3 ?? 1))" }) } func testAdHoc() {