Skip to content

Commit e719d6d

Browse files
author
Tim Vermeulen
authored
Initial matcher/validator support (#122)
* Initial matchers support
* Conform `RegexConsumer` to the matcher protocols
* Add `firstMatch`/`lastMatch` and match find/replace
* Add `_MatchResult` and allow `RegexProtocol` in algorithms
1 parent cf2d910 commit e719d6d

21 files changed

+756
-133
lines changed

Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ extension BidirectionalCollection where Element: Comparable {
4040
// MARK: Regex algorithms
4141

4242
extension BidirectionalCollection where SubSequence == Substring {
43-
public func contains<Capture>(_ regex: Regex<Capture>) -> Bool {
43+
public func contains<R: RegexProtocol>(_ regex: R) -> Bool {
4444
contains(RegexConsumer(regex))
4545
}
4646
}

Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,11 @@ extension BidirectionalCollection where Element: Comparable {
5656
// MARK: Regex algorithms
5757

5858
extension BidirectionalCollection where SubSequence == Substring {
59-
public func firstRange<Capture>(of regex: Regex<Capture>) -> Range<Index>? {
59+
public func firstRange<R: RegexProtocol>(of regex: R) -> Range<Index>? {
6060
firstRange(of: RegexConsumer(regex))
6161
}
6262

63-
public func lastRange<Capture>(of regex: Regex<Capture>) -> Range<Index>? {
63+
public func lastRange<R: RegexProtocol>(of regex: R) -> Range<Index>? {
6464
lastRange(of: RegexConsumer(regex))
6565
}
6666
}

Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift

Lines changed: 6 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -33,16 +33,6 @@ public struct RangesCollection<Searcher: CollectionSearcher> {
3333
}
3434
}
3535

36-
extension RangesCollection where Searcher: BidirectionalCollectionSearcher {
37-
public func reversed() -> ReversedRangesCollection<Searcher> {
38-
ReversedRangesCollection(base: base, searcher: searcher)
39-
}
40-
41-
public var last: Range<Base.Index>? {
42-
base.lastRange(of: searcher)
43-
}
44-
}
45-
4636
public struct RangesIterator<Searcher: CollectionSearcher>: IteratorProtocol {
4737
public typealias Base = Searcher.Searched
4838

@@ -139,18 +129,6 @@ public struct ReversedRangesCollection<Searcher: BackwardCollectionSearcher> {
139129
}
140130
}
141131

142-
extension ReversedRangesCollection
143-
where Searcher: BidirectionalCollectionSearcher
144-
{
145-
public func reversed() -> RangesCollection<Searcher> {
146-
RangesCollection(base: base, searcher: searcher)
147-
}
148-
149-
public var last: Range<Base.Index>? {
150-
base.firstRange(of: searcher)
151-
}
152-
}
153-
154132
extension ReversedRangesCollection: Sequence {
155133
public struct Iterator: IteratorProtocol {
156134
let base: Base
@@ -238,15 +216,15 @@ extension BidirectionalCollection where Element: Comparable {
238216
// MARK: Regex algorithms
239217

240218
extension BidirectionalCollection where SubSequence == Substring {
241-
public func ranges<Capture>(
242-
of regex: Regex<Capture>
243-
) -> RangesCollection<RegexConsumer<Self>> {
219+
public func ranges<R: RegexProtocol>(
220+
of regex: R
221+
) -> RangesCollection<RegexConsumer<R, Self>> {
244222
ranges(of: RegexConsumer(regex))
245223
}
246224

247-
public func rangesFromBack<Capture>(
248-
of regex: Regex<Capture>
249-
) -> ReversedRangesCollection<RegexConsumer<Self>> {
225+
public func rangesFromBack<R: RegexProtocol>(
226+
of regex: R
227+
) -> ReversedRangesCollection<RegexConsumer<R, Self>> {
250228
rangesFromBack(of: RegexConsumer(regex))
251229
}
252230
}

Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -50,11 +50,13 @@ extension RangeReplaceableCollection {
5050
maxReplacements: maxReplacements)
5151
}
5252

53-
public mutating func replace<Searcher: CollectionSearcher, R: Collection>(
53+
public mutating func replace<
54+
Searcher: CollectionSearcher, Replacement: Collection
55+
>(
5456
_ searcher: Searcher,
55-
with replacement: R,
57+
with replacement: Replacement,
5658
maxReplacements: Int = .max
57-
) where Searcher.Searched == SubSequence, R.Element == Element {
59+
) where Searcher.Searched == SubSequence, Replacement.Element == Element {
5860
self = replacing(
5961
searcher,
6062
with: replacement,
@@ -147,8 +149,8 @@ extension RangeReplaceableCollection
147149
// MARK: Regex algorithms
148150

149151
extension RangeReplaceableCollection where SubSequence == Substring {
150-
public func replacing<Capture, Replacement: Collection>(
151-
_ regex: Regex<Capture>,
152+
public func replacing<R: RegexProtocol, Replacement: Collection>(
153+
_ regex: R,
152154
with replacement: Replacement,
153155
subrange: Range<Index>,
154156
maxReplacements: Int = .max
@@ -160,8 +162,8 @@ extension RangeReplaceableCollection where SubSequence == Substring {
160162
maxReplacements: maxReplacements)
161163
}
162164

163-
public func replacing<Capture, Replacement: Collection>(
164-
_ regex: Regex<Capture>,
165+
public func replacing<R: RegexProtocol, Replacement: Collection>(
166+
_ regex: R,
165167
with replacement: Replacement,
166168
maxReplacements: Int = .max
167169
) -> Self where Replacement.Element == Element {
@@ -172,8 +174,8 @@ extension RangeReplaceableCollection where SubSequence == Substring {
172174
maxReplacements: maxReplacements)
173175
}
174176

175-
public mutating func replace<Capture, Replacement: Collection>(
176-
_ regex: Regex<Capture>,
177+
public mutating func replace<R: RegexProtocol, Replacement: Collection>(
178+
_ regex: R,
177179
with replacement: Replacement,
178180
maxReplacements: Int = .max
179181
) where Replacement.Element == Element {

Sources/_StringProcessing/Algorithms/Algorithms/Split.swift

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,6 @@ public struct SplitCollection<Searcher: CollectionSearcher> {
2525
}
2626
}
2727

28-
extension SplitCollection where Searcher: BidirectionalCollectionSearcher {
29-
public func reversed() -> ReversedSplitCollection<Searcher> {
30-
ReversedSplitCollection(ranges: ranges.reversed())
31-
}
32-
}
33-
3428
extension SplitCollection: Sequence {
3529
public struct Iterator: IteratorProtocol {
3630
let base: Base
@@ -144,14 +138,6 @@ public struct ReversedSplitCollection<Searcher: BackwardCollectionSearcher> {
144138
}
145139
}
146140

147-
extension ReversedSplitCollection
148-
where Searcher: BidirectionalCollectionSearcher
149-
{
150-
public func reversed() -> SplitCollection<Searcher> {
151-
SplitCollection(ranges: ranges.reversed())
152-
}
153-
}
154-
155141
extension ReversedSplitCollection: Sequence {
156142
public struct Iterator: IteratorProtocol {
157143
let base: Base
@@ -289,15 +275,15 @@ extension BidirectionalCollection where Element: Comparable {
289275
// MARK: Regex algorithms
290276

291277
extension BidirectionalCollection where SubSequence == Substring {
292-
public func split<Capture>(
293-
by separator: Regex<Capture>
294-
) -> SplitCollection<RegexConsumer<Self>> {
278+
public func split<R: RegexProtocol>(
279+
by separator: R
280+
) -> SplitCollection<RegexConsumer<R, Self>> {
295281
split(by: RegexConsumer(separator))
296282
}
297283

298-
public func splitFromBack<Capture>(
299-
by separator: Regex<Capture>
300-
) -> ReversedSplitCollection<RegexConsumer<Self>> {
284+
public func splitFromBack<R: RegexProtocol>(
285+
by separator: R
286+
) -> ReversedSplitCollection<RegexConsumer<R, Self>> {
301287
splitFromBack(by: RegexConsumer(separator))
302288
}
303289
}

Sources/_StringProcessing/Algorithms/Algorithms/StartsWith.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,11 @@ extension BidirectionalCollection where Element: Equatable {
4848
// MARK: Regex algorithms
4949

5050
extension BidirectionalCollection where SubSequence == Substring {
51-
public func starts<Capture>(with regex: Regex<Capture>) -> Bool {
51+
public func starts<R: RegexProtocol>(with regex: R) -> Bool {
5252
starts(with: RegexConsumer(regex))
5353
}
5454

55-
public func ends<Capture>(with regex: Regex<Capture>) -> Bool {
55+
public func ends<R: RegexProtocol>(with regex: R) -> Bool {
5656
ends(with: RegexConsumer(regex))
5757
}
5858
}

Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -257,48 +257,48 @@ extension RangeReplaceableCollection
257257
// MARK: Regex algorithms
258258

259259
extension BidirectionalCollection where SubSequence == Substring {
260-
public func trimmingPrefix<Capture>(_ regex: Regex<Capture>) -> SubSequence {
260+
public func trimmingPrefix<R: RegexProtocol>(_ regex: R) -> SubSequence {
261261
trimmingPrefix(RegexConsumer(regex))
262262
}
263263

264-
public func trimmingSuffix<Capture>(_ regex: Regex<Capture>) -> SubSequence {
264+
public func trimmingSuffix<R: RegexProtocol>(_ regex: R) -> SubSequence {
265265
trimmingSuffix(RegexConsumer(regex))
266266
}
267267

268-
public func trimming<Capture>(_ regex: Regex<Capture>) -> SubSequence {
268+
public func trimming<R: RegexProtocol>(_ regex: R) -> SubSequence {
269269
trimming(RegexConsumer(regex))
270270
}
271271
}
272272

273273
extension RangeReplaceableCollection
274274
where Self: BidirectionalCollection, SubSequence == Substring
275275
{
276-
public mutating func trimPrefix<Capture>(_ regex: Regex<Capture>) {
276+
public mutating func trimPrefix<R: RegexProtocol>(_ regex: R) {
277277
trimPrefix(RegexConsumer(regex))
278278
}
279279

280-
public mutating func trimSuffix<Capture>(_ regex: Regex<Capture>) {
280+
public mutating func trimSuffix<R: RegexProtocol>(_ regex: R) {
281281
trimSuffix(RegexConsumer(regex))
282282
}
283283

284-
public mutating func trim<Capture>(_ regex: Regex<Capture>) {
285-
let consumer = RegexConsumer<Self>(regex)
284+
public mutating func trim<R: RegexProtocol>(_ regex: R) {
285+
let consumer = RegexConsumer<R, Self>(regex)
286286
trimPrefix(consumer)
287287
trimSuffix(consumer)
288288
}
289289
}
290290

291291
extension Substring {
292-
public mutating func trimPrefix<Capture>(_ regex: Regex<Capture>) {
292+
public mutating func trimPrefix<R: RegexProtocol>(_ regex: R) {
293293
trimPrefix(RegexConsumer(regex))
294294
}
295295

296-
public mutating func trimSuffix<Capture>(_ regex: Regex<Capture>) {
296+
public mutating func trimSuffix<R: RegexProtocol>(_ regex: R) {
297297
trimSuffix(RegexConsumer(regex))
298298
}
299299

300-
public mutating func trim<Capture>(_ regex: Regex<Capture>) {
301-
let consumer = RegexConsumer<Self>(regex)
300+
public mutating func trim<R: RegexProtocol>(_ regex: R) {
301+
let consumer = RegexConsumer<R, Self>(regex)
302302
trimPrefix(consumer)
303303
trimSuffix(consumer)
304304
}

Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ extension PredicateConsumer: StatelessCollectionSearcher {
5656
}
5757

5858
extension PredicateConsumer: BackwardCollectionSearcher,
59-
StatelessBackwardCollectionSearcher
59+
BackwardStatelessCollectionSearcher
6060
where Searched: BidirectionalCollection
6161
{
6262
public typealias BackwardSearched = Consumed

Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift

Lines changed: 42 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -9,56 +9,54 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
import _MatchingEngine
12+
public struct RegexConsumer<
13+
R: RegexProtocol, Consumed: BidirectionalCollection
14+
> where Consumed.SubSequence == Substring {
15+
// TODO: Should `Regex` itself implement these protocols?
16+
let regex: R
1317

14-
public struct RegexConsumer<Consumed: BidirectionalCollection>
15-
where Consumed.SubSequence == Substring
16-
{
17-
// TODO: consider let, for now lets us toggle tracing
18-
var vm: Executor
19-
20-
// FIXME: Possibility of fatal error isn't user friendly
21-
public init<Capture>(_ regex: Regex<Capture>) {
22-
do {
23-
self.vm = .init(
24-
program: try Compiler(ast: regex.ast).emit())
25-
} catch {
26-
fatalError("error: \(error)")
27-
}
18+
public init(_ regex: R) {
19+
self.regex = regex
2820
}
21+
}
2922

30-
public init(parsing regex: String) throws {
31-
self.vm = try _compileRegex(regex)
32-
}
33-
34-
func _consuming(
23+
extension RegexConsumer {
24+
func _matchingConsuming(
3525
_ consumed: Substring, in range: Range<String.Index>
36-
) -> String.Index? {
37-
let result = vm.execute(
38-
input: consumed.base,
39-
in: range,
40-
mode: .partialFromFront)
41-
return result?.range.upperBound
26+
) -> (upperBound: String.Index, match: Match)? {
27+
guard let result = regex._match(
28+
consumed.base,
29+
in: range, mode: .partialFromFront
30+
) else { return nil }
31+
return (result.range.upperBound, result.match)
4232
}
33+
}
34+
35+
// TODO: Explicitly implement the non-matching consumer/searcher protocols as
36+
// well, taking advantage of the fact that the captures can be ignored
37+
38+
extension RegexConsumer: MatchingCollectionConsumer {
39+
public typealias Match = R.Match
4340

44-
public func consuming(
41+
public func matchingConsuming(
4542
_ consumed: Consumed, in range: Range<Consumed.Index>
46-
) -> String.Index? {
47-
_consuming(consumed[...], in: range)
43+
) -> (upperBound: String.Index, match: Match)? {
44+
_matchingConsuming(consumed[...], in: range)
4845
}
4946
}
5047

5148
// TODO: We'll want to bake backwards into the engine
52-
extension RegexConsumer: BidirectionalCollectionConsumer {
53-
public func consumingBack(
49+
extension RegexConsumer: BidirectionalMatchingCollectionConsumer {
50+
public func matchingConsumingBack(
5451
_ consumed: Consumed, in range: Range<Consumed.Index>
55-
) -> String.Index? {
52+
) -> (lowerBound: String.Index, match: Match)? {
5653
var i = range.lowerBound
5754
while true {
58-
if let end = _consuming(consumed[...], in: i..<range.upperBound),
59-
end == range.upperBound
60-
{
61-
return i
55+
if let (end, capture) = _matchingConsuming(
56+
consumed[...],
57+
in: i..<range.upperBound
58+
), end == range.upperBound {
59+
return (i, capture)
6260
} else if i == range.upperBound {
6361
return nil
6462
} else {
@@ -68,26 +66,26 @@ extension RegexConsumer: BidirectionalCollectionConsumer {
6866
}
6967
}
7068

71-
extension RegexConsumer: StatelessCollectionSearcher {
69+
extension RegexConsumer: MatchingStatelessCollectionSearcher {
7270
public typealias Searched = Consumed
7371

7472
// TODO: We'll want to bake search into the engine so it can
7573
// take advantage of the structure of the regex itself and
7674
// its own internal state
77-
public func search(
75+
public func matchingSearch(
7876
_ searched: Searched, in range: Range<Searched.Index>
79-
) -> Range<String.Index>? {
80-
ConsumerSearcher(consumer: self).search(searched, in: range)
77+
) -> (range: Range<String.Index>, match: Match)? {
78+
ConsumerSearcher(consumer: self).matchingSearch(searched, in: range)
8179
}
8280
}
8381

8482
// TODO: Bake in search-back to engine too
85-
extension RegexConsumer: StatelessBackwardCollectionSearcher {
83+
extension RegexConsumer: BackwardMatchingStatelessCollectionSearcher {
8684
public typealias BackwardSearched = Consumed
8785

88-
public func searchBack(
86+
public func matchingSearchBack(
8987
_ searched: BackwardSearched, in range: Range<Searched.Index>
90-
) -> Range<String.Index>? {
91-
ConsumerSearcher(consumer: self).searchBack(searched, in: range)
88+
) -> (range: Range<String.Index>, match: Match)? {
89+
ConsumerSearcher(consumer: self).matchingSearchBack(searched, in: range)
9290
}
9391
}

0 commit comments

Comments
 (0)