Skip to content

Initial matcher/validator support #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 2 additions & 24 deletions Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@ public struct RangesCollection<Searcher: CollectionSearcher> {
}
}

extension RangesCollection where Searcher: BidirectionalCollectionSearcher {
  /// The value produced by `base.lastRange(of: searcher)`, or `nil` when
  /// that call yields no range.
  public var last: Range<Base.Index>? {
    return base.lastRange(of: searcher)
  }

  /// Builds a `ReversedRangesCollection` from this collection's `base` and
  /// `searcher`.
  public func reversed() -> ReversedRangesCollection<Searcher> {
    return ReversedRangesCollection(base: base, searcher: searcher)
  }
}

public struct RangesIterator<Searcher: CollectionSearcher>: IteratorProtocol {
public typealias Base = Searcher.Searched

Expand Down Expand Up @@ -139,18 +129,6 @@ public struct ReversedRangesCollection<Searcher: BackwardCollectionSearcher> {
}
}

extension ReversedRangesCollection
  where Searcher: BidirectionalCollectionSearcher
{
  /// The value produced by `base.firstRange(of: searcher)`, or `nil` when
  /// that call yields no range. Because this view is the reversed one, its
  /// last element is looked up with the forward `firstRange(of:)` query.
  public var last: Range<Base.Index>? {
    return base.firstRange(of: searcher)
  }

  /// Builds a forward `RangesCollection` from this collection's `base` and
  /// `searcher`.
  public func reversed() -> RangesCollection<Searcher> {
    return RangesCollection(base: base, searcher: searcher)
  }
}

extension ReversedRangesCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
Expand Down Expand Up @@ -240,13 +218,13 @@ extension BidirectionalCollection where Element: Comparable {
extension BidirectionalCollection where SubSequence == Substring {
public func ranges<Capture>(
of regex: Regex<Capture>
) -> RangesCollection<RegexConsumer<Self>> {
) -> RangesCollection<RegexConsumer<Self, Capture>> {
ranges(of: RegexConsumer(regex))
}

public func rangesFromBack<Capture>(
of regex: Regex<Capture>
) -> ReversedRangesCollection<RegexConsumer<Self>> {
) -> ReversedRangesCollection<RegexConsumer<Self, Capture>> {
rangesFromBack(of: RegexConsumer(regex))
}
}
8 changes: 5 additions & 3 deletions Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ extension RangeReplaceableCollection {
maxReplacements: maxReplacements)
}

public mutating func replace<Searcher: CollectionSearcher, R: Collection>(
public mutating func replace<
Searcher: CollectionSearcher, Replacement: Collection
>(
_ searcher: Searcher,
with replacement: R,
with replacement: Replacement,
maxReplacements: Int = .max
) where Searcher.Searched == SubSequence, R.Element == Element {
) where Searcher.Searched == SubSequence, Replacement.Element == Element {
self = replacing(
searcher,
with: replacement,
Expand Down
18 changes: 2 additions & 16 deletions Sources/_StringProcessing/Algorithms/Algorithms/Split.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ public struct SplitCollection<Searcher: CollectionSearcher> {
}
}

extension SplitCollection where Searcher: BidirectionalCollectionSearcher {
  /// Builds a `ReversedSplitCollection` whose ranges are this collection's
  /// `ranges`, reversed.
  public func reversed() -> ReversedSplitCollection<Searcher> {
    let reversedRanges = ranges.reversed()
    return ReversedSplitCollection(ranges: reversedRanges)
  }
}

extension SplitCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
Expand Down Expand Up @@ -144,14 +138,6 @@ public struct ReversedSplitCollection<Searcher: BackwardCollectionSearcher> {
}
}

extension ReversedSplitCollection
  where Searcher: BidirectionalCollectionSearcher
{
  /// Builds a forward `SplitCollection` whose ranges are this collection's
  /// `ranges`, reversed.
  public func reversed() -> SplitCollection<Searcher> {
    let forwardRanges = ranges.reversed()
    return SplitCollection(ranges: forwardRanges)
  }
}

extension ReversedSplitCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
Expand Down Expand Up @@ -291,13 +277,13 @@ extension BidirectionalCollection where Element: Comparable {
extension BidirectionalCollection where SubSequence == Substring {
public func split<Capture>(
by separator: Regex<Capture>
) -> SplitCollection<RegexConsumer<Self>> {
) -> SplitCollection<RegexConsumer<Self, Capture>> {
split(by: RegexConsumer(separator))
}

public func splitFromBack<Capture>(
by separator: Regex<Capture>
) -> ReversedSplitCollection<RegexConsumer<Self>> {
) -> ReversedSplitCollection<RegexConsumer<Self, Capture>> {
splitFromBack(by: RegexConsumer(separator))
}
}
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ extension RangeReplaceableCollection
}

public mutating func trim<Capture>(_ regex: Regex<Capture>) {
let consumer = RegexConsumer<Self>(regex)
let consumer = RegexConsumer<Self, Capture>(regex)
trimPrefix(consumer)
trimSuffix(consumer)
}
Expand All @@ -298,7 +298,7 @@ extension Substring {
}

public mutating func trim<Capture>(_ regex: Regex<Capture>) {
let consumer = RegexConsumer<Self>(regex)
let consumer = RegexConsumer<Self, Capture>(regex)
trimPrefix(consumer)
trimSuffix(consumer)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ extension PredicateConsumer: StatelessCollectionSearcher {
}

extension PredicateConsumer: BackwardCollectionSearcher,
StatelessBackwardCollectionSearcher
BackwardStatelessCollectionSearcher
where Searched: BidirectionalCollection
{
public typealias BackwardSearched = Consumed
Expand Down
84 changes: 40 additions & 44 deletions Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,56 +9,52 @@
//
//===----------------------------------------------------------------------===//

import _MatchingEngine
public struct RegexConsumer<
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks fine to me for now.

Outside the scope of this PR, we might make RegexProtocol itself a consumer/searcher/matcher. But I think it's best to establish parent-child relationships after we have the fundamental model in place. The important thing is getting this code and conformances in.

BTW, why does this need to be public?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some consumer/searcher algorithms return a type generic over the searcher, so the regex overloads return a type generic over RegexConsumer. So same story as PredicateConsumer at the moment.

Consumed: BidirectionalCollection, Capture: MatchProtocol
> where Consumed.SubSequence == Substring {
// TODO: Should `Regex` itself implement these protocols?
let regex: Regex<Capture>

public struct RegexConsumer<Consumed: BidirectionalCollection>
where Consumed.SubSequence == Substring
{
// TODO: consider let, for now lets us toggle tracing
var vm: Executor

// FIXME: Possibility of fatal error isn't user friendly
public init<Capture>(_ regex: Regex<Capture>) {
do {
self.vm = .init(
program: try Compiler(ast: regex.ast).emit())
} catch {
fatalError("error: \(error)")
}
}

public init(parsing regex: String) throws {
self.vm = try _compileRegex(regex)
public init(_ regex: Regex<Capture>) {
self.regex = regex
}

func _consuming(
func _matchingConsuming(
_ consumed: Substring, in range: Range<String.Index>
) -> String.Index? {
let result = vm.execute(
input: consumed.base,
in: range,
mode: .partialFromFront)
return result?.range.upperBound
) -> (Capture, String.Index)? {
guard let result = regex._match(
consumed.base,
in: range, mode: .partialFromFront
) else { return nil }
return (result.match, result.range.upperBound)
}
}

// TODO: Explicitly implement the non-matching consumer/searcher protocols as
// well, taking advantage of the fact that the captures can be ignored

extension RegexConsumer: MatchingCollectionConsumer {
public typealias Match = Capture

public func consuming(
public func matchingConsuming(
_ consumed: Consumed, in range: Range<Consumed.Index>
) -> String.Index? {
_consuming(consumed[...], in: range)
) -> (Capture, String.Index)? {
_matchingConsuming(consumed[...], in: range)
}
}

// TODO: We'll want to bake backwards into the engine
extension RegexConsumer: BidirectionalCollectionConsumer {
public func consumingBack(
extension RegexConsumer: BidirectionalMatchingCollectionConsumer {
public func matchingConsumingBack(
_ consumed: Consumed, in range: Range<Consumed.Index>
) -> String.Index? {
) -> (Capture, String.Index)? {
var i = range.lowerBound
while true {
if let end = _consuming(consumed[...], in: i..<range.upperBound),
end == range.upperBound
{
return i
if let (capture, end) = _matchingConsuming(
consumed[...],
in: i..<range.upperBound
), end == range.upperBound {
return (capture, i)
} else if i == range.upperBound {
return nil
} else {
Expand All @@ -68,26 +64,26 @@ extension RegexConsumer: BidirectionalCollectionConsumer {
}
}

extension RegexConsumer: StatelessCollectionSearcher {
extension RegexConsumer: MatchingStatelessCollectionSearcher {
public typealias Searched = Consumed

// TODO: We'll want to bake search into the engine so it can
// take advantage of the structure of the regex itself and
// its own internal state
public func search(
public func matchingSearch(
_ searched: Searched, in range: Range<Searched.Index>
) -> Range<String.Index>? {
ConsumerSearcher(consumer: self).search(searched, in: range)
) -> (Capture, Range<String.Index>)? {
ConsumerSearcher(consumer: self).matchingSearch(searched, in: range)
}
}

// TODO: Bake in search-back to engine too
extension RegexConsumer: StatelessBackwardCollectionSearcher {
extension RegexConsumer: BackwardMatchingStatelessCollectionSearcher {
public typealias BackwardSearched = Consumed

public func searchBack(
public func matchingSearchBack(
_ searched: BackwardSearched, in range: Range<Searched.Index>
) -> Range<String.Index>? {
ConsumerSearcher(consumer: self).searchBack(searched, in: range)
) -> (Capture, Range<String.Index>)? {
ConsumerSearcher(consumer: self).matchingSearchBack(searched, in: range)
}
}
48 changes: 48 additions & 0 deletions Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

// MARK: `CollectionSearcher` algorithms

extension Collection {
  /// Runs `searcher` over the whole collection and returns the result of its
  /// first `matchingSearch` call.
  ///
  /// - Parameter searcher: A matching searcher whose `Searched` type is
  ///   `Self`.
  /// - Returns: The `(match, range)` pair produced by the searcher, or `nil`
  ///   when the searcher produces none.
  public func firstMatch<S: MatchingCollectionSearcher>(
    of searcher: S
  ) -> (S.Match, Range<S.Searched.Index>)? where S.Searched == Self {
    // The search state is created for the full extent of the collection.
    let wholeRange = startIndex..<endIndex
    var searchState = searcher.state(for: self, in: wholeRange)
    return searcher.matchingSearch(self, &searchState)
  }
}

extension BidirectionalCollection {
  /// Runs `searcher` backward over the whole collection and returns the
  /// result of its first `matchingSearchBack` call.
  ///
  /// - Parameter searcher: A backward matching searcher whose
  ///   `BackwardSearched` type is `Self`.
  /// - Returns: The `(match, range)` pair produced by the searcher, or `nil`
  ///   when the searcher produces none.
  public func lastMatch<S: BackwardMatchingCollectionSearcher>(
    of searcher: S
  ) -> (S.Match, Range<S.BackwardSearched.Index>)?
    where S.BackwardSearched == Self
  {
    // The backward search state is created for the full extent of the
    // collection.
    let wholeRange = startIndex..<endIndex
    var searchState = searcher.backwardState(for: self, in: wholeRange)
    return searcher.matchingSearchBack(self, &searchState)
  }
}

// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
  /// Wraps `regex` in a `RegexConsumer` and forwards to the searcher-based
  /// `firstMatch(of:)`.
  ///
  /// - Parameter regex: The regex to search for.
  /// - Returns: The `(capture, range)` pair produced by the search, or `nil`
  ///   when it produces none.
  public func firstMatch<Capture>(
    of regex: Regex<Capture>
  ) -> (Capture, Range<String.Index>)? {
    let consumer = RegexConsumer<Self, Capture>(regex)
    return firstMatch(of: consumer)
  }

  /// Wraps `regex` in a `RegexConsumer` and forwards to the searcher-based
  /// `lastMatch(of:)`.
  ///
  /// - Parameter regex: The regex to search for.
  /// - Returns: The `(capture, range)` pair produced by the backward search,
  ///   or `nil` when it produces none.
  public func lastMatch<Capture>(
    of regex: Regex<Capture>
  ) -> (Capture, Range<String.Index>)? {
    let consumer = RegexConsumer<Self, Capture>(regex)
    return lastMatch(of: consumer)
  }
}
Loading