Skip to content

Initial matcher/validator support #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extension BidirectionalCollection where Element: Comparable {
// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
public func contains<Capture>(_ regex: Regex<Capture>) -> Bool {
public func contains<R: RegexProtocol>(_ regex: R) -> Bool {
contains(RegexConsumer(regex))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ extension BidirectionalCollection where Element: Comparable {
// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
public func firstRange<Capture>(of regex: Regex<Capture>) -> Range<Index>? {
public func firstRange<R: RegexProtocol>(of regex: R) -> Range<Index>? {
firstRange(of: RegexConsumer(regex))
}

public func lastRange<Capture>(of regex: Regex<Capture>) -> Range<Index>? {
public func lastRange<R: RegexProtocol>(of regex: R) -> Range<Index>? {
lastRange(of: RegexConsumer(regex))
}
}
34 changes: 6 additions & 28 deletions Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@ public struct RangesCollection<Searcher: CollectionSearcher> {
}
}

extension RangesCollection where Searcher: BidirectionalCollectionSearcher {
public func reversed() -> ReversedRangesCollection<Searcher> {
ReversedRangesCollection(base: base, searcher: searcher)
}

public var last: Range<Base.Index>? {
base.lastRange(of: searcher)
}
}

public struct RangesIterator<Searcher: CollectionSearcher>: IteratorProtocol {
public typealias Base = Searcher.Searched

Expand Down Expand Up @@ -139,18 +129,6 @@ public struct ReversedRangesCollection<Searcher: BackwardCollectionSearcher> {
}
}

extension ReversedRangesCollection
where Searcher: BidirectionalCollectionSearcher
{
public func reversed() -> RangesCollection<Searcher> {
RangesCollection(base: base, searcher: searcher)
}

public var last: Range<Base.Index>? {
base.firstRange(of: searcher)
}
}

extension ReversedRangesCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
Expand Down Expand Up @@ -238,15 +216,15 @@ extension BidirectionalCollection where Element: Comparable {
// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
public func ranges<Capture>(
of regex: Regex<Capture>
) -> RangesCollection<RegexConsumer<Self>> {
public func ranges<R: RegexProtocol>(
of regex: R
) -> RangesCollection<RegexConsumer<R, Self>> {
ranges(of: RegexConsumer(regex))
}

public func rangesFromBack<Capture>(
of regex: Regex<Capture>
) -> ReversedRangesCollection<RegexConsumer<Self>> {
public func rangesFromBack<R: RegexProtocol>(
of regex: R
) -> ReversedRangesCollection<RegexConsumer<R, Self>> {
rangesFromBack(of: RegexConsumer(regex))
}
}
20 changes: 11 additions & 9 deletions Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ extension RangeReplaceableCollection {
maxReplacements: maxReplacements)
}

public mutating func replace<Searcher: CollectionSearcher, R: Collection>(
public mutating func replace<
Searcher: CollectionSearcher, Replacement: Collection
>(
_ searcher: Searcher,
with replacement: R,
with replacement: Replacement,
maxReplacements: Int = .max
) where Searcher.Searched == SubSequence, R.Element == Element {
) where Searcher.Searched == SubSequence, Replacement.Element == Element {
self = replacing(
searcher,
with: replacement,
Expand Down Expand Up @@ -147,8 +149,8 @@ extension RangeReplaceableCollection
// MARK: Regex algorithms

extension RangeReplaceableCollection where SubSequence == Substring {
public func replacing<Capture, Replacement: Collection>(
_ regex: Regex<Capture>,
public func replacing<R: RegexProtocol, Replacement: Collection>(
_ regex: R,
with replacement: Replacement,
subrange: Range<Index>,
maxReplacements: Int = .max
Expand All @@ -160,8 +162,8 @@ extension RangeReplaceableCollection where SubSequence == Substring {
maxReplacements: maxReplacements)
}

public func replacing<Capture, Replacement: Collection>(
_ regex: Regex<Capture>,
public func replacing<R: RegexProtocol, Replacement: Collection>(
_ regex: R,
with replacement: Replacement,
maxReplacements: Int = .max
) -> Self where Replacement.Element == Element {
Expand All @@ -172,8 +174,8 @@ extension RangeReplaceableCollection where SubSequence == Substring {
maxReplacements: maxReplacements)
}

public mutating func replace<Capture, Replacement: Collection>(
_ regex: Regex<Capture>,
public mutating func replace<R: RegexProtocol, Replacement: Collection>(
_ regex: R,
with replacement: Replacement,
maxReplacements: Int = .max
) where Replacement.Element == Element {
Expand Down
26 changes: 6 additions & 20 deletions Sources/_StringProcessing/Algorithms/Algorithms/Split.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ public struct SplitCollection<Searcher: CollectionSearcher> {
}
}

extension SplitCollection where Searcher: BidirectionalCollectionSearcher {
public func reversed() -> ReversedSplitCollection<Searcher> {
ReversedSplitCollection(ranges: ranges.reversed())
}
}

extension SplitCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
Expand Down Expand Up @@ -144,14 +138,6 @@ public struct ReversedSplitCollection<Searcher: BackwardCollectionSearcher> {
}
}

extension ReversedSplitCollection
where Searcher: BidirectionalCollectionSearcher
{
public func reversed() -> SplitCollection<Searcher> {
SplitCollection(ranges: ranges.reversed())
}
}

extension ReversedSplitCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
Expand Down Expand Up @@ -289,15 +275,15 @@ extension BidirectionalCollection where Element: Comparable {
// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
public func split<Capture>(
by separator: Regex<Capture>
) -> SplitCollection<RegexConsumer<Self>> {
public func split<R: RegexProtocol>(
by separator: R
) -> SplitCollection<RegexConsumer<R, Self>> {
split(by: RegexConsumer(separator))
}

public func splitFromBack<Capture>(
by separator: Regex<Capture>
) -> ReversedSplitCollection<RegexConsumer<Self>> {
public func splitFromBack<R: RegexProtocol>(
by separator: R
) -> ReversedSplitCollection<RegexConsumer<R, Self>> {
splitFromBack(by: RegexConsumer(separator))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ extension BidirectionalCollection where Element: Equatable {
// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
public func starts<Capture>(with regex: Regex<Capture>) -> Bool {
public func starts<R: RegexProtocol>(with regex: R) -> Bool {
starts(with: RegexConsumer(regex))
}

public func ends<Capture>(with regex: Regex<Capture>) -> Bool {
public func ends<R: RegexProtocol>(with regex: R) -> Bool {
ends(with: RegexConsumer(regex))
}
}
22 changes: 11 additions & 11 deletions Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift
Original file line number Diff line number Diff line change
Expand Up @@ -257,48 +257,48 @@ extension RangeReplaceableCollection
// MARK: Regex algorithms

extension BidirectionalCollection where SubSequence == Substring {
public func trimmingPrefix<Capture>(_ regex: Regex<Capture>) -> SubSequence {
public func trimmingPrefix<R: RegexProtocol>(_ regex: R) -> SubSequence {
trimmingPrefix(RegexConsumer(regex))
}

public func trimmingSuffix<Capture>(_ regex: Regex<Capture>) -> SubSequence {
public func trimmingSuffix<R: RegexProtocol>(_ regex: R) -> SubSequence {
trimmingSuffix(RegexConsumer(regex))
}

public func trimming<Capture>(_ regex: Regex<Capture>) -> SubSequence {
public func trimming<R: RegexProtocol>(_ regex: R) -> SubSequence {
trimming(RegexConsumer(regex))
}
}

extension RangeReplaceableCollection
where Self: BidirectionalCollection, SubSequence == Substring
{
public mutating func trimPrefix<Capture>(_ regex: Regex<Capture>) {
public mutating func trimPrefix<R: RegexProtocol>(_ regex: R) {
trimPrefix(RegexConsumer(regex))
}

public mutating func trimSuffix<Capture>(_ regex: Regex<Capture>) {
public mutating func trimSuffix<R: RegexProtocol>(_ regex: R) {
trimSuffix(RegexConsumer(regex))
}

public mutating func trim<Capture>(_ regex: Regex<Capture>) {
let consumer = RegexConsumer<Self>(regex)
public mutating func trim<R: RegexProtocol>(_ regex: R) {
let consumer = RegexConsumer<R, Self>(regex)
trimPrefix(consumer)
trimSuffix(consumer)
}
}

extension Substring {
public mutating func trimPrefix<Capture>(_ regex: Regex<Capture>) {
public mutating func trimPrefix<R: RegexProtocol>(_ regex: R) {
trimPrefix(RegexConsumer(regex))
}

public mutating func trimSuffix<Capture>(_ regex: Regex<Capture>) {
public mutating func trimSuffix<R: RegexProtocol>(_ regex: R) {
trimSuffix(RegexConsumer(regex))
}

public mutating func trim<Capture>(_ regex: Regex<Capture>) {
let consumer = RegexConsumer<Self>(regex)
public mutating func trim<R: RegexProtocol>(_ regex: R) {
let consumer = RegexConsumer<R, Self>(regex)
trimPrefix(consumer)
trimSuffix(consumer)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ extension PredicateConsumer: StatelessCollectionSearcher {
}

extension PredicateConsumer: BackwardCollectionSearcher,
StatelessBackwardCollectionSearcher
BackwardStatelessCollectionSearcher
where Searched: BidirectionalCollection
{
public typealias BackwardSearched = Consumed
Expand Down
86 changes: 42 additions & 44 deletions Sources/_StringProcessing/Algorithms/Consumers/RegexConsumer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,56 +9,54 @@
//
//===----------------------------------------------------------------------===//

import _MatchingEngine
public struct RegexConsumer<
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks fine to me for now.

Outside the scope of this PR, we might make RegexProtocol itself a consumer/searcher/matcher. But I think it's best to establish parent-child relationships after we have the fundamental model in place. The important thing is getting this code and these conformances in.

BTW, why does this need to be public?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some consumer/searcher algorithms return a type that is generic over the searcher, so the public regex overloads return a type that is generic over RegexConsumer, which means RegexConsumer itself has to be public. It's the same story as PredicateConsumer at the moment.

R: RegexProtocol, Consumed: BidirectionalCollection
> where Consumed.SubSequence == Substring {
// TODO: Should `Regex` itself implement these protocols?
let regex: R

public struct RegexConsumer<Consumed: BidirectionalCollection>
where Consumed.SubSequence == Substring
{
// TODO: consider let, for now lets us toggle tracing
var vm: Executor

// FIXME: Possibility of fatal error isn't user friendly
public init<Capture>(_ regex: Regex<Capture>) {
do {
self.vm = .init(
program: try Compiler(ast: regex.ast).emit())
} catch {
fatalError("error: \(error)")
}
public init(_ regex: R) {
self.regex = regex
}
}

public init(parsing regex: String) throws {
self.vm = try _compileRegex(regex)
}

func _consuming(
extension RegexConsumer {
func _matchingConsuming(
_ consumed: Substring, in range: Range<String.Index>
) -> String.Index? {
let result = vm.execute(
input: consumed.base,
in: range,
mode: .partialFromFront)
return result?.range.upperBound
) -> (upperBound: String.Index, match: Match)? {
guard let result = regex._match(
consumed.base,
in: range, mode: .partialFromFront
) else { return nil }
return (result.range.upperBound, result.match)
}
}

// TODO: Explicitly implement the non-matching consumer/searcher protocols as
// well, taking advantage of the fact that the captures can be ignored

extension RegexConsumer: MatchingCollectionConsumer {
public typealias Match = R.Match

public func consuming(
public func matchingConsuming(
_ consumed: Consumed, in range: Range<Consumed.Index>
) -> String.Index? {
_consuming(consumed[...], in: range)
) -> (upperBound: String.Index, match: Match)? {
_matchingConsuming(consumed[...], in: range)
}
}

// TODO: We'll want to bake backwards into the engine
extension RegexConsumer: BidirectionalCollectionConsumer {
public func consumingBack(
extension RegexConsumer: BidirectionalMatchingCollectionConsumer {
public func matchingConsumingBack(
_ consumed: Consumed, in range: Range<Consumed.Index>
) -> String.Index? {
) -> (lowerBound: String.Index, match: Match)? {
var i = range.lowerBound
while true {
if let end = _consuming(consumed[...], in: i..<range.upperBound),
end == range.upperBound
{
return i
if let (end, capture) = _matchingConsuming(
consumed[...],
in: i..<range.upperBound
), end == range.upperBound {
return (i, capture)
} else if i == range.upperBound {
return nil
} else {
Expand All @@ -68,26 +66,26 @@ extension RegexConsumer: BidirectionalCollectionConsumer {
}
}

extension RegexConsumer: StatelessCollectionSearcher {
extension RegexConsumer: MatchingStatelessCollectionSearcher {
public typealias Searched = Consumed

// TODO: We'll want to bake search into the engine so it can
// take advantage of the structure of the regex itself and
// its own internal state
public func search(
public func matchingSearch(
_ searched: Searched, in range: Range<Searched.Index>
) -> Range<String.Index>? {
ConsumerSearcher(consumer: self).search(searched, in: range)
) -> (range: Range<String.Index>, match: Match)? {
ConsumerSearcher(consumer: self).matchingSearch(searched, in: range)
}
}

// TODO: Bake in search-back to engine too
extension RegexConsumer: StatelessBackwardCollectionSearcher {
extension RegexConsumer: BackwardMatchingStatelessCollectionSearcher {
public typealias BackwardSearched = Consumed

public func searchBack(
public func matchingSearchBack(
_ searched: BackwardSearched, in range: Range<Searched.Index>
) -> Range<String.Index>? {
ConsumerSearcher(consumer: self).searchBack(searched, in: range)
) -> (range: Range<String.Index>, match: Match)? {
ConsumerSearcher(consumer: self).matchingSearchBack(searched, in: range)
}
}
Loading