Skip to content

Replace DynamicCaptures with AnyRegexOutput. #222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Sources/Exercises/Participants/RegexParticipant.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ private func extractFromCaptures(
private func graphemeBreakPropertyData<RP: RegexComponent>(
forLine line: String,
using regex: RP
) -> GraphemeBreakEntry? where RP.Match == (Substring, Substring, Substring?, Substring) {
line.match(regex).map(\.match).flatMap(extractFromCaptures)
) -> GraphemeBreakEntry? where RP.Output == (Substring, Substring, Substring?, Substring) {
line.match(regex).map(\.output).flatMap(extractFromCaptures)
}

private func graphemeBreakPropertyDataLiteral(
Expand Down Expand Up @@ -91,7 +91,7 @@ private func graphemeBreakPropertyData(
TryCapture(OneOrMore(.word)) { Unicode.GraphemeBreakProperty($0) }
ZeroOrMore(.any)
}.map {
let (_, lower, upper, property) = $0.match
let (_, lower, upper, property) = $0.output
return GraphemeBreakEntry(lower...(upper ?? lower), property)
}
}
24 changes: 12 additions & 12 deletions Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ var standardError = StandardErrorStream()

typealias Counter = Int64
let regexComponentProtocolName = "RegexComponent"
let matchAssociatedTypeName = "Match"
let outputAssociatedTypeName = "Output"
let patternProtocolRequirementName = "regex"
let regexTypeName = "Regex"
let baseMatchTypeName = "Substring"
Expand Down Expand Up @@ -202,15 +202,15 @@ struct VariadicsGenerator: ParsableCommand {
// Emit concatenation type declaration.

let whereClause: String = {
var result = " where R0.Match == "
var result = " where R0.\(outputAssociatedTypeName) == "
if leftArity == 0 {
result += "W0"
} else {
result += "(W0"
result += (0..<leftArity).map { ", C\($0)" }.joined()
result += ")"
}
result += ", R1.Match == "
result += ", R1.\(outputAssociatedTypeName) == "
if rightArity == 0 {
result += "W1"
} else {
Expand Down Expand Up @@ -267,7 +267,7 @@ struct VariadicsGenerator: ParsableCommand {
}
output(")")
}
output("> where R0.\(matchAssociatedTypeName) == ")
output("> where R0.\(outputAssociatedTypeName) == ")
if leftArity == 0 {
output("W0")
} else {
Expand Down Expand Up @@ -348,10 +348,10 @@ struct VariadicsGenerator: ParsableCommand {
self.matchType = arity == 0
? baseMatchTypeName
: "(\(baseMatchTypeName), \(quantifiedCaptures))"
self.whereClauseForInit = "where \(matchAssociatedTypeName) == \(matchType)" +
(arity == 0 ? "" : ", Component.Match == (W, \(capturesJoined))")
self.whereClauseForInit = "where \(outputAssociatedTypeName) == \(matchType)" +
(arity == 0 ? "" : ", Component.\(outputAssociatedTypeName) == (W, \(capturesJoined))")
self.whereClause = arity == 0 ? "" :
"where Component.Match == (W, \(capturesJoined))"
"where Component.\(outputAssociatedTypeName) == (W, \(capturesJoined))"
}
}

Expand Down Expand Up @@ -468,10 +468,10 @@ struct VariadicsGenerator: ParsableCommand {
let whereClause: String = {
var result = "where R0: \(regexComponentProtocolName), R1: \(regexComponentProtocolName)"
if leftArity > 0 {
result += ", R0.\(matchAssociatedTypeName) == (W0, \((0..<leftArity).map { "C\($0)" }.joined(separator: ", ")))"
result += ", R0.\(outputAssociatedTypeName) == (W0, \((0..<leftArity).map { "C\($0)" }.joined(separator: ", ")))"
}
if rightArity > 0 {
result += ", R1.\(matchAssociatedTypeName) == (W1, \((leftArity..<leftArity+rightArity).map { "C\($0)" }.joined(separator: ", ")))"
result += ", R1.\(outputAssociatedTypeName) == (W1, \((leftArity..<leftArity+rightArity).map { "C\($0)" }.joined(separator: ", ")))"
}
return result
}()
Expand Down Expand Up @@ -516,7 +516,7 @@ struct VariadicsGenerator: ParsableCommand {
}()
let whereClause: String = """
where R: \(regexComponentProtocolName), \
R.\(matchAssociatedTypeName) == (W, \(captures))
R.\(outputAssociatedTypeName) == (W, \(captures))
"""
let resultCaptures = (0..<arity).map { "C\($0)?" }.joined(separator: ", ")
output("""
Expand Down Expand Up @@ -544,8 +544,8 @@ struct VariadicsGenerator: ParsableCommand {
}
let rawNewMatchType = newMatchType(newCaptureType: "W")
let transformedNewMatchType = newMatchType(newCaptureType: "NewCapture")
let whereClauseRaw = "where \(matchAssociatedTypeName) == \(rawNewMatchType), R.\(matchAssociatedTypeName) == \(matchType)"
let whereClauseTransformed = "where \(matchAssociatedTypeName) == \(transformedNewMatchType), R.\(matchAssociatedTypeName) == \(matchType)"
let whereClauseRaw = "where \(outputAssociatedTypeName) == \(rawNewMatchType), R.\(outputAssociatedTypeName) == \(matchType)"
let whereClauseTransformed = "where \(outputAssociatedTypeName) == \(transformedNewMatchType), R.\(outputAssociatedTypeName) == \(matchType)"
output("""
// MARK: - Non-builder capture arity \(arity)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ extension RegexConsumer {
consumed.base,
in: range, mode: .partialFromFront
) else { return nil }
return (result.range.upperBound, result.match)
return (result.range.upperBound, result.output)
}
}

// TODO: Explicitly implement the non-matching consumer/searcher protocols as
// well, taking advantage of the fact that the captures can be ignored

extension RegexConsumer: MatchingCollectionConsumer {
public typealias Match = R.Match
public typealias Match = R.Output

public func matchingConsuming(
_ consumed: Consumed, in range: Range<Consumed.Index>
Expand Down
10 changes: 5 additions & 5 deletions Sources/_StringProcessing/Capture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct StoredCapture {
}

// TODO: Where should this live? Inside TypeConstruction?
func constructExistentialMatchComponent(
func constructExistentialOutputComponent(
from input: Substring,
in range: Range<String.Index>?,
value: Any?,
Expand Down Expand Up @@ -62,10 +62,10 @@ func constructExistentialMatchComponent(
}

extension StructuredCapture {
func existentialMatchComponent(
func existentialOutputComponent(
from input: Substring
) -> Any {
constructExistentialMatchComponent(
constructExistentialOutputComponent(
from: input,
in: storedCapture?.range,
value: storedCapture?.value,
Expand All @@ -81,13 +81,13 @@ extension StructuredCapture {
extension Sequence where Element == StructuredCapture {
// FIXME: This is a stop gap where we still slice the input
// and traffic through existentials
func existentialMatch(
func existentialOutput(
from input: Substring
) -> Any {
var caps = Array<Any>()
caps.append(input)
caps.append(contentsOf: self.map {
$0.existentialMatchComponent(from: input)
$0.existentialOutputComponent(from: input)
})
return TypeConstruction.tuple(of: caps)
}
Expand Down
12 changes: 6 additions & 6 deletions Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ struct Executor {
self.engine = Engine(program, enableTracing: enablesTracing)
}

func match<Match>(
func match<Output>(
_ input: String,
in inputRange: Range<String.Index>,
_ mode: MatchMode
) throws -> MatchResult<Match>? {
) throws -> Regex<Output>.Match? {
var cpu = engine.makeProcessor(
input: input, bounds: inputRange, matchMode: mode)

Expand All @@ -43,8 +43,8 @@ struct Executor {
// FIXME: This is a workaround for not tracking (or
// specially compiling) whole-match values.
let value: Any?
if Match.self != Substring.self,
Match.self != (Substring, DynamicCaptures).self,
if Output.self != Substring.self,
Output.self != AnyRegexOutput.self,
caps.isEmpty
{
value = cpu.registers.values.first
Expand All @@ -53,7 +53,7 @@ struct Executor {
value = nil
}

return MatchResult(
return .init(
input: input,
range: range,
rawCaptures: caps,
Expand All @@ -65,7 +65,7 @@ struct Executor {
_ input: String,
in inputRange: Range<String.Index>,
_ mode: MatchMode
) throws -> MatchResult<(Substring, DynamicCaptures)>? {
) throws -> Regex<AnyRegexOutput>.Match? {
try match(input, in: inputRange, mode)
}
}
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/RegexDSL/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ extension Anchor {
public func lookahead<R: RegexComponent>(
negative: Bool = false,
@RegexComponentBuilder _ content: () -> R
) -> Regex<R.Match> {
) -> Regex<R.Output> {
Regex(node: .nonCapturingGroup(negative ? .negativeLookahead : .lookahead, content().regex.root))
}

public func lookahead<R: RegexComponent>(
_ component: R,
negative: Bool = false
) -> Regex<R.Match> {
) -> Regex<R.Output> {
Regex(node: .nonCapturingGroup(negative ? .negativeLookahead : .lookahead, component.regex.root))
}
128 changes: 128 additions & 0 deletions Sources/_StringProcessing/RegexDSL/AnyRegexOutput.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import _MatchingEngine

extension Regex where Output == AnyRegexOutput {
  /// Creates a regex with type-erased output by parsing `pattern`.
  ///
  /// The pattern text is parsed with the `.traditional` syntax option and the
  /// resulting tree is handed to `init(ast:)`.
  ///
  /// - Parameter pattern: The regex source text to parse.
  /// - Throws: Whatever error `parse` throws for `pattern`.
  public init(_ pattern: String) throws {
    let ast = try parse(pattern, .traditional)
    self.init(ast: ast)
  }
}

extension Regex.Match where Output == AnyRegexOutput {
  /// Ensures `.0` always refers to the whole match.
  ///
  /// The key path is rooted in a two-element tuple whose only `Substring`
  /// element is at position `0`; the second element, `_doNotUse`, has type
  /// `()`. The key path is never evaluated: the result is always the slice of
  /// `input` covered by `range`.
  public subscript(
    dynamicMember keyPath: KeyPath<(Substring, _doNotUse: ()), Substring>
  ) -> Substring {
    get {
      let wholeMatch: Substring = input[range]
      return wholeMatch
    }
  }
}

public struct AnyRegexOutput {
let input: String
fileprivate let _elements: [ElementRepresentation]

/// The underlying representation of the element of a type-erased regex
/// output.
fileprivate struct ElementRepresentation {
/// The depth of `Optional`s wrapping the underlying value. For example,
/// `Substring` has optional depth `0`, and `Int??` has optional depth `2`.
let optionalDepth: Int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this PR, did we decide on an optional nesting story?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s no non-factorial solution to optional flattening this year, unfortunately. So we’ll have nested optionals.

/// The bounds of the output element.
let bounds: Range<String.Index>?
}
}

extension AnyRegexOutput {
  /// Creates a type-erased regex output from an existing match.
  ///
  /// Use this initializer to fit a regex with strongly typed captures into the
  /// use site of a dynamic regex, i.e. one that was created from a string.
  ///
  /// Only matches whose `Output` is already `AnyRegexOutput` are currently
  /// supported; any other `Output` type traps with a "not implemented"
  /// `fatalError`.
  ///
  /// - Parameter match: The match whose output should be type-erased.
  public init<Output>(_ match: Regex<Output>.Match) {
    // Note: Compare metatypes rather than using `match.output as? ...`, so
    // that no unexpected optional flattening takes place.
    guard Output.self == AnyRegexOutput.self else {
      fatalError("FIXME: Not implemented")
      // self.init(input: match.input, _elements: <elements of output tuple>)
    }
    // The guard above established that `Output` is `AnyRegexOutput`.
    self = match.output as! AnyRegexOutput
  }
}

extension AnyRegexOutput {
  /// Creates a type-erased output over `input` from a collection of
  /// structured captures.
  ///
  /// Each capture is converted to an `ElementRepresentation`, preserving the
  /// order of `elements`.
  ///
  /// - Parameters:
  ///   - input: The string stored as this output's `input`.
  ///   - elements: The captures to convert into element representations.
  internal init<C: Collection>(
    input: String, elements: C
  ) where C.Element == StructuredCapture {
    let representations = elements.map { capture in
      ElementRepresentation(capture)
    }
    self.init(input: input, _elements: representations)
  }
}

extension AnyRegexOutput.ElementRepresentation {
  /// Creates a representation from a structured capture.
  ///
  /// The capture's `optionalCount` becomes `optionalDepth`, and the range of
  /// its stored capture (if there is one) becomes `bounds`.
  init(_ element: StructuredCapture) {
    let captureRange = element.storedCapture?.range
    self.init(
      optionalDepth: element.optionalCount,
      bounds: captureRange)
  }

  /// Builds the existential value of this element for the given input.
  ///
  /// - Parameter input: The string that `bounds` indexes into.
  /// - Returns: The result of `constructExistentialOutputComponent`, called
  ///   with this element's bounds and optional depth and a `nil` value.
  func value(forInput input: String) -> Any {
    // When there are no bounds, an empty placeholder is passed instead of a
    // slice. Per the original note this is OK for now because the helper is
    // expected not to slice when there is no range to slice with.
    // NOTE(review): the helper's body is not visible here -- confirm.
    //
    // FIXME: This is ugly :-/
    let slice: Substring
    if let bounds = bounds {
      slice = input[bounds]
    } else {
      slice = ""
    }

    return constructExistentialOutputComponent(
      from: slice,
      in: bounds,
      value: nil,
      optionalCount: optionalDepth)
  }
}

extension AnyRegexOutput: RandomAccessCollection {
  /// A single element of a type-erased regex output, paired with the input
  /// string it refers to.
  public struct Element {
    fileprivate let representation: ElementRepresentation
    let input: String

    /// The bounds of this element, or `nil` if its representation has none.
    public var range: Range<String.Index>? { representation.bounds }

    /// The slice of `input` covered by `range`, or `nil` if `range` is `nil`.
    public var substring: Substring? {
      guard let range = range else { return nil }
      return input[range]
    }
  }

  // Index handling is forwarded to the underlying `_elements` storage.

  /// The first valid index, taken from the element storage.
  public var startIndex: Int { _elements.startIndex }

  /// The past-the-end index, taken from the element storage.
  public var endIndex: Int { _elements.endIndex }

  /// The number of stored elements.
  public var count: Int { _elements.count }

  /// Returns the index immediately after `i`.
  public func index(after i: Int) -> Int {
    return _elements.index(after: i)
  }

  /// Returns the index immediately before `i`.
  public func index(before i: Int) -> Int {
    return _elements.index(before: i)
  }

  /// Returns the element at `position`, bundled with this output's input.
  public subscript(position: Int) -> Element {
    let representation = _elements[position]
    return Element(representation: representation, input: input)
  }
}
Loading