Skip to content

Commit f00bfe0

Browse files
authored
Merge pull request #222 from rxwei/anyregexoutput
2 parents c97cd0d + b5dd0ef commit f00bfe0

File tree

6 files changed

+156
-115
lines changed

6 files changed

+156
-115
lines changed

Sources/_StringProcessing/Capture.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ struct StoredCapture {
3333
}
3434

3535
// TODO: Where should this live? Inside TypeConstruction?
36-
func constructExistentialMatchComponent(
36+
func constructExistentialOutputComponent(
3737
from input: Substring,
3838
in range: Range<String.Index>?,
3939
value: Any?,
@@ -62,10 +62,10 @@ func constructExistentialMatchComponent(
6262
}
6363

6464
extension StructuredCapture {
65-
func existentialMatchComponent(
65+
func existentialOutputComponent(
6666
from input: Substring
6767
) -> Any {
68-
constructExistentialMatchComponent(
68+
constructExistentialOutputComponent(
6969
from: input,
7070
in: storedCapture?.range,
7171
value: storedCapture?.value,
@@ -81,13 +81,13 @@ extension StructuredCapture {
8181
extension Sequence where Element == StructuredCapture {
8282
// FIXME: This is a stop gap where we still slice the input
8383
// and traffic through existentials
84-
func existentialMatch(
84+
func existentialOutput(
8585
from input: Substring
8686
) -> Any {
8787
var caps = Array<Any>()
8888
caps.append(input)
8989
caps.append(contentsOf: self.map {
90-
$0.existentialMatchComponent(from: input)
90+
$0.existentialOutputComponent(from: input)
9191
})
9292
return TypeConstruction.tuple(of: caps)
9393
}

Sources/_StringProcessing/Executor.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ struct Executor {
4444
// specially compiling) whole-match values.
4545
let value: Any?
4646
if Output.self != Substring.self,
47-
Output.self != (Substring, DynamicCaptures).self,
47+
Output.self != AnyRegexOutput.self,
4848
caps.isEmpty
4949
{
5050
value = cpu.registers.values.first
@@ -65,7 +65,7 @@ struct Executor {
6565
_ input: String,
6666
in inputRange: Range<String.Index>,
6767
_ mode: MatchMode
68-
) throws -> Regex<(Substring, DynamicCaptures)>.Match? {
68+
) throws -> Regex<AnyRegexOutput>.Match? {
6969
try match(input, in: inputRange, mode)
7070
}
7171
}
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import _MatchingEngine
13+
14+
extension Regex where Output == AnyRegexOutput {
15+
public init(_ pattern: String) throws {
16+
self.init(ast: try parse(pattern, .traditional))
17+
}
18+
}
19+
20+
extension Regex.Match where Output == AnyRegexOutput {
21+
// Ensures `.0` always refers to the whole match.
22+
public subscript(
23+
dynamicMember keyPath: KeyPath<(Substring, _doNotUse: ()), Substring>
24+
) -> Substring {
25+
input[range]
26+
}
27+
}
28+
29+
public struct AnyRegexOutput {
30+
let input: String
31+
fileprivate let _elements: [ElementRepresentation]
32+
33+
/// The underlying representation of the element of a type-erased regex
34+
/// output.
35+
fileprivate struct ElementRepresentation {
36+
/// The depth of `Optional`s wrapping the underlying value. For example,
37+
/// `Substring` has optional depth `0`, and `Int??` has optional depth `2`.
38+
let optionalDepth: Int
39+
/// The bounds of the output element.
40+
let bounds: Range<String.Index>?
41+
}
42+
}
43+
44+
extension AnyRegexOutput {
45+
/// Creates a type-erased regex output from an existing output.
46+
///
47+
/// Use this initializer to fit a regex with strongly typed captures into the
48+
/// use site of a dynamic regex, i.e. one that was created from a string.
49+
public init<Output>(_ match: Regex<Output>.Match) {
50+
// Note: We use type equality instead of `match.output as? ...` to prevent
51+
// unexpected optional flattening.
52+
if Output.self == AnyRegexOutput.self {
53+
self = match.output as! AnyRegexOutput
54+
return
55+
}
56+
fatalError("FIXME: Not implemented")
57+
// self.init(input: match.input, _elements: <elements of output tuple>)
58+
}
59+
}
60+
61+
extension AnyRegexOutput {
62+
internal init<C: Collection>(
63+
input: String, elements: C
64+
) where C.Element == StructuredCapture {
65+
self.init(input: input, _elements: elements.map(ElementRepresentation.init))
66+
}
67+
}
68+
69+
extension AnyRegexOutput.ElementRepresentation {
70+
init(_ element: StructuredCapture) {
71+
self.init(
72+
optionalDepth: element.optionalCount,
73+
bounds: element.storedCapture.flatMap(\.range))
74+
}
75+
76+
func value(forInput input: String) -> Any {
77+
// Ok for now because `constructExistentialOutputComponent`
78+
// won't slice the input if there's no range to slice with
79+
//
80+
// FIXME: This is ugly :-/
81+
let input = bounds.map { input[$0] } ?? ""
82+
83+
return constructExistentialOutputComponent(
84+
from: input,
85+
in: bounds,
86+
value: nil,
87+
optionalCount: optionalDepth)
88+
}
89+
}
90+
91+
extension AnyRegexOutput: RandomAccessCollection {
92+
public struct Element {
93+
fileprivate let representation: ElementRepresentation
94+
let input: String
95+
96+
public var range: Range<String.Index>? {
97+
representation.bounds
98+
}
99+
100+
public var substring: Substring? {
101+
range.map { input[$0] }
102+
}
103+
}
104+
105+
public var startIndex: Int {
106+
_elements.startIndex
107+
}
108+
109+
public var endIndex: Int {
110+
_elements.endIndex
111+
}
112+
113+
public var count: Int {
114+
_elements.count
115+
}
116+
117+
public func index(after i: Int) -> Int {
118+
_elements.index(after: i)
119+
}
120+
121+
public func index(before i: Int) -> Int {
122+
_elements.index(before: i)
123+
}
124+
125+
public subscript(position: Int) -> Element {
126+
.init(representation: _elements[position], input: input)
127+
}
128+
}

Sources/_StringProcessing/RegexDSL/DynamicCaptures.swift

Lines changed: 0 additions & 84 deletions
This file was deleted.

Sources/_StringProcessing/RegexDSL/Match.swift

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ extension Regex {
2323

2424
extension Regex.Match {
2525
public var output: Output {
26-
if Output.self == (Substring, DynamicCaptures).self {
27-
// FIXME(rdar://89449323): Compiler assertion
28-
let input = input
29-
let dynCaps = rawCaptures.map { StoredDynamicCapture($0, in: input) }
30-
return (input[range], dynCaps) as! Output
26+
if Output.self == AnyRegexOutput.self {
27+
let wholeMatchAsCapture = StructuredCapture(
28+
optionalCount: 0,
29+
storedCapture: StoredCapture(range: range, value: nil))
30+
let output = AnyRegexOutput(
31+
input: input,
32+
elements: [wholeMatchAsCapture] + rawCaptures)
33+
return output as! Output
3134
} else if Output.self == Substring.self {
3235
// FIXME: Plumb whole match (`.0`) through the matching engine.
3336
return input[range] as! Output
@@ -40,7 +43,7 @@ extension Regex.Match {
4043
guard value == nil else {
4144
fatalError("FIXME: what would this mean?")
4245
}
43-
let typeErasedMatch = rawCaptures.existentialMatch(from: input[range])
46+
let typeErasedMatch = rawCaptures.existentialOutput(from: input[range])
4447
return typeErasedMatch as! Output
4548
}
4649
}
@@ -62,7 +65,7 @@ extension Regex.Match {
6265
preconditionFailure(
6366
"Reference did not capture any match in the regex")
6467
}
65-
return rawCaptures[offset].existentialMatchComponent(from: input[...])
68+
return rawCaptures[offset].existentialOutputComponent(from: input[...])
6669
as! Capture
6770
}
6871
}

Tests/RegexTests/RegexDSLTests.swift

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,6 @@
1212
import XCTest
1313
@testable import _StringProcessing
1414

15-
func dynCap(
16-
_ s: String, optional: Bool = false
17-
) -> StoredDynamicCapture {
18-
StoredDynamicCapture(s[...], optionalCount: optional ? 1 : 0)
19-
}
20-
2115
class RegexDSLTests: XCTestCase {
2216
func _testDSLCaptures<Content: RegexComponent, MatchType>(
2317
_ tests: (input: String, expectedCaptures: MatchType?)...,
@@ -559,25 +553,25 @@ class RegexDSLTests: XCTestCase {
559553
do {
560554
let regex = try Regex("aabcc.")
561555
let line = "aabccd"
562-
let captures = try XCTUnwrap(line.match(regex)?.1)
563-
XCTAssertEqual(captures, [])
556+
let match = try XCTUnwrap(line.match(regex))
557+
XCTAssertEqual(match.0, line[...])
558+
let output = match.output
559+
XCTAssertEqual(output[0].substring, line[...])
564560
}
565561
do {
566-
567562
let regex = try Regex(
568563
#"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#)
569564
let line = """
570565
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \
571566
COMBINING MARK TUKWENTIS
572567
"""
573-
let captures = try XCTUnwrap(line.match(regex)?.1)
574-
XCTAssertEqual(
575-
captures,
576-
[
577-
dynCap("A6F0"),
578-
dynCap("A6F1", optional: true),
579-
dynCap("Extend"),
580-
])
568+
let match = try XCTUnwrap(line.match(regex))
569+
XCTAssertEqual(match.0, line[...])
570+
let output = match.output
571+
XCTAssertEqual(output[0].substring, line[...])
572+
XCTAssertTrue(output[1].substring == "A6F0")
573+
XCTAssertTrue(output[2].substring == "A6F1")
574+
XCTAssertTrue(output[3].substring == "Extend")
581575
}
582576
}
583577

0 commit comments

Comments
 (0)