Skip to content

Commit 980fbb0

Browse files
committed
Regex<Captures> -> Regex<Match>
Changes `Regex` and the result builder prototype to use `Match` as the generic parameter, making them consistent with the [Strongly Typed Regex Captures](https://forums.swift.org/t/pitch-strongly-typed-regex-captures/53391) pitch. Introduces `Tuple<n>` structs so that constraints can be expressed on capture types (i.e. `Match` with its first element dropped) while still being able to filter out empty captures in concatenation. `Tuple<n>` is also needed to implement a prototype of the [proposed matching semantics](#64). As coercion into `Tuple<n>` can no longer use runtime magic like native tuples do, we incorporate child capture type information into RECode's `captureNil` and `captureArray` instructions so that we will always get a concrete type when forming a nil or an empty array capture. The resulting existential tuple capture can then be opened and bitcast to a `Tuple<n>`.
1 parent 9a06a3e commit 980fbb0

File tree

19 files changed

+2925
-2500
lines changed

19 files changed

+2925
-2500
lines changed

Sources/Exercises/Participants/RegexParticipant.swift

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ struct RegexLiteralParticipant: Participant {
3333
}
3434

3535
private func extractFromCaptures(
36-
lower: Substring, upper: Substring?, prop: Substring
36+
_ match: Tuple4<Substring, Substring, Substring?, Substring>
3737
) -> GraphemeBreakEntry? {
38-
guard let lowerScalar = Unicode.Scalar(hex: lower),
39-
let upperScalar = upper.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
40-
let property = Unicode.GraphemeBreakProperty(prop)
38+
guard let lowerScalar = Unicode.Scalar(hex: match._1),
39+
let upperScalar = match._2.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
40+
let property = Unicode.GraphemeBreakProperty(match._3)
4141
else {
4242
return nil
4343
}
@@ -48,8 +48,8 @@ private func extractFromCaptures(
4848
private func graphemeBreakPropertyData<RP: RegexProtocol>(
4949
forLine line: String,
5050
using regex: RP
51-
) -> GraphemeBreakEntry? where RP.Capture == (Substring, Substring?, Substring) {
52-
line.match(regex).map(\.captures).flatMap(extractFromCaptures)
51+
) -> GraphemeBreakEntry? where RP.Match == Tuple4<Substring, Substring, Substring?, Substring> {
52+
line.match(regex).map(\.match).flatMap(extractFromCaptures)
5353
}
5454

5555
private func graphemeBreakPropertyData(
@@ -75,5 +75,5 @@ private func graphemeBreakPropertyDataLiteral(
7575
return graphemeBreakPropertyData(
7676
forLine: line,
7777
using: r(#"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#,
78-
capturing: (Substring, Substring?, Substring).self))
78+
matching: Tuple4<Substring, Substring, Substring?, Substring>.self))
7979
}

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 107 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
// swift run VariadicsGenerator --max-arity 7 > Sources/RegexDSL/Concatenation.swift
1+
// swift run VariadicsGenerator --max-arity 7 > Sources/_StringProcessing/RegexDSL/Concatenation.swift
22

33
import ArgumentParser
44

55
struct Permutation {
66
let arity: Int
77
// 1 -> no extra constraint
8-
// 0 -> where T.Capture: NoCaptureProtocol
8+
// 0 -> where T.Match: NoCaptureProtocol
99
let bits: Int64
1010

1111
func isCaptureless(at index: Int) -> Bool {
@@ -77,8 +77,6 @@ func outputForEach<C: Collection>(
7777
if let lt = lineTerminator {
7878
let indent = needsSep ? " " : " "
7979
output("\(lt)\n\(indent)")
80-
} else if needsSep {
81-
output(" ")
8280
}
8381
}
8482
}
@@ -87,12 +85,13 @@ typealias Counter = Int64
8785
let patternProtocolName = "RegexProtocol"
8886
let concatenationStructTypeBaseName = "Concatenate"
8987
let capturingGroupTypeBaseName = "CapturingGroup"
88+
let matchAssociatedTypeName = "Match"
9089
let captureAssociatedTypeName = "Capture"
9190
let patternBuilderTypeName = "RegexBuilder"
9291
let patternProtocolRequirementName = "regex"
9392
let PatternTypeBaseName = "Regex"
94-
let emptyProtocolName = "EmptyProtocol"
95-
let emptyStructName = "Empty"
93+
let emptyProtocolName = "EmptyCaptureProtocol"
94+
let baseMatchTypeName = "Substring"
9695

9796
@main
9897
struct VariadicsGenerator: ParsableCommand {
@@ -114,6 +113,10 @@ struct VariadicsGenerator: ParsableCommand {
114113
115114
""")
116115

116+
for arity in 2...maxArity+1 {
117+
emitTupleStruct(arity: arity)
118+
}
119+
117120
for arity in minArity...maxArity {
118121
for permutation in Permutations(arity: arity) {
119122
emitConcatenation(permutation: permutation)
@@ -124,42 +127,123 @@ struct VariadicsGenerator: ParsableCommand {
124127
output("// END AUTO-GENERATED CONTENT")
125128
}
126129

130+
func emitTupleStruct(arity: Int) {
131+
output("@frozen public struct Tuple\(arity)<")
132+
outputForEach(0..<arity, separator: ", ") {
133+
"_\($0)"
134+
}
135+
output(">: TupleProtocol {\n")
136+
outputForEach(0..<arity, separator: "") {
137+
" public var _\($0): _\($0)\n"
138+
}
139+
output("\n init(_coercing: [Any]) {\n")
140+
outputForEach(0..<arity, separator: "") {
141+
" self._\($0) = _coercing[\($0)] as! _\($0)\n"
142+
}
143+
output(" }\n")
144+
145+
output("}\n")
146+
output("extension Tuple\(arity): \(emptyProtocolName) where ")
147+
outputForEach(1..<arity, separator: ", ") {
148+
"_\($0): \(emptyProtocolName)"
149+
}
150+
output(" {}\n")
151+
output("extension Tuple\(arity): MatchProtocol {\n")
152+
output(" public typealias Capture = ")
153+
if arity == 2 {
154+
output("_1")
155+
} else {
156+
output("Tuple\(arity-1)<")
157+
outputForEach(1..<arity, separator: ", ") {
158+
"_\($0)"
159+
}
160+
output(">")
161+
}
162+
// `public var tuple: (_0, ...) { (_0, ...) }`
163+
output("\n public var tuple: (")
164+
outputForEach(0..<arity, separator: ", ") {
165+
"_\($0)"
166+
}
167+
output(") { (")
168+
outputForEach(0..<arity, separator: ", ") {
169+
"self._\($0)"
170+
}
171+
output(") }\n")
172+
// `public init(_0: _0, ...) { ... }`
173+
output("\n public init(")
174+
outputForEach(0..<arity, separator: ", ") {
175+
"_ _\($0): _\($0)"
176+
}
177+
output(") {\n")
178+
outputForEach(0..<arity, separator: "") {
179+
" self._\($0) = _\($0)\n"
180+
}
181+
output(" }")
182+
output("\n}\n")
183+
// Equatable
184+
output("extension Tuple\(arity): Equatable where ")
185+
outputForEach(0..<arity, separator: ", ") {
186+
"_\($0): Equatable"
187+
}
188+
output(" {\n")
189+
output(" public static func == (lhs: Self, rhs: Self) -> Bool {\n")
190+
output(" ")
191+
outputForEach(0..<arity, separator: " && ") {
192+
"lhs._\($0) == rhs._\($0)"
193+
}
194+
output("\n }")
195+
output("}\n")
196+
}
197+
127198
func emitConcatenation(permutation: Permutation) {
128199
let arity = permutation.arity
200+
201+
func emitGenericParameters(withConstraints: Bool) {
202+
outputForEach(0..<arity, separator: ", ") {
203+
var base = "T\($0)"
204+
if withConstraints {
205+
base += ": \(patternProtocolName)"
206+
}
207+
return base
208+
}
209+
}
210+
129211
// Emit concatenation type declarations.
130212
// public struct Concatenation{n}_{perm}<...>: RegexProtocol {
131-
// public typealias Capture = ...
132-
// public let regex: Regex
213+
// public typealias Match = ...
214+
// public let regex: Regex<Match>
133215
// public init(...) { ... }
134216
// }
135-
let typeName = "\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
217+
let typeName =
218+
"\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
136219
output("public struct \(typeName)<\n ")
137-
outputForEach(0..<arity, separator: ",") { "T\($0): \(patternProtocolName)" }
220+
emitGenericParameters(withConstraints: true)
138221
output("\n>: \(patternProtocolName)")
139222
if permutation.hasCaptureless {
140223
output(" where ")
141-
outputForEach(permutation.capturelessIndices, separator: ",") {
142-
"T\($0).\(captureAssociatedTypeName): \(emptyProtocolName)"
224+
outputForEach(permutation.capturelessIndices, separator: ", ") {
225+
"T\($0).\(matchAssociatedTypeName).\(captureAssociatedTypeName): \(emptyProtocolName)"
143226
}
144227
}
145228
output(" {\n")
146229
let captureIndices = permutation.captureIndices
147-
output(" public typealias \(captureAssociatedTypeName) = ")
230+
output(" public typealias \(matchAssociatedTypeName) = ")
148231
let captureElements = captureIndices
149-
.map { "T\($0).\(captureAssociatedTypeName)" }
232+
.map { "T\($0).\(matchAssociatedTypeName).\(captureAssociatedTypeName)" }
150233
if captureElements.isEmpty {
151-
output(emptyStructName)
234+
output(baseMatchTypeName)
152235
} else {
153-
output("(\(captureElements.joined(separator: ", ")))")
236+
let count = captureElements.count + 1
237+
output("Tuple\(count)<\(baseMatchTypeName), \(captureElements.joined(separator: ", "))>")
154238
}
155239
output("\n")
156-
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(captureAssociatedTypeName)>\n")
240+
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(matchAssociatedTypeName)>\n")
157241
output(" init(")
158-
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
242+
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
159243
output(") {\n")
160244
output(" \(patternProtocolRequirementName) = .init(ast: concat(\n ")
161245
outputForEach(
162-
0..<arity, separator: ",", lineTerminator: ""
246+
0..<arity, separator: ", ", lineTerminator: ""
163247
) { i in
164248
"x\(i).\(patternProtocolRequirementName).ast"
165249
}
@@ -169,14 +253,14 @@ struct VariadicsGenerator: ParsableCommand {
169253
// Emit concatenation builders.
170254
output("extension \(patternBuilderTypeName) {\n")
171255
output(" public static func buildBlock<")
172-
outputForEach(0..<arity, separator: ",") { "T\($0)" }
256+
emitGenericParameters(withConstraints: true)
173257
output(">(\n ")
174-
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
258+
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
175259
output("\n ) -> \(typeName)<")
176-
outputForEach(0..<arity, separator: ",") { "T\($0)" }
260+
emitGenericParameters(withConstraints: false)
177261
output("> {\n")
178262
output(" \(typeName)(")
179-
outputForEach(0..<arity, separator: ",") { "x\($0)" }
263+
outputForEach(0..<arity, separator: ", ") { "x\($0)" }
180264
output(")\n }\n}\n\n")
181265
}
182266
}

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ extension AST {
2525
.reduce(.empty, +)
2626
.map(CaptureStructure.optional)
2727
case .concatenation(let concatenation):
28-
assert(concatenation.children.count > 1)
2928
return concatenation.children.map(\.captureStructure).reduce(.empty, +)
3029
case .group(let group):
3130
let innerCaptures = group.child.captureStructure
@@ -107,6 +106,27 @@ extension CaptureStructure {
107106
}
108107
return false
109108
}
109+
110+
public func type(withAtomType atomType: Any.Type) -> Any.Type {
111+
switch self {
112+
case .atom:
113+
return atomType
114+
case .array(let child):
115+
return TypeConstruction.arrayType(of: child.type(withAtomType: atomType))
116+
case .optional(let child):
117+
return TypeConstruction.optionalType(of: child.type(withAtomType: atomType))
118+
case .tuple(let children):
119+
return TypeConstruction.tupleType(of: children.map {
120+
$0.type(withAtomType: atomType)
121+
})
122+
}
123+
}
124+
125+
public typealias DefaultAtomType = Substring
126+
127+
public var type: Any.Type {
128+
type(withAtomType: DefaultAtomType.self)
129+
}
110130
}
111131

112132
// MARK: - Serialization

Sources/_StringProcessing/Utility/TypeConstruction.swift renamed to Sources/_MatchingEngine/Utility/TypeConstruction.swift

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,9 @@ private func swift_getTupleTypeMetadata3(
4848
proposedWitnesses: UnsafeRawPointer?
4949
) -> (value: Any.Type, state: Int)
5050

51-
enum TypeConstruction {
52-
51+
public enum TypeConstruction {
5352
/// Returns a tuple metatype of the given element types.
54-
static func tupleType<
53+
public static func tupleType<
5554
ElementTypes: BidirectionalCollection
5655
>(
5756
of elementTypes: __owned ElementTypes
@@ -104,7 +103,7 @@ enum TypeConstruction {
104103
}
105104

106105
/// Creates a type-erased tuple with the given elements.
107-
static func tuple<Elements: BidirectionalCollection>(
106+
public static func tuple<Elements: BidirectionalCollection>(
108107
of elements: __owned Elements
109108
) -> Any where Elements.Element == Any {
110109
// Open existential on the overall tuple type.
@@ -133,4 +132,18 @@ enum TypeConstruction {
133132
let elementTypes = elements.map { type(of: $0) }
134133
return _openExistential(tupleType(of: elementTypes), do: create)
135134
}
135+
136+
public static func arrayType(of childType: Any.Type) -> Any.Type {
137+
func helper<T>(_: T.Type) -> Any.Type {
138+
[T].self
139+
}
140+
return _openExistential(childType, do: helper)
141+
}
142+
143+
public static func optionalType(of childType: Any.Type) -> Any.Type {
144+
func helper<T>(_: T.Type) -> Any.Type {
145+
T?.self
146+
}
147+
return _openExistential(childType, do: helper)
148+
}
136149
}

Sources/_StringProcessing/Capture.swift

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,17 @@ import _MatchingEngine
55
enum Capture {
66
case atom(Any)
77
indirect case tuple([Capture])
8-
indirect case optional(Capture?)
9-
indirect case array([Capture])
8+
indirect case some(Capture)
9+
case none(childType: AnyCaptureType)
10+
indirect case array([Capture], childType: AnyCaptureType)
11+
12+
static func none(childType: Any.Type) -> Capture {
13+
.none(childType: AnyCaptureType(childType))
14+
}
15+
16+
static func array(_ children: [Capture], childType: Any.Type) -> Capture {
17+
.array(children, childType: AnyCaptureType(childType))
18+
}
1019
}
1120

1221
extension Capture {
@@ -25,21 +34,48 @@ extension Capture {
2534
case .tuple(let elements):
2635
return TypeConstruction.tuple(
2736
of: elements.map(\.value))
28-
case .array(let elements):
29-
guard let first = elements.first else {
30-
return [Any]()
37+
case .array(let elements, let childType):
38+
func helper<T>(_: T.Type) -> Any {
39+
elements.map { $0.value as! T }
3140
}
32-
// When the array is not empty, infer the concrete `Element `type from the first element.
33-
func helper<T>(_ first: T) -> Any {
34-
var castElements = [first]
35-
for element in elements.dropFirst() {
36-
castElements.append(element.value as! T)
37-
}
38-
return castElements
41+
return _openExistential(childType.base, do: helper)
42+
case .some(let subcapture):
43+
return subcapture.value
44+
case .none(let childType):
45+
func helper<T>(_: T.Type) -> Any {
46+
nil as T? as Any
3947
}
40-
return _openExistential(first.value, do: helper)
41-
case .optional(let subcapture):
42-
return subcapture?.value as Any
48+
return _openExistential(childType.base, do: helper)
49+
}
50+
}
51+
52+
private func prepending(_ newElement: Any) -> Self {
53+
switch self {
54+
case .atom, .some, .none, .array:
55+
return .tuple([.atom(newElement), self])
56+
case .tuple(let elements):
57+
return .tuple([.atom(newElement)] + elements)
4358
}
4459
}
60+
61+
func matchValue(withWholeMatch wholeMatch: Substring) -> Any {
62+
prepending(wholeMatch).value
63+
}
64+
}
65+
66+
/// A wrapper of an existential metatype, equatable and hashable by reference.
67+
struct AnyCaptureType: Equatable, Hashable {
68+
var base: Any.Type
69+
70+
init(_ type: Any.Type) {
71+
base = type
72+
}
73+
74+
static func == (lhs: AnyCaptureType, rhs: AnyCaptureType) -> Bool {
75+
lhs.base == rhs.base
76+
}
77+
78+
func hash(into hasher: inout Hasher) {
79+
hasher.combine(ObjectIdentifier(base))
80+
}
4581
}

0 commit comments

Comments
 (0)