Skip to content

Commit 6faddbf

Browse files
committed
Regex<Captures> -> Regex<Match>
Changes `Regex` and the result builder prototype to use `Match` as the generic parameter, making them consistent with the [Strongly Typed Regex Captures](https://forums.swift.org/t/pitch-strongly-typed-regex-captures/53391) pitch. Introduces `Tuple<n>` structs so that constraints on capture types (i.e. `Match` with its first element dropped) can be expressed while still filtering out empty captures in concatenation. `Tuple<n>` is also needed to implement a prototype of the [proposed matching semantics](#64). Because coercion into `Tuple<n>` can no longer rely on runtime magic the way native tuples do, we incorporate child capture type information into RECode's `captureNil` and `captureArray` instructions so that we always get a concrete type when forming a nil or an empty array capture. The resulting existential tuple capture can then be opened and bitcast to a `Tuple<n>`.
1 parent 9a06a3e commit 6faddbf

File tree

19 files changed

+2842
-2500
lines changed

19 files changed

+2842
-2500
lines changed

Sources/Exercises/Participants/RegexParticipant.swift

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ struct RegexLiteralParticipant: Participant {
3333
}
3434

3535
private func extractFromCaptures(
36-
lower: Substring, upper: Substring?, prop: Substring
36+
_ match: Tuple4<Substring, Substring, Substring?, Substring>
3737
) -> GraphemeBreakEntry? {
38-
guard let lowerScalar = Unicode.Scalar(hex: lower),
39-
let upperScalar = upper.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
40-
let property = Unicode.GraphemeBreakProperty(prop)
38+
guard let lowerScalar = Unicode.Scalar(hex: match.1),
39+
let upperScalar = match.2.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
40+
let property = Unicode.GraphemeBreakProperty(match.3)
4141
else {
4242
return nil
4343
}
@@ -48,8 +48,8 @@ private func extractFromCaptures(
4848
private func graphemeBreakPropertyData<RP: RegexProtocol>(
4949
forLine line: String,
5050
using regex: RP
51-
) -> GraphemeBreakEntry? where RP.Capture == (Substring, Substring?, Substring) {
52-
line.match(regex).map(\.captures).flatMap(extractFromCaptures)
51+
) -> GraphemeBreakEntry? where RP.Match == Tuple4<Substring, Substring, Substring?, Substring> {
52+
line.match(regex).map(\.match).flatMap(extractFromCaptures)
5353
}
5454

5555
private func graphemeBreakPropertyData(
@@ -75,5 +75,5 @@ private func graphemeBreakPropertyDataLiteral(
7575
return graphemeBreakPropertyData(
7676
forLine: line,
7777
using: r(#"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#,
78-
capturing: (Substring, Substring?, Substring).self))
78+
matching: Tuple4<Substring, Substring, Substring?, Substring>.self))
7979
}

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 105 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
// swift run VariadicsGenerator --max-arity 7 > Sources/RegexDSL/Concatenation.swift
1+
// swift run VariadicsGenerator --max-arity 7 > Sources/_StringProcessing/RegexDSL/Concatenation.swift
22

33
import ArgumentParser
44

55
struct Permutation {
66
let arity: Int
77
// 1 -> no extra constraint
8-
// 0 -> where T.Capture: NoCaptureProtocol
8+
// 0 -> where T.Match.Capture: EmptyCaptureProtocol
99
let bits: Int64
1010

1111
func isCaptureless(at index: Int) -> Bool {
@@ -77,8 +77,6 @@ func outputForEach<C: Collection>(
7777
if let lt = lineTerminator {
7878
let indent = needsSep ? " " : " "
7979
output("\(lt)\n\(indent)")
80-
} else if needsSep {
81-
output(" ")
8280
}
8381
}
8482
}
@@ -87,12 +85,13 @@ typealias Counter = Int64
8785
let patternProtocolName = "RegexProtocol"
8886
let concatenationStructTypeBaseName = "Concatenate"
8987
let capturingGroupTypeBaseName = "CapturingGroup"
88+
let matchAssociatedTypeName = "Match"
9089
let captureAssociatedTypeName = "Capture"
9190
let patternBuilderTypeName = "RegexBuilder"
9291
let patternProtocolRequirementName = "regex"
9392
let PatternTypeBaseName = "Regex"
94-
let emptyProtocolName = "EmptyProtocol"
95-
let emptyStructName = "Empty"
93+
let emptyProtocolName = "EmptyCaptureProtocol"
94+
let baseMatchTypeName = "Substring"
9695

9796
@main
9897
struct VariadicsGenerator: ParsableCommand {
@@ -114,6 +113,10 @@ struct VariadicsGenerator: ParsableCommand {
114113
115114
""")
116115

116+
for arity in 2...maxArity+1 {
117+
emitTupleStruct(arity: arity)
118+
}
119+
117120
for arity in minArity...maxArity {
118121
for permutation in Permutations(arity: arity) {
119122
emitConcatenation(permutation: permutation)
@@ -124,42 +127,121 @@ struct VariadicsGenerator: ParsableCommand {
124127
output("// END AUTO-GENERATED CONTENT")
125128
}
126129

130+
func emitTupleStruct(arity: Int) {
131+
output("""
132+
@frozen @dynamicMemberLookup
133+
public struct Tuple\(arity)<
134+
""")
135+
outputForEach(0..<arity, separator: ", ") {
136+
"_\($0)"
137+
}
138+
output("> {\n")
139+
// `public typealias Tuple = (_0, ...)`
140+
output("\n public typealias Tuple = (")
141+
outputForEach(0..<arity, separator: ", ") { "_\($0)" }
142+
output(")")
143+
// `public var tuple: Tuple`
144+
output("\n public var tuple: Tuple\n")
145+
// `subscript(dynamicMember:)`
146+
output("""
147+
public subscript<T>(dynamicMember keyPath: WritableKeyPath<Tuple, T>) -> T {
148+
get { tuple[keyPath: keyPath] }
149+
_modify { yield &tuple[keyPath: keyPath] }
150+
}
151+
""")
152+
output("\n}\n")
153+
output("extension Tuple\(arity): \(emptyProtocolName) where ")
154+
outputForEach(1..<arity, separator: ", ") {
155+
"_\($0): \(emptyProtocolName)"
156+
}
157+
output(" {}\n")
158+
output("extension Tuple\(arity): MatchProtocol {\n")
159+
output(" public typealias Capture = ")
160+
if arity == 2 {
161+
output("_1")
162+
} else {
163+
output("Tuple\(arity-1)<")
164+
outputForEach(1..<arity, separator: ", ") {
165+
"_\($0)"
166+
}
167+
output(">")
168+
}
169+
output("\n public init(_ tuple: Tuple) { self.tuple = tuple }")
170+
// `public init(_ _0: _0, ...) { ... }`
171+
output("\n public init(")
172+
outputForEach(0..<arity, separator: ", ") {
173+
"_ _\($0): _\($0)"
174+
}
175+
output(") {\n")
176+
output(" self.init((")
177+
outputForEach(0..<arity, separator: ", ") { "_\($0)" }
178+
output("))")
179+
output(" }")
180+
output("\n}\n")
181+
// Equatable
182+
output("extension Tuple\(arity): Equatable where ")
183+
outputForEach(0..<arity, separator: ", ") {
184+
"_\($0): Equatable"
185+
}
186+
output(" {\n")
187+
output(" public static func == (lhs: Self, rhs: Self) -> Bool {\n")
188+
output(" ")
189+
outputForEach(0..<arity, separator: " && ") {
190+
"lhs.tuple.\($0) == rhs.tuple.\($0)"
191+
}
192+
output("\n }")
193+
output("}\n")
194+
}
195+
127196
func emitConcatenation(permutation: Permutation) {
128197
let arity = permutation.arity
198+
199+
func emitGenericParameters(withConstraints: Bool) {
200+
outputForEach(0..<arity, separator: ", ") {
201+
var base = "T\($0)"
202+
if withConstraints {
203+
base += ": \(patternProtocolName)"
204+
}
205+
return base
206+
}
207+
}
208+
129209
// Emit concatenation type declarations.
130210
// public struct Concatenate{n}_{perm}<...>: RegexProtocol {
131-
// public typealias Capture = ...
132-
// public let regex: Regex
211+
// public typealias Match = ...
212+
// public let regex: Regex<Match>
133213
// public init(...) { ... }
134214
// }
135-
let typeName = "\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
215+
let typeName =
216+
"\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
136217
output("public struct \(typeName)<\n ")
137-
outputForEach(0..<arity, separator: ",") { "T\($0): \(patternProtocolName)" }
218+
emitGenericParameters(withConstraints: true)
138219
output("\n>: \(patternProtocolName)")
139220
if permutation.hasCaptureless {
140221
output(" where ")
141-
outputForEach(permutation.capturelessIndices, separator: ",") {
142-
"T\($0).\(captureAssociatedTypeName): \(emptyProtocolName)"
222+
outputForEach(permutation.capturelessIndices, separator: ", ") {
223+
"T\($0).\(matchAssociatedTypeName).\(captureAssociatedTypeName): \(emptyProtocolName)"
143224
}
144225
}
145226
output(" {\n")
146227
let captureIndices = permutation.captureIndices
147-
output(" public typealias \(captureAssociatedTypeName) = ")
228+
output(" public typealias \(matchAssociatedTypeName) = ")
148229
let captureElements = captureIndices
149-
.map { "T\($0).\(captureAssociatedTypeName)" }
230+
.map { "T\($0).\(matchAssociatedTypeName).\(captureAssociatedTypeName)" }
150231
if captureElements.isEmpty {
151-
output(emptyStructName)
232+
output(baseMatchTypeName)
152233
} else {
153-
output("(\(captureElements.joined(separator: ", ")))")
234+
let count = captureElements.count + 1
235+
output("Tuple\(count)<\(baseMatchTypeName), \(captureElements.joined(separator: ", "))>")
154236
}
155237
output("\n")
156-
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(captureAssociatedTypeName)>\n")
238+
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(matchAssociatedTypeName)>\n")
157239
output(" init(")
158-
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
240+
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
159241
output(") {\n")
160242
output(" \(patternProtocolRequirementName) = .init(ast: concat(\n ")
161243
outputForEach(
162-
0..<arity, separator: ",", lineTerminator: ""
244+
0..<arity, separator: ", ", lineTerminator: ""
163245
) { i in
164246
"x\(i).\(patternProtocolRequirementName).ast"
165247
}
@@ -169,14 +251,14 @@ struct VariadicsGenerator: ParsableCommand {
169251
// Emit concatenation builders.
170252
output("extension \(patternBuilderTypeName) {\n")
171253
output(" public static func buildBlock<")
172-
outputForEach(0..<arity, separator: ",") { "T\($0)" }
254+
emitGenericParameters(withConstraints: true)
173255
output(">(\n ")
174-
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
256+
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
175257
output("\n ) -> \(typeName)<")
176-
outputForEach(0..<arity, separator: ",") { "T\($0)" }
258+
emitGenericParameters(withConstraints: false)
177259
output("> {\n")
178260
output(" \(typeName)(")
179-
outputForEach(0..<arity, separator: ",") { "x\($0)" }
261+
outputForEach(0..<arity, separator: ", ") { "x\($0)" }
180262
output(")\n }\n}\n\n")
181263
}
182264
}

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ extension AST {
2525
.reduce(.empty, +)
2626
.map(CaptureStructure.optional)
2727
case .concatenation(let concatenation):
28-
assert(concatenation.children.count > 1)
2928
return concatenation.children.map(\.captureStructure).reduce(.empty, +)
3029
case .group(let group):
3130
let innerCaptures = group.child.captureStructure
@@ -107,6 +106,27 @@ extension CaptureStructure {
107106
}
108107
return false
109108
}
109+
110+
public func type(withAtomType atomType: Any.Type) -> Any.Type {
111+
switch self {
112+
case .atom:
113+
return atomType
114+
case .array(let child):
115+
return TypeConstruction.arrayType(of: child.type(withAtomType: atomType))
116+
case .optional(let child):
117+
return TypeConstruction.optionalType(of: child.type(withAtomType: atomType))
118+
case .tuple(let children):
119+
return TypeConstruction.tupleType(of: children.map {
120+
$0.type(withAtomType: atomType)
121+
})
122+
}
123+
}
124+
125+
public typealias DefaultAtomType = Substring
126+
127+
public var type: Any.Type {
128+
type(withAtomType: DefaultAtomType.self)
129+
}
110130
}
111131

112132
// MARK: - Serialization

Sources/_StringProcessing/Utility/TypeConstruction.swift renamed to Sources/_MatchingEngine/Utility/TypeConstruction.swift

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,9 @@ private func swift_getTupleTypeMetadata3(
4848
proposedWitnesses: UnsafeRawPointer?
4949
) -> (value: Any.Type, state: Int)
5050

51-
enum TypeConstruction {
52-
51+
public enum TypeConstruction {
5352
/// Returns a tuple metatype of the given element types.
54-
static func tupleType<
53+
public static func tupleType<
5554
ElementTypes: BidirectionalCollection
5655
>(
5756
of elementTypes: __owned ElementTypes
@@ -104,7 +103,7 @@ enum TypeConstruction {
104103
}
105104

106105
/// Creates a type-erased tuple with the given elements.
107-
static func tuple<Elements: BidirectionalCollection>(
106+
public static func tuple<Elements: BidirectionalCollection>(
108107
of elements: __owned Elements
109108
) -> Any where Elements.Element == Any {
110109
// Open existential on the overall tuple type.
@@ -133,4 +132,18 @@ enum TypeConstruction {
133132
let elementTypes = elements.map { type(of: $0) }
134133
return _openExistential(tupleType(of: elementTypes), do: create)
135134
}
135+
136+
public static func arrayType(of childType: Any.Type) -> Any.Type {
137+
func helper<T>(_: T.Type) -> Any.Type {
138+
[T].self
139+
}
140+
return _openExistential(childType, do: helper)
141+
}
142+
143+
public static func optionalType(of childType: Any.Type) -> Any.Type {
144+
func helper<T>(_: T.Type) -> Any.Type {
145+
T?.self
146+
}
147+
return _openExistential(childType, do: helper)
148+
}
136149
}

Sources/_StringProcessing/Capture.swift

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,17 @@ import _MatchingEngine
55
enum Capture {
66
case atom(Any)
77
indirect case tuple([Capture])
8-
indirect case optional(Capture?)
9-
indirect case array([Capture])
8+
indirect case some(Capture)
9+
case none(childType: AnyCaptureType)
10+
indirect case array([Capture], childType: AnyCaptureType)
11+
12+
static func none(childType: Any.Type) -> Capture {
13+
.none(childType: AnyCaptureType(childType))
14+
}
15+
16+
static func array(_ children: [Capture], childType: Any.Type) -> Capture {
17+
.array(children, childType: AnyCaptureType(childType))
18+
}
1019
}
1120

1221
extension Capture {
@@ -25,21 +34,48 @@ extension Capture {
2534
case .tuple(let elements):
2635
return TypeConstruction.tuple(
2736
of: elements.map(\.value))
28-
case .array(let elements):
29-
guard let first = elements.first else {
30-
return [Any]()
37+
case .array(let elements, let childType):
38+
func helper<T>(_: T.Type) -> Any {
39+
elements.map { $0.value as! T }
3140
}
32-
// When the array is not empty, infer the concrete `Element` type from the first element.
33-
func helper<T>(_ first: T) -> Any {
34-
var castElements = [first]
35-
for element in elements.dropFirst() {
36-
castElements.append(element.value as! T)
37-
}
38-
return castElements
41+
return _openExistential(childType.base, do: helper)
42+
case .some(let subcapture):
43+
return subcapture.value
44+
case .none(let childType):
45+
func helper<T>(_: T.Type) -> Any {
46+
nil as T? as Any
3947
}
40-
return _openExistential(first.value, do: helper)
41-
case .optional(let subcapture):
42-
return subcapture?.value as Any
48+
return _openExistential(childType.base, do: helper)
49+
}
50+
}
51+
52+
private func prepending(_ newElement: Any) -> Self {
53+
switch self {
54+
case .atom, .some, .none, .array:
55+
return .tuple([.atom(newElement), self])
56+
case .tuple(let elements):
57+
return .tuple([.atom(newElement)] + elements)
4358
}
4459
}
60+
61+
func matchValue(withWholeMatch wholeMatch: Substring) -> Any {
62+
prepending(wholeMatch).value
63+
}
64+
}
65+
66+
/// A wrapper of an existential metatype, equatable and hashable by reference.
67+
struct AnyCaptureType: Equatable, Hashable {
68+
var base: Any.Type
69+
70+
init(_ type: Any.Type) {
71+
base = type
72+
}
73+
74+
static func == (lhs: AnyCaptureType, rhs: AnyCaptureType) -> Bool {
75+
lhs.base == rhs.base
76+
}
77+
78+
func hash(into hasher: inout Hasher) {
79+
hasher.combine(ObjectIdentifier(base))
80+
}
4581
}

0 commit comments

Comments
 (0)