Skip to content

Commit 41ffdc7

Browse files
committed
Heterogeneous alternation
Introduce `oneOf` combinator, which takes a builder block and joins all elements into a regex alternation. Fix the legacy VM to support alternation with captures. ----- Example: ```swift oneOf { "a".capture() "b".capture() "c" } => `.Match = (Substring, Substring?, Substring?)` ```
1 parent 3db17b5 commit 41ffdc7

File tree

9 files changed

+1466
-39
lines changed

9 files changed

+1466
-39
lines changed

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,20 @@ struct VariadicsGenerator: ParsableCommand {
160160
print(to: &standardError)
161161
}
162162

163+
print("Generating alternation overloads...", to: &standardError)
164+
for (leftArity, rightArity) in Permutations(totalArity: maxArity) {
165+
print(
166+
" Left arity: \(leftArity) Right arity: \(rightArity)",
167+
to: &standardError)
168+
emitAlternation(leftArity: leftArity, rightArity: rightArity)
169+
}
170+
171+
print("Generating 'AlternationBuilder.buildBlock(_:)' overloads...", to: &standardError)
172+
for arity in 1..<maxArity {
173+
print(" Capture arity: \(arity)", to: &standardError)
174+
emitUnaryAlternationBuildBlock(arity: arity)
175+
}
176+
163177
output("\n\n")
164178

165179
output("// END AUTO-GENERATED CONTENT\n")
@@ -444,4 +458,90 @@ struct VariadicsGenerator: ParsableCommand {
444458
445459
""")
446460
}
461+
462+
/// Emits the generated declarations for a binary alternation whose left
/// operand carries `leftArity` captures and whose right operand carries
/// `rightArity` captures.
///
/// Three declarations are written through `output`:
/// - a struct named `_Alternation_<leftArity>_<rightArity>` whose `Match` is
///   the base match type followed by the left captures as-is and the right
///   captures made optional (or just the base match type when neither side
///   has captures),
/// - an `AlternationBuilder.buildBlock(combining:into:)` overload, and
/// - a `|` operator overload,
/// the latter two both constructing that struct.
///
/// - Parameters:
///   - leftArity: Number of capture types (`C0 ..< C<leftArity>`) on the left.
///   - rightArity: Number of capture types on the right; they are numbered
///     immediately after the left ones.
func emitAlternation(leftArity: Int, rightArity: Int) {
  // Capture type names are computed once and reused for every clause below.
  let leftCaptureNames = (0..<leftArity).map { "C\($0)" }
  let rightCaptureNames = (leftArity..<leftArity+rightArity).map { "C\($0)" }
  let leftCaptureList = leftCaptureNames.joined(separator: ", ")
  let rightCaptureList = rightCaptureNames.joined(separator: ", ")

  // A side without captures only contributes its regex type parameter; a
  // side with captures additionally needs a whole-match parameter and one
  // parameter per capture.
  var genericParamNames = ["R0"]
  if !leftCaptureNames.isEmpty {
    genericParamNames += ["W0"] + leftCaptureNames
  }
  genericParamNames.append("R1")
  if !rightCaptureNames.isEmpty {
    genericParamNames += ["W1"] + rightCaptureNames
  }
  let genericParams = genericParamNames.joined(separator: ", ")

  var constraints = [
    "R0: \(regexProtocolName)",
    "R1: \(regexProtocolName)",
  ]
  if !leftCaptureNames.isEmpty {
    constraints.append("R0.\(matchAssociatedTypeName) == (W0, \(leftCaptureList))")
  }
  if !rightCaptureNames.isEmpty {
    constraints.append("R1.\(matchAssociatedTypeName) == (W1, \(rightCaptureList))")
  }
  let whereClause = "where " + constraints.joined(separator: ", ")

  // Left captures keep their types; right captures become optional.
  let resultCaptures = (leftCaptureNames + rightCaptureNames.map { $0 + "?" })
    .joined(separator: ", ")
  let matchType = resultCaptures.isEmpty
    ? baseMatchTypeName
    : "(\(baseMatchTypeName), \(resultCaptures))"

  let typeName = "_Alternation_\(leftArity)_\(rightArity)"
  output("""
    public struct \(typeName)<\(genericParams)>: \(regexProtocolName) \(whereClause) {
    public typealias Match = \(matchType)
    public let regex: Regex<Match>

    public init(_ left: R0, _ right: R1) {
    self.regex = .init(ast: appendAlt(right.regex.ast.root, to: left.regex.ast.root))
    }
    }

    extension AlternationBuilder {
    public static func buildBlock<\(genericParams)>(combining next: R1, into combined: R0) -> \(typeName)<\(genericParams)> {
    .init(combined, next)
    }
    }

    public func | <\(genericParams)>(lhs: R0, rhs: R1) -> \(typeName)<\(genericParams)> {
    .init(lhs, rhs)
    }

    """)
}
523+
524+
/// Emits an `AlternationBuilder.buildBlock(_:)` overload that accepts a single
/// regex component with `arity` captures.
///
/// The emitted overload wraps the component's AST root in `alt(...)` and
/// returns `Regex<(W, C0?, C1?, ...)>`, i.e. every capture type of the
/// component is made optional in the result.
///
/// - Parameter arity: Number of capture types of the component. Must be
///   greater than zero; a zero arity would produce an invalid `(W, )` tuple
///   in the emitted source, so it is rejected up front.
func emitUnaryAlternationBuildBlock(arity: Int) {
  // Fail fast in every build configuration instead of emitting malformed
  // source. The former `arity == 0` branch was unreachable behind this check
  // and has been removed.
  precondition(arity > 0, "Unary alternation buildBlock requires at least one capture")
  let captureNames = (0..<arity).map { "C\($0)" }
  let captures = captureNames.joined(separator: ", ")
  let genericParams = "R, W, " + captures
  let whereClause =
    "where R: \(regexProtocolName), R.\(matchAssociatedTypeName) == (W, \(captures))"
  let resultCaptures = captureNames.map { $0 + "?" }.joined(separator: ", ")
  output("""
    extension AlternationBuilder {
    public static func buildBlock<\(genericParams)>(_ regex: R) -> Regex<(W, \(resultCaptures))> \(whereClause) {
    .init(ast: alt(regex.regex.ast.root))
    }
    }

    """)
}
447547
}

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ extension AST {
153153
public init(_ mems: [AST.Node], pipes: [SourceLocation]) {
154154
// An alternation must have at least two branches (though the branches
155155
// may be empty AST nodes), and n - 1 pipes.
156-
precondition(mems.count >= 2)
156+
// precondition(mems.count >= 2)
157157
precondition(pipes.count == mems.count - 1)
158158

159159
self.children = mems

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ extension AST.Node {
3030
// Note: This implementation could be more optimized.
3131
switch self {
3232
case .alternation(let alternation):
33-
assert(alternation.children.count > 1)
3433
return alternation.children
3534
.map(\.captureStructure)
3635
.reduce(.empty, +)

Sources/_StringProcessing/ASTBuilder.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ func alt(_ asts: AST.Node...) -> AST.Node {
3636
alt(asts)
3737
}
3838

39+
/// Returns an alternation node that has `new` as its last branch.
///
/// When `combined` is already an alternation, its children are reused and
/// `new` is appended to them, so the result stays a single flat alternation.
/// Otherwise a two-branch alternation of `combined` followed by `new` is
/// built.
///
/// - Parameters:
///   - new: The branch to add at the end.
///   - combined: The node accumulated so far.
/// - Returns: The alternation node produced by `alt`.
func appendAlt(_ new: AST.Node, to combined: AST.Node) -> AST.Node {
  guard case .alternation(let existing) = combined else {
    // Not an alternation yet: start one from the two operands.
    return alt(combined, new)
  }
  return alt(existing.children + [new])
}
45+
3946
func concat(_ asts: [AST.Node]) -> AST.Node {
4047
.concatenation(.init(asts, .fake))
4148
}

Sources/_StringProcessing/Legacy/LegacyCompile.swift

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,22 +238,53 @@ func compile(
238238
// other cases branch back.
239239
//
240240
let children = alt.children
241-
assert(!children.isEmpty)
242-
guard children.count > 1 else {
243-
return try compileNode(children[0])
241+
// For every capturing child after the child at the given index, emit a
242+
// nil capture. This is used for skipping the remaining alternation
243+
// cases after a successful match.
244+
func nullifyRest(after index: Int) {
245+
for child in children.suffix(from: index + 1) where child.hasCapture {
246+
instructions.append(contentsOf: [
247+
.beginGroup,
248+
.captureNil(childType: child.captureStructure.type),
249+
.endGroup,
250+
])
251+
}
244252
}
245253

246254
let last = children.last!
247255
let middle = children.dropLast()
248256
let done = createLabel()
249-
for child in middle {
257+
for (childIndex, child) in middle.enumerated() {
250258
let nextLabel = createLabel()
259+
if child.hasCapture {
260+
instructions.append(.beginGroup)
261+
}
251262
instructions.append(.split(disfavoring: nextLabel.label!))
252263
try compileNode(child)
253-
instructions.append(.goto(label: done.label!))
254-
instructions.append(nextLabel)
264+
if child.hasCapture {
265+
instructions.append(.captureSome)
266+
instructions.append(.endGroup)
267+
}
268+
nullifyRest(after: childIndex)
269+
instructions.append(contentsOf: [
270+
.goto(label: done.label!),
271+
nextLabel
272+
])
273+
if child.hasCapture {
274+
instructions.append(contentsOf: [
275+
.captureNil(childType: child.captureStructure.type),
276+
.endGroup
277+
])
278+
}
279+
}
280+
if last.hasCapture {
281+
instructions.append(.beginGroup)
255282
}
256283
try compileNode(last)
284+
if last.hasCapture {
285+
instructions.append(.captureSome)
286+
instructions.append(.endGroup)
287+
}
257288
instructions.append(done)
258289
return
259290

0 commit comments

Comments
 (0)