Skip to content

Commit b0e6bee

Browse files
committed
Heterogeneous alternation
Introduce `oneOf` combinator, which takes a builder block and joins all elements into a regex alternation. Fix the legacy VM to support alternation with captures. ----- Example: ```swift oneOf { "a".capture() "b".capture() "c" } => `.Match = (Substring, Substring?, Substring?)` ```
1 parent 4c252c0 commit b0e6bee

File tree

8 files changed

+1473
-37
lines changed

8 files changed

+1473
-37
lines changed

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,20 @@ struct VariadicsGenerator: ParsableCommand {
160160
print(to: &standardError)
161161
}
162162

163+
print("Generating alternation overloads...", to: &standardError)
164+
for (leftArity, rightArity) in Permutations(totalArity: maxArity) {
165+
print(
166+
" Left arity: \(leftArity) Right arity: \(rightArity)",
167+
to: &standardError)
168+
emitAlternation(leftArity: leftArity, rightArity: rightArity)
169+
}
170+
171+
print("Generating 'AlternationBuilder.buildBlock(_:)' overloads...", to: &standardError)
172+
for arity in 1..<maxArity {
173+
print(" Capture arity: \(arity)", to: &standardError)
174+
emitUnaryAlternationBuildBlock(arity: arity)
175+
}
176+
163177
output("\n\n")
164178

165179
output("// END AUTO-GENERATED CONTENT\n")
@@ -444,4 +458,90 @@ struct VariadicsGenerator: ParsableCommand {
444458
445459
""")
446460
}
461+
462+
/// Emits the generated declarations for one binary alternation overload.
///
/// For the given pair of capture arities this writes, via `output`:
/// - a `_Alternation_<left>_<right>` struct wrapping the combined regex,
/// - an `AlternationBuilder.buildBlock(combining:into:)` overload, and
/// - a `|` operator overload,
/// all generic over the two operand regex types and their capture types.
///
/// - Parameters:
///   - leftArity: Number of captures in the left-hand (already combined) regex.
///   - rightArity: Number of captures in the right-hand (newly appended) regex.
func emitAlternation(leftArity: Int, rightArity: Int) {
  // Capture type names are numbered consecutively across both operands:
  // the left operand owns `C0 ..< CleftArity`, the right operand the rest.
  let leftCaptureNames = (0..<leftArity).map { "C\($0)" }
  let rightCaptureNames = (leftArity..<leftArity + rightArity).map { "C\($0)" }
  let leftCaptureList = leftCaptureNames.joined(separator: ", ")
  let rightCaptureList = rightCaptureNames.joined(separator: ", ")

  let typeName = "_Alternation_\(leftArity)_\(rightArity)"

  // An operand without captures needs no whole-match (`W`) or capture
  // generic parameters, only the regex type itself.
  let leftParams = leftArity == 0 ? "R0" : "R0, W0, " + leftCaptureList
  let rightParams = rightArity == 0 ? "R1" : "R1, W1, " + rightCaptureList
  let genericParams = leftParams + ", " + rightParams

  var whereClause = "where R0: \(regexProtocolName), R1: \(regexProtocolName)"
  if leftArity > 0 {
    whereClause += ", R0.\(matchAssociatedTypeName) == (W0, \(leftCaptureList))"
  }
  if rightArity > 0 {
    whereClause += ", R1.\(matchAssociatedTypeName) == (W1, \(rightCaptureList))"
  }

  // In the result tuple the left captures keep their type as written, while
  // every right-hand capture is wrapped in an optional.
  let resultCaptures = (leftCaptureNames + rightCaptureNames.map { $0 + "?" })
    .joined(separator: ", ")

  let matchType: String
  if leftArity == 0 && rightArity == 0 {
    matchType = baseMatchTypeName
  } else {
    matchType = "(\(baseMatchTypeName), \(resultCaptures))"
  }

  output("""
    public struct \(typeName)<\(genericParams)>: \(regexProtocolName) \(whereClause) {
      public typealias Match = \(matchType)
      public let regex: Regex<Match>

      public init(_ left: R0, _ right: R1) {
        self.regex = .init(node: left.regex.root.appendingAlternationCase(right.regex.root))
      }
    }

    extension AlternationBuilder {
      public static func buildBlock<\(genericParams)>(combining next: R1, into combined: R0) -> \(typeName)<\(genericParams)> {
        .init(combined, next)
      }
    }

    public func | <\(genericParams)>(lhs: R0, rhs: R1) -> \(typeName)<\(genericParams)> {
      .init(lhs, rhs)
    }

    """)
}
523+
524+
/// Emits an `AlternationBuilder.buildBlock(_:)` overload for a builder block
/// containing a single regex that has `arity` captures.
///
/// The generated overload wraps the regex's root node in a one-element
/// alternation and returns a `Regex` whose match tuple keeps the whole-match
/// type `W` and makes every capture type optional.
///
/// - Parameter arity: Number of captures in the single regex. Must be
///   positive: the generated signature always spells out `(W, C0, ...)`, which
///   would be malformed with no captures.
func emitUnaryAlternationBuildBlock(arity: Int) {
  // Enforced in release builds as well: a zero arity would otherwise silently
  // emit source that does not compile (`(W, )` with `W` undeclared).
  precondition(arity > 0, "Unary alternation overloads require at least one capture")

  let captureNames = (0..<arity).map { "C\($0)" }
  let captures = captureNames.joined(separator: ", ")
  let genericParams = "R, W, " + captures
  let whereClause =
    "where R: \(regexProtocolName), R.\(matchAssociatedTypeName) == (W, \(captures))"
  // Every capture becomes optional in the result type.
  let resultCaptures = captureNames.map { $0 + "?" }.joined(separator: ", ")

  output("""
    extension AlternationBuilder {
      public static func buildBlock<\(genericParams)>(_ regex: R) -> Regex<(W, \(resultCaptures))> \(whereClause) {
        .init(node: .alternation([regex.regex.root]))
      }
    }

    """)
}
447547
}

Sources/_StringProcessing/ASTBuilder.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ func alt(_ asts: AST.Node...) -> AST.Node {
3636
alt(asts)
3737
}
3838

39+
/// Returns an alternation node that has `new` as its last case.
///
/// If `combined` is already an alternation, `new` is appended to its existing
/// children, producing a single flat alternation. Otherwise a two-case
/// alternation of `combined` followed by `new` is built.
func appendAlt(_ new: AST.Node, to combined: AST.Node) -> AST.Node {
  guard case .alternation(let existing) = combined else {
    // Not an alternation yet: start one from the two nodes.
    return alt(combined, new)
  }
  return alt(existing.children + [new])
}
45+
3946
func concat(_ asts: [AST.Node]) -> AST.Node {
4047
.concatenation(.init(asts, .fake))
4148
}

Sources/_StringProcessing/Legacy/LegacyCompile.swift

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,22 +240,54 @@ func compile(
240240
// E.g. `a` falls-through to the rest of the program and the
241241
// other cases branch back.
242242
//
243-
assert(!children.isEmpty)
244-
guard children.count > 1 else {
245-
return try compileNode(children[0])
243+
244+
// For every capturing child after the child at the given index, emit a
245+
// nil capture. This is used for skipping the remaining alternation
246+
// cases after a successful match.
247+
func nullifyRest(after index: Int) {
248+
for child in children.suffix(from: index + 1) where child.hasCapture {
249+
instructions.append(contentsOf: [
250+
.beginGroup,
251+
.captureNil(childType: child.captureStructure.type),
252+
.endGroup,
253+
])
254+
}
246255
}
247256

248257
let last = children.last!
249258
let middle = children.dropLast()
250259
let done = createLabel()
251-
for child in middle {
260+
for (childIndex, child) in middle.enumerated() {
252261
let nextLabel = createLabel()
262+
if child.hasCapture {
263+
instructions.append(.beginGroup)
264+
}
253265
instructions.append(.split(disfavoring: nextLabel.label!))
254266
try compileNode(child)
255-
instructions.append(.goto(label: done.label!))
256-
instructions.append(nextLabel)
267+
if child.hasCapture {
268+
instructions.append(.captureSome)
269+
instructions.append(.endGroup)
270+
}
271+
nullifyRest(after: childIndex)
272+
instructions.append(contentsOf: [
273+
.goto(label: done.label!),
274+
nextLabel
275+
])
276+
if child.hasCapture {
277+
instructions.append(contentsOf: [
278+
.captureNil(childType: child.captureStructure.type),
279+
.endGroup
280+
])
281+
}
282+
}
283+
if last.hasCapture {
284+
instructions.append(.beginGroup)
257285
}
258286
try compileNode(last)
287+
if last.hasCapture {
288+
instructions.append(.captureSome)
289+
instructions.append(.endGroup)
290+
}
259291
instructions.append(done)
260292
return
261293

0 commit comments

Comments
 (0)