Skip to content

Commit c650aaf

Browse files
committed
DSLTree support for CaptureStructure
1 parent 55ca29c commit c650aaf

File tree

3 files changed

+183
-59
lines changed

3 files changed

+183
-59
lines changed

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 126 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -25,71 +25,138 @@ public enum CaptureStructure: Equatable {
2525
}
2626
}
2727

28+
extension CaptureStructure {
  /// Creates the capture structure of an alternation.
  ///
  /// The children's capture structures are combined left to right with `+`,
  /// and the combined result is then passed through
  /// `map(CaptureStructure.optional)`.
  ///
  /// - Parameter children: The alternation's branches. Asserted (debug builds
  ///   only) to contain more than one element.
  public init<C: Collection>(
    alternating children: C
  ) where C.Element: _TreeNode {
    assert(children.count > 1)
    var combined = CaptureStructure.empty
    for branch in children {
      combined = combined + branch.captureStructure
    }
    self = combined.map(CaptureStructure.optional)
  }

  /// Creates the capture structure of a concatenation by combining the
  /// children's capture structures left to right with `+`, starting from
  /// `.empty`.
  ///
  /// - Parameter children: The concatenated nodes, in order.
  public init<C: Collection>(
    concatenating children: C
  ) where C.Element: _TreeNode {
    self = children.reduce(CaptureStructure.empty) { partial, node in
      partial + node.captureStructure
    }
  }

  /// Creates the capture structure of a group.
  ///
  /// Capturing kinds (`.capture`, `.namedCapture`, `.balancedCapture`)
  /// contribute an `.atom` placed in front of the child's captures; any other
  /// kind contributes only the child's captures.
  ///
  /// - Parameters:
  ///   - child: The node wrapped by the group.
  ///   - kind: The group kind. Kinds that reach the fallback branch must not
  ///     be capturing (enforced with `precondition`).
  public init<T: _TreeNode>(
    grouping child: T, as kind: AST.Group.Kind
  ) {
    let nested = child.captureStructure
    let captureAtom: CaptureStructure
    switch kind {
    case .capture:
      captureAtom = .atom()
    case let .namedCapture(name):
      captureAtom = .atom(name: name.value)
    case let .balancedCapture(balanced):
      captureAtom = .atom(name: balanced.name?.value)
    default:
      // Every capturing kind is expected to be handled explicitly above.
      precondition(!kind.isCapturing)
      self = nested
      return
    }
    self = captureAtom + nested
  }

  /// Creates the capture structure of a group whose capture is passed through
  /// a transform.
  ///
  /// For `.capture` and `.namedCapture` the leading `.atom` carries
  /// `AnyType(transform.resultType)` as its type. All other kinds yield only
  /// the child's captures.
  ///
  /// - Parameters:
  ///   - child: The node wrapped by the group.
  ///   - kind: The group kind.
  ///   - transform: The transform whose `resultType` types the capture.
  public init<T: _TreeNode>(
    grouping child: T,
    as kind: AST.Group.Kind,
    withTransform transform: CaptureTransform
  ) {
    let nested = child.captureStructure
    switch kind {
    case .capture:
      let typedAtom = CaptureStructure.atom(
        type: AnyType(transform.resultType))
      self = typedAtom + nested
    case let .namedCapture(name):
      let typedAtom = CaptureStructure.atom(
        name: name.value, type: AnyType(transform.resultType))
      self = typedAtom + nested
    default:
      // NOTE(review): unlike the non-transform initializer, this branch has
      // no `isCapturing` precondition, so `.balancedCapture` lands here and
      // adds no atom — confirm this is intended.
      self = nested
    }
  }

  // TODO: We'll likely want/need a generalization of
  // conditional's condition kind.
  /// Creates the capture structure of a conditional.
  ///
  /// The two branches are treated like an alternation: their combined
  /// captures are passed through `map(CaptureStructure.optional)`. When the
  /// condition is a group, that group's captures come first and are not
  /// passed through that mapping.
  ///
  /// - Parameters:
  ///   - condition: The kind of condition being tested.
  ///   - trueBranch: The node used when the condition holds.
  ///   - falseBranch: The node used otherwise.
  public init<T: _TreeNode>(
    condition: AST.Conditional.Condition.Kind,
    trueBranch: T,
    falseBranch: T
  ) {
    // Only a group condition can contribute captures of its own.
    let conditionCaptures: CaptureStructure
    if case let .group(conditionGroup) = condition {
      conditionCaptures = CaptureStructure.empty
        + AST.Node.group(conditionGroup).captureStructure
    } else {
      conditionCaptures = CaptureStructure.empty
    }

    let bothBranches =
      trueBranch.captureStructure + falseBranch.captureStructure
    self = conditionCaptures + bothBranches.map(CaptureStructure.optional)
  }

  /// Creates the capture structure of a quantified node.
  ///
  /// The child's captures are mapped with `CaptureStructure.optional` when
  /// `amount` is `.zeroOrOne`, and with `CaptureStructure.array` for every
  /// other amount.
  ///
  /// - Parameters:
  ///   - child: The node being quantified.
  ///   - amount: The quantification amount.
  public init<T: _TreeNode>(
    quantifying child: T, amount: AST.Quantification.Amount
  ) {
    let nested = child.captureStructure
    if amount == .zeroOrOne {
      self = nested.map(CaptureStructure.optional)
    } else {
      self = nested.map(CaptureStructure.array)
    }
  }

  // TODO: Will need to adjust for DSLTree support, and
  // "absent" isn't the best name for these.
  /// Creates the capture structure of an absent function.
  ///
  /// - Parameter kind: The absent function kind. Only `.expression`
  ///   contributes captures (those of its child); `.clearer`, `.repeater`
  ///   and `.stopper` yield `.empty`.
  public init(
    absent kind: AST.AbsentFunction.Kind
  ) {
    // Only the child of an expression absent function is relevant, as the
    // other expressions don't actually get matched against.
    switch kind {
    case let .expression(_, _, matched):
      self = matched.captureStructure
    case .clearer, .repeater, .stopper:
      self = .empty
    }
  }
}
126+
28127
extension AST.Node {
29128
public var captureStructure: CaptureStructure {
30129
// Note: This implementation could be more optimized.
31130
switch self {
32-
case .alternation(let alternation):
33-
assert(alternation.children.count > 1)
34-
return alternation.children
35-
.map(\.captureStructure)
36-
.reduce(.empty, +)
37-
.map(CaptureStructure.optional)
38-
case .concatenation(let concatenation):
39-
return concatenation.children.map(\.captureStructure).reduce(.empty, +)
40-
case .group(let group):
41-
let innerCaptures = group.child.captureStructure
42-
switch group.kind.value {
43-
case .capture:
44-
return .atom() + innerCaptures
45-
case .namedCapture(let name):
46-
return .atom(name: name.value) + innerCaptures
47-
case .balancedCapture(let b):
48-
return .atom(name: b.name?.value) + innerCaptures
49-
default:
50-
precondition(!group.kind.value.isCapturing)
51-
return innerCaptures
52-
}
53-
case .groupTransform(let group, let transform):
54-
let innerCaptures = group.child.captureStructure
55-
switch group.kind.value {
56-
case .capture:
57-
return .atom(type: AnyType(transform.resultType)) + innerCaptures
58-
case .namedCapture(let name):
59-
return .atom(name: name.value, type: AnyType(transform.resultType))
60-
+ innerCaptures
61-
default:
62-
return innerCaptures
63-
}
131+
case let .alternation(a):
132+
return CaptureStructure(alternating: a.children)
133+
134+
case let .concatenation(c):
135+
return CaptureStructure(concatenating: c.children)
136+
137+
case let .group(g):
138+
return CaptureStructure(
139+
grouping: g.child, as: g.kind.value)
140+
141+
case .groupTransform(let g, let transform):
142+
return CaptureStructure(
143+
grouping: g.child,
144+
as: g.kind.value,
145+
withTransform: transform)
146+
64147
case .conditional(let c):
65-
// A conditional's capture structure is effectively that of an alternation
66-
// between the true and false branches. However the condition may also
67-
// have captures in the case of a group condition.
68-
var captures = CaptureStructure.empty
69-
switch c.condition.kind {
70-
case .group(let g):
71-
captures = captures + AST.Node.group(g).captureStructure
72-
default:
73-
break
74-
}
75-
let branchCaptures = c.trueBranch.captureStructure +
76-
c.falseBranch.captureStructure
77-
return captures + branchCaptures.map(CaptureStructure.optional)
78-
79-
case .quantification(let quantification):
80-
return quantification.child.captureStructure.map(
81-
quantification.amount.value == .zeroOrOne
82-
? CaptureStructure.optional
83-
: CaptureStructure.array)
148+
return CaptureStructure(
149+
condition: c.condition.kind,
150+
trueBranch: c.trueBranch,
151+
falseBranch: c.falseBranch)
152+
153+
case .quantification(let q):
154+
return CaptureStructure(
155+
quantifying: q.child, amount: q.amount.value)
156+
84157
case .absentFunction(let abs):
85-
// Only the child of an expression absent function is relevant, as the
86-
// other expressions don't actually get matched against.
87-
switch abs.kind {
88-
case .expression(_, _, let child):
89-
return child.captureStructure
90-
case .clearer, .repeater, .stopper:
91-
return .empty
92-
}
158+
return CaptureStructure(absent: abs.kind)
159+
93160
case .quote, .trivia, .atom, .customCharacterClass, .empty:
94161
return .empty
95162
}

Sources/_MatchingEngine/Regex/TreeProtocols.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
/// A node of a regex tree that can expose its children and describe the
/// captures it produces.
public protocol _TreeNode {
  /// The node's direct children, as an optional array of the same node type.
  // NOTE(review): presumably `nil` marks a node that cannot have children —
  // confirm against the conforming types.
  var children: [Self]? { get }

  /// The capture structure produced by this node.
  var captureStructure: CaptureStructure { get }
}
68

79
extension _TreeNode {

Sources/_StringProcessing/RegexDSL/DSLTree.swift

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ extension DSLTree {
3333
case group(AST.Group.Kind, Node)
3434

3535
/// (?(cond) true-branch | false-branch)
36+
///
37+
/// TODO: Consider splitting off grouped conditions, or have our own kind
3638
case conditional(
3739
AST.Conditional.Condition.Kind, Node, Node)
3840

@@ -59,6 +61,8 @@ extension DSLTree {
5961
case regexLiteral(AST.Node)
6062

6163
// TODO: What should we do here?
64+
///
65+
/// TODO: Consider splitting off expression functions, or have our own kind
6266
case absentFunction(AST.AbsentFunction)
6367

6468
// MARK: - Tree conversions
@@ -422,3 +426,54 @@ extension DSLTree.Node {
422426
return self.children?.any(\.hasCapture) ?? false
423427
}
424428
}
429+
430+
extension DSLTree {
  /// The capture structure of the whole tree, which is the capture structure
  /// of its `root`.
  var captureStructure: CaptureStructure {
    return root.captureStructure
  }
}
435+
extension DSLTree.Node {
  /// The capture structure produced by this node.
  ///
  /// Composite nodes delegate to the matching `CaptureStructure`
  /// initializer, nodes wrapping another tree forward to that tree's
  /// `captureStructure`, and the remaining cases yield `.empty`.
  var captureStructure: CaptureStructure {
    switch self {
    // Cases that contribute no captures.
    case .customCharacterClass, .atom, .trivia, .empty,
         .quotedLiteral, .consumer, .characterPredicate:
      return .empty

    case .consumerValidator:
      // FIXME: This is where we make a capture!
      return .empty

    // Cases that forward to a wrapped tree.
    case .regexLiteral(let literal):
      return literal.captureStructure

    case .convertedRegexLiteral(let converted, _):
      return converted.captureStructure

    // Composite cases, built through the shared initializers.
    case .alternation(let branches):
      return CaptureStructure(alternating: branches)

    case .concatenation(let components):
      return CaptureStructure(concatenating: components)

    case .group(let groupKind, let inner):
      return CaptureStructure(grouping: inner, as: groupKind)

    case .groupTransform(let groupKind, let inner, let captureTransform):
      return CaptureStructure(
        grouping: inner,
        as: groupKind,
        withTransform: captureTransform)

    case .conditional(let conditionKind, let whenTrue, let whenFalse):
      return CaptureStructure(
        condition: conditionKind,
        trueBranch: whenTrue,
        falseBranch: whenFalse)

    case .quantification(let quantity, _, let inner):
      return CaptureStructure(quantifying: inner, amount: quantity)

    case .absentFunction(let absent):
      return CaptureStructure(absent: absent.kind)
    }
  }
}

0 commit comments

Comments
 (0)