Skip to content

Refactor AST into a struct containing AST.Node enum #130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ struct VariadicsGenerator: ParsableCommand {
outputForEach(
0..<arity, separator: ", ", lineTerminator: ""
) { i in
"x\(i).\(patternProtocolRequirementName).ast"
"x\(i).\(patternProtocolRequirementName).ast.root"
}
output("))\n")
output(" }\n}\n\n")
Expand Down
89 changes: 53 additions & 36 deletions Sources/_MatchingEngine/Regex/AST/AST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,47 +9,64 @@
//
//===----------------------------------------------------------------------===//

/// A regex abstract syntax tree
@frozen
public indirect enum AST:
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
{
/// ... | ... | ...
case alternation(Alternation)
/// A regex abstract syntax tree. This is a top-level type that stores the root
/// node.
public struct AST: Hashable {
/// The root node of the tree.
public var root: AST.Node
/// Creates an AST that stores `root` as its root node.
///
/// - Parameter root: The node to store in `root`.
public init(_ root: AST.Node) {
self.root = root
}
}

/// ... ...
case concatenation(Concatenation)
extension AST {
/// Whether this AST has a capture nested somewhere inside it.
public var hasCapture: Bool { root.hasCapture }

/// (...)
case group(Group)
/// The capture structure of this AST.
public var captureStructure: CaptureStructure { root.captureStructure }
}

/// (?(cond) true-branch | false-branch)
case conditional(Conditional)
extension AST {
/// A node in the regex AST.
@frozen
public indirect enum Node:
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
{
/// ... | ... | ...
case alternation(Alternation)

case quantification(Quantification)
/// ... ...
case concatenation(Concatenation)

/// \Q...\E
case quote(Quote)
/// (...)
case group(Group)

/// Comments, non-semantic whitespace, etc
case trivia(Trivia)
/// (?(cond) true-branch | false-branch)
case conditional(Conditional)

case atom(Atom)
case quantification(Quantification)

case customCharacterClass(CustomCharacterClass)
/// \Q...\E
case quote(Quote)

case absentFunction(AbsentFunction)
/// Comments, non-semantic whitespace, etc
case trivia(Trivia)

case empty(Empty)
case atom(Atom)

// FIXME: Move off the regex literal AST
case groupTransform(
Group, transform: CaptureTransform)
}
case customCharacterClass(CustomCharacterClass)

// TODO: Do we want something that holds the AST and stored global options?
case absentFunction(AbsentFunction)

extension AST {
case empty(Empty)

// FIXME: Move off the regex literal AST
case groupTransform(
Group, transform: CaptureTransform)
}
}

extension AST.Node {
// :-(
//
// Existential-based programming is highly prone to silent
Expand Down Expand Up @@ -79,7 +96,7 @@ extension AST {
}

/// If this node is a parent node, access its children
public var children: [AST]? {
public var children: [AST.Node]? {
return (_associatedValue as? _ASTParent)?.children
}

Expand Down Expand Up @@ -127,10 +144,10 @@ extension AST {
extension AST {

public struct Alternation: Hashable, _ASTNode {
public let children: [AST]
public let children: [AST.Node]
public let pipes: [SourceLocation]

public init(_ mems: [AST], pipes: [SourceLocation]) {
public init(_ mems: [AST.Node], pipes: [SourceLocation]) {
// An alternation must have at least two branches (though the branches
// may be empty AST nodes), and n - 1 pipes.
precondition(mems.count >= 2)
Expand All @@ -146,10 +163,10 @@ extension AST {
}

public struct Concatenation: Hashable, _ASTNode {
public let children: [AST]
public let children: [AST.Node]
public let location: SourceLocation

public init(_ mems: [AST], _ location: SourceLocation) {
public init(_ mems: [AST.Node], _ location: SourceLocation) {
self.children = mems
self.location = location
}
Expand Down Expand Up @@ -201,16 +218,16 @@ extension AST {
public enum Kind: Hashable {
/// An absent repeater `(?~absent)`. This is equivalent to `(?~|absent|.*)`
/// and therefore matches as long as the pattern `absent` is not matched.
case repeater(AST)
case repeater(AST.Node)

/// An absent expression `(?~|absent|expr)`, which defines an `absent`
/// pattern which must not be matched against while the pattern `expr` is
/// matched.
case expression(absentee: AST, pipe: SourceLocation, expr: AST)
case expression(absentee: AST.Node, pipe: SourceLocation, expr: AST.Node)

/// An absent stopper `(?~|absent)`, which prevents matching against
/// `absent` until the end of the regex, or until it is cleared.
case stopper(AST)
case stopper(AST.Node)

/// An absent clearer `(?~|)` which cancels the effect of an absent
/// stopper.
Expand Down
8 changes: 4 additions & 4 deletions Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ extension _ASTNode {
}

protocol _ASTParent: _ASTNode {
var children: [AST] { get }
var children: [AST.Node] { get }
}

extension AST.Concatenation: _ASTParent {}
extension AST.Alternation: _ASTParent {}

extension AST.Group: _ASTParent {
var children: [AST] { [child] }
var children: [AST.Node] { [child] }
}
extension AST.Quantification: _ASTParent {
var children: [AST] { [child] }
var children: [AST.Node] { [child] }
}
extension AST.AbsentFunction: _ASTParent {
var children: [AST] {
var children: [AST.Node] {
switch kind {
case .repeater(let a), .stopper(let a): return [a]
case .expression(let a, _, let c): return [a, c]
Expand Down
2 changes: 1 addition & 1 deletion Sources/_MatchingEngine/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ extension AST.Atom {
}
}

extension AST {
extension AST.Node {
public var literalStringValue: String? {
switch self {
case .atom(let a): return a.literalStringValue
Expand Down
8 changes: 4 additions & 4 deletions Sources/_MatchingEngine/Regex/AST/Conditional.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ extension AST {
public var location: SourceLocation
public var condition: Condition

public var trueBranch: AST
public var trueBranch: AST.Node
public var pipe: SourceLocation?
public var falseBranch: AST
public var falseBranch: AST.Node

public init(
_ condition: Condition, trueBranch: AST, pipe: SourceLocation?,
falseBranch: AST, _ location: SourceLocation
_ condition: Condition, trueBranch: AST.Node, pipe: SourceLocation?,
falseBranch: AST.Node, _ location: SourceLocation
) {
self.location = location
self.condition = condition
Expand Down
4 changes: 2 additions & 2 deletions Sources/_MatchingEngine/Regex/AST/Group.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
extension AST {
public struct Group: Hashable {
public let kind: Located<Kind>
public let child: AST
public let child: AST.Node

public let location: SourceLocation

public init(
_ kind: Located<Kind>, _ child: AST, _ r: SourceLocation
_ kind: Located<Kind>, _ child: AST.Node, _ r: SourceLocation
) {
self.kind = kind
self.child = child
Expand Down
4 changes: 2 additions & 2 deletions Sources/_MatchingEngine/Regex/AST/Quantification.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ extension AST {
public let amount: Located<Amount>
public let kind: Located<Kind>

public let child: AST
public let child: AST.Node
public let location: SourceLocation

public init(
_ amount: Located<Amount>,
_ kind: Located<Kind>,
_ child: AST,
_ child: AST.Node,
_ r: SourceLocation
) {
self.amount = amount
Expand Down
4 changes: 2 additions & 2 deletions Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public enum CaptureStructure: Equatable {
}
}

extension AST {
extension AST.Node {
public var captureStructure: CaptureStructure {
// Note: This implementation could be more optimized.
switch self {
Expand Down Expand Up @@ -68,7 +68,7 @@ extension AST {
var captures = CaptureStructure.empty
switch c.condition.kind {
case .group(let g):
captures = captures + AST.group(g).captureStructure
captures = captures + AST.Node.group(g).captureStructure
default:
break
}
Expand Down
14 changes: 7 additions & 7 deletions Sources/_MatchingEngine/Regex/Parse/Parse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ extension Parser {
}
fatalError("Unhandled termination condition")
}
return ast
return .init(ast)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where do global options come in?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll parse them here and store them on AST

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right, that will be based on top of this. SGTM

}

/// Parse a regular expression node. This should be used instead of `parse()`
Expand All @@ -136,7 +136,7 @@ extension Parser {
/// RegexNode -> '' | Alternation
/// Alternation -> Concatenation ('|' Concatenation)*
///
mutating func parseNode() throws -> AST {
mutating func parseNode() throws -> AST.Node {
let _start = source.currentPosition

if source.isEmpty { return .empty(.init(loc(_start))) }
Expand All @@ -163,8 +163,8 @@ extension Parser {
/// ConcatComponent -> Trivia | Quote | Quantification
/// Quantification -> QuantOperand Quantifier?
///
mutating func parseConcatenation() throws -> AST {
var result = Array<AST>()
mutating func parseConcatenation() throws -> AST.Node {
var result = [AST.Node]()
let _start = source.currentPosition

while true {
Expand Down Expand Up @@ -219,9 +219,9 @@ extension Parser {
/// Perform a recursive parse for the branches of a conditional.
mutating func parseConditionalBranches(
start: Source.Position, _ cond: AST.Conditional.Condition
) throws -> AST {
) throws -> AST.Node {
let child = try parseNode()
let trueBranch: AST, falseBranch: AST, pipe: SourceLocation?
let trueBranch: AST.Node, falseBranch: AST.Node, pipe: SourceLocation?
switch child {
case .alternation(let a):
// If we have an alternation child, we only accept 2 branches.
Expand Down Expand Up @@ -316,7 +316,7 @@ extension Parser {
/// Conditional -> ConditionalStart Concatenation ('|' Concatenation)? ')'
/// ConditionalStart -> KnownConditionalStart | GroupConditionalStart
///
mutating func parseQuantifierOperand() throws -> AST? {
mutating func parseQuantifierOperand() throws -> AST.Node? {
assert(!source.isEmpty)

let _start = source.currentPosition
Expand Down
10 changes: 8 additions & 2 deletions Sources/_MatchingEngine/Regex/Printing/DumpAST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ extension _ASTPrintable {
public var description: String { _print() }
public var debugDescription: String { _dump() }

var _children: [AST]? {
var _children: [AST.Node]? {
if let children = (self as? _ASTParent)?.children {
return children
}
if let children = (self as? AST)?.children {
if let children = (self as? AST.Node)?.children {
return children
}
return nil
Expand All @@ -57,6 +57,12 @@ extension _ASTPrintable {
}

extension AST: _ASTPrintable {
/// The dump base of the whole tree, which is simply the `_dumpBase` of the
/// `root` node.
public var _dumpBase: String {
root._dumpBase
}
}

extension AST.Node: _ASTPrintable {
public var _dumpBase: String {
_associatedValue._dumpBase
}
Expand Down
27 changes: 26 additions & 1 deletion Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,38 @@ extension AST {
}
}

extension AST.Node {
/// Render using Swift's preferred regex literal syntax
///
/// Creates a fresh `PrettyPrinter`, asks it to `printAsCanonical` this node,
/// and returns whatever the printer's `finish()` produces.
///
/// - Parameters:
///   - delimiters: Forwarded to `printAsCanonical` as `delimiters:`.
///     Defaults to `false`.
///   - terminateLine: Forwarded to `printAsCanonical` as `terminateLine:`.
///     Defaults to `false` here and is always passed explicitly, so the
///     `true` default declared on `printAsCanonical` does not apply.
/// - Returns: The string returned by the printer's `finish()`.
public func renderAsCanonical(
showDelimiters delimiters: Bool = false,
terminateLine: Bool = false
) -> String {
var printer = PrettyPrinter()
printer.printAsCanonical(
self,
delimiters: delimiters,
terminateLine: terminateLine)
return printer.finish()
}
}

extension PrettyPrinter {
/// Will output the root node of `ast` in canonical form.
///
/// This overload only unwraps `ast.root` and forwards it, together with the
/// unchanged `delimiters` and `terminateLine` arguments, to the `AST.Node`
/// overload of `printAsCanonical`, which does the actual printing.
mutating func printAsCanonical(
_ ast: AST,
delimiters: Bool = false,
terminateLine terminate: Bool = true
) {
printAsCanonical(ast.root, delimiters: delimiters, terminateLine: terminate)
}

/// Will output `ast` in canonical form, taking care to
/// also indent and terminate the line (updating internal state)
mutating func printAsCanonical(
_ ast: AST.Node,
delimiters: Bool = false,
terminateLine terminate: Bool = true
) {
indent()
if delimiters { output("'/") }
Expand All @@ -45,7 +70,7 @@ extension PrettyPrinter {

/// Output the `ast` in canonical form, does not indent, terminate,
/// or affect internal state
mutating func outputAsCanonical(_ ast: AST) {
mutating func outputAsCanonical(_ ast: AST.Node) {
switch ast {
case let .alternation(a):
for idx in a.children.indices {
Expand Down
Loading