Skip to content

Structuralization of captures #160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Sources/_MatchingEngine/Regex/AST/AST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ extension AST {
public var hasCapture: Bool { root.hasCapture }

/// The capture structure of this AST tree.
public var captureStructure: CaptureStructure { root.captureStructure }
public var captureStructure: CaptureStructure {
  // Walk the tree from `root` with a freshly created constructor that uses
  // the `.flatten` strategy, and hand back whatever structure it produces.
  var flattener = CaptureStructure.Constructor(.flatten)
  let structure = root._captureStructure(&flattener)
  return structure
}
}

extension AST {
Expand Down
142 changes: 87 additions & 55 deletions Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,137 +25,161 @@ public enum CaptureStructure: Equatable {
}
}

// TODO: Below are all flattening constructors. Instead create
// a builder/visitor that can store the structuralization
// approach

extension CaptureStructure {
public init<C: Collection>(
alternating children: C
) where C.Element: _TreeNode {
assert(children.count > 1)
self = children
.map(\.captureStructure)
.reduce(.empty, +)
.map(CaptureStructure.optional)
/// Carries the structuralization strategy that is threaded through the
/// `_captureStructure(_:)` computation of a tree.
public struct Constructor {
  /// The strategy this constructor was created with.
  var strategy: Strategy

  /// Creates a constructor for `strategy` (defaults to `.flatten`).
  ///
  /// Any strategy other than `.flatten` traps, because the creator methods
  /// have not been adjusted for other strategies yet.
  public init(_ strategy: Strategy = .flatten) {
    if strategy != .flatten {
      fatalError("TODO: adjust creator methods")
    }
    self.strategy = strategy
  }
}
}

extension CaptureStructure.Constructor {
/// Returns the capture structure for an alternation over `children`.
///
/// Every child's structure is computed with this constructor first; the
/// results are then combined left to right with `+`, starting from `.empty`,
/// and the combined structure is mapped through `CaptureStructure.optional`.
public mutating func alternating<C: Collection>(
  _ children: C
) -> CaptureStructure where C.Element: _TreeNode {
//    assert(children.count > 1)
  // Step 1: compute all child structures, in order.
  var childStructures: [CaptureStructure] = []
  for child in children {
    childStructures.append(child._captureStructure(&self))
  }

  // Step 2: fold them together, then wrap the result as optional.
  var combined = CaptureStructure.empty
  for childStructure in childStructures {
    combined = combined + childStructure
  }
  return combined.map(CaptureStructure.optional)
}
public init<C: Collection>(
concatenating children: C
) where C.Element: _TreeNode {
self = children.map(\.captureStructure).reduce(.empty, +)
/// Returns the capture structure for a concatenation of `children`.
///
/// Every child's structure is computed with this constructor first; the
/// results are then combined left to right with `+`, starting from `.empty`.
public mutating func concatenating<C: Collection>(
  _ children: C
) -> CaptureStructure where C.Element: _TreeNode {
  // Step 1: compute all child structures, in order.
  var childStructures: [CaptureStructure] = []
  for child in children {
    childStructures.append(child._captureStructure(&self))
  }

  // Step 2: fold them together.
  var combined = CaptureStructure.empty
  for childStructure in childStructures {
    combined = combined + childStructure
  }
  return combined
}

public init<T: _TreeNode>(
grouping child: T, as kind: AST.Group.Kind
) {
let innerCaptures = child.captureStructure
public mutating func grouping<T: _TreeNode>(
_ child: T, as kind: AST.Group.Kind
) -> CaptureStructure {
let innerCaptures = child._captureStructure(&self)
switch kind {
case .capture:
self = .atom() + innerCaptures
return .atom() + innerCaptures
case .namedCapture(let name):
self = .atom(name: name.value) + innerCaptures
return .atom(name: name.value) + innerCaptures
case .balancedCapture(let b):
self = .atom(name: b.name?.value) + innerCaptures
return .atom(name: b.name?.value) + innerCaptures
default:
precondition(!kind.isCapturing)
self = innerCaptures
return innerCaptures
}
}

public init<T: _TreeNode>(
grouping child: T,
public mutating func grouping<T: _TreeNode>(
_ child: T,
as kind: AST.Group.Kind,
withTransform transform: CaptureTransform
) {
let innerCaptures = child.captureStructure
) -> CaptureStructure {
let innerCaptures = child._captureStructure(&self)
switch kind {
case .capture:
self = .atom(type: AnyType(transform.resultType)) + innerCaptures
return .atom(type: AnyType(transform.resultType)) + innerCaptures
case .namedCapture(let name):
self = .atom(name: name.value, type: AnyType(transform.resultType))
return .atom(name: name.value, type: AnyType(transform.resultType))
+ innerCaptures
default:
self = innerCaptures
return innerCaptures
}
}

// TODO: We'll likely want/need a generalization of
// conditional's condition kind.
public init<T: _TreeNode>(
condition: AST.Conditional.Condition.Kind,
public mutating func condition<T: _TreeNode>(
_ condition: AST.Conditional.Condition.Kind,
trueBranch: T,
falseBranch: T
) {
) -> CaptureStructure {
// A conditional's capture structure is effectively that of an alternation
// between the true and false branches. However the condition may also
// have captures in the case of a group condition.
var captures = CaptureStructure.empty
switch condition {
case .group(let g):
captures = captures + AST.Node.group(g).captureStructure
captures = captures + AST.Node.group(g)._captureStructure(&self)
default:
break
}
let branchCaptures = trueBranch.captureStructure +
falseBranch.captureStructure
self = captures + branchCaptures.map(
let branchCaptures = trueBranch._captureStructure(&self) +
falseBranch._captureStructure(&self)
return captures + branchCaptures.map(
CaptureStructure.optional)
}

public init<T: _TreeNode>(
quantifying child: T, amount: AST.Quantification.Amount
) {
self = child.captureStructure.map(
public mutating func quantifying<T: _TreeNode>(
_ child: T, amount: AST.Quantification.Amount
) -> CaptureStructure {
return child._captureStructure(&self).map(
amount == .zeroOrOne
? CaptureStructure.optional
: CaptureStructure.array)
}

// TODO: Will need to adjust for DSLTree support, and
// "absent" isn't the best name for these.
public init(
absent kind: AST.AbsentFunction.Kind
) {
public mutating func absent(
_ kind: AST.AbsentFunction.Kind
) -> CaptureStructure {
// Only the child of an expression absent function is relevant, as the
// other expressions don't actually get matched against.
switch kind {
case .expression(_, _, let child):
self = child.captureStructure
return child._captureStructure(&self)
case .clearer, .repeater, .stopper:
self = .empty
return .empty
}
}

}

extension AST.Node {
public var captureStructure: CaptureStructure {
public func _captureStructure(
_ constructor: inout CaptureStructure.Constructor
) -> CaptureStructure {
guard constructor.strategy == .flatten else {
fatalError("TODO")
}

// Note: This implementation could be more optimized.
switch self {
case let .alternation(a):
return CaptureStructure(alternating: a.children)
return constructor.alternating(a.children)

case let .concatenation(c):
return CaptureStructure(concatenating: c.children)
return constructor.concatenating(c.children)

case let .group(g):
return CaptureStructure(
grouping: g.child, as: g.kind.value)
return constructor.grouping(g.child, as: g.kind.value)

case .groupTransform(let g, let transform):
return CaptureStructure(
grouping: g.child,
return constructor.grouping(
g.child,
as: g.kind.value,
withTransform: transform)

case .conditional(let c):
return CaptureStructure(
condition: c.condition.kind,
return constructor.condition(
c.condition.kind,
trueBranch: c.trueBranch,
falseBranch: c.falseBranch)

case .quantification(let q):
return CaptureStructure(
quantifying: q.child, amount: q.amount.value)
return constructor.quantifying(
q.child, amount: q.amount.value)

case .absentFunction(let abs):
return CaptureStructure(absent: abs.kind)
return constructor.absent(abs.kind)

case .quote, .trivia, .atom, .customCharacterClass, .empty:
return .empty
Expand Down Expand Up @@ -436,3 +460,11 @@ extension CaptureStructure: CustomStringConvertible {
}
}
}

extension CaptureStructure.Constructor {
  /// The approach a `Constructor` takes when structuralizing captures.
  ///
  /// `Constructor.init` currently accepts only `.flatten` and traps for any
  /// other strategy.
  public enum Strategy {
    case flatten, nest
    // case drop(after: Int)...
  }
}
4 changes: 3 additions & 1 deletion Sources/_MatchingEngine/Regex/TreeProtocols.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
public protocol _TreeNode {
var children: [Self]? { get }

var captureStructure: CaptureStructure { get }
func _captureStructure(
_: inout CaptureStructure.Constructor
) -> CaptureStructure
}

extension _TreeNode {
Expand Down
27 changes: 21 additions & 6 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,13 @@ extension Compiler.ByteCodeGen {

mutating func emitGroup(
_ kind: AST.Group.Kind, _ child: DSLTree.Node
) throws {
) throws -> CaptureRegister? {
options.beginScope()
defer { options.endScope() }

if let lookaround = kind.lookaroundKind {
try emitLookaround(lookaround, child)
return
return nil
}

switch kind {
Expand All @@ -267,14 +267,17 @@ extension Compiler.ByteCodeGen {
builder.buildBeginCapture(cap)
try emitNode(child)
builder.buildEndCapture(cap)
return cap

case .changeMatchingOptions(let optionSequence, _):
options.apply(optionSequence)
try emitNode(child)
return nil

default:
// FIXME: Other kinds...
try emitNode(child)
return nil
}
}

Expand Down Expand Up @@ -494,7 +497,7 @@ extension Compiler.ByteCodeGen {
}

case let .group(kind, child):
try emitGroup(kind, child)
_ = try emitGroup(kind, child)

case .conditional:
throw Unsupported("Conditionals")
Expand All @@ -518,9 +521,21 @@ extension Compiler.ByteCodeGen {
case let .convertedRegexLiteral(n, _):
try emitNode(n)

case let .groupTransform(kind, child, _):
try emitGroup(kind, child)
// FIXME: Transforms
case let .groupTransform(kind, child, t):
guard let cap = try emitGroup(kind, child) else {
assertionFailure("""
What does it mean to not have a capture to transform?
""")
return
}

// FIXME: Is this how we want to do it?
let transform = builder.makeTransformFunction {
input, range in
t(input[range])
}

builder.buildTransformCapture(cap, transform)

case .absentFunction:
throw Unsupported("absent function")
Expand Down
4 changes: 3 additions & 1 deletion Sources/_StringProcessing/Capture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ enum Capture {
indirect case some(Capture)
case none(childType: AnyType)
indirect case array([Capture], childType: AnyType)
}

extension Capture {
/// Convenience overload: wraps the metatype in `AnyType` and builds the
/// `.none(childType:)` case from it.
static func none(childType: Any.Type) -> Capture {
  let wrappedType = AnyType(childType)
  return .none(childType: wrappedType)
}
Expand Down Expand Up @@ -101,7 +103,7 @@ extension Capture: CustomStringConvertible {
}

case let .some(n):
printer.printBlock("Tuple") { printer in
printer.printBlock("Some") { printer in
n._print(&printer)
}

Expand Down
1 change: 1 addition & 0 deletions Sources/_StringProcessing/Compiler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Compiler {
__consuming func emit() throws -> Program {
// TODO: Handle global options
var codegen = ByteCodeGen(options: options)
codegen.builder.captureStructure = tree.captureStructure
try codegen.emitNode(tree.root)
let program = try codegen.finish()
return program
Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Engine/Consume.swift
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ extension Engine where Input == String {
}
guard let result = result else { return nil }

let capList = cpu.storedCaptures.map { $0.history }
let capList = cpu.storedCaptures
return (result, CaptureList(caps: capList))
}
}
Expand Down
9 changes: 9 additions & 0 deletions Sources/_StringProcessing/Engine/InstPayload.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ extension Instruction.Payload {
case packedBoolInt(BoolRegister, IntRegister)
case packedEltBool(ElementRegister, BoolRegister)
case packedPosPos(PositionRegister, PositionRegister)
case packedCapTran(CaptureRegister, TransformRegister)
}
}

Expand Down Expand Up @@ -280,5 +281,13 @@ extension Instruction.Payload {
interpretPair()
}

/// Creates a payload from a capture register and a transform register by
/// forwarding both, in that order, to the two-argument initializer.
///
/// - Parameters:
///   - capture: The capture register to store in the payload.
///   - transform: The transform register to store in the payload.
init(capture: CaptureRegister, transform: TransformRegister) {
self.init(capture, transform)
}
/// The payload read back as a `(CaptureRegister, TransformRegister)` pair,
/// obtained through `interpretPair()`.
// NOTE(review): presumably only meaningful for payloads built with
// `init(capture:transform:)` — confirm against `interpretPair()`.
var pairedCaptureTransform: (
CaptureRegister, TransformRegister
) {
interpretPair()
}
}

6 changes: 6 additions & 0 deletions Sources/_StringProcessing/Engine/Instruction.swift
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,12 @@ extension Instruction {
///
case endCapture

/// Transform a captured value, saving the built value
///
/// transformCapture(_:CapReg, _:TransformReg)
///
case transformCapture

/// Match a previously captured value
///
/// backreference(_:CapReg)
Expand Down
Loading