Skip to content

Simplify Capture and DynamicCaptures storage #177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 17 additions & 18 deletions Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// A tree representing the type of some captures.
public enum CaptureStructure: Equatable {
case atom(name: String? = nil, type: AnyType? = nil)
indirect case array(CaptureStructure)
indirect case optional(CaptureStructure)
indirect case tuple([CaptureStructure])

Expand Down Expand Up @@ -265,8 +264,6 @@ extension CaptureStructure {
return atomType
case .atom(_, type: let type?):
return type.base
case .array(let child):
return TypeConstruction.arrayType(of: child.type(withAtomType: atomType))
case .optional(let child):
return TypeConstruction.optionalType(of: child.type(withAtomType: atomType))
case .tuple(let children):
Expand All @@ -281,6 +278,20 @@ extension CaptureStructure {
/// The type described by this capture structure.
///
/// Equivalent to calling `type(withAtomType:)` with `DefaultAtomType.self`.
public var type: Any.Type {
  return type(withAtomType: DefaultAtomType.self)
}

/// The type of the single atom at the bottom of this structure.
///
/// - An atom with an explicit type yields that type; an atom without one
///   yields `Substring`.
/// - An optional forwards to the structure it wraps.
/// - A tuple has no single atom, so asking for one is a fatal error.
public var atomType: AnyType {
  switch self {
  case .atom(_, type: let explicitType):
    if let explicitType = explicitType {
      return explicitType
    }
    // Untyped atoms default to `Substring`.
    return AnyType(Substring.self)
  case .optional(let wrapped):
    return wrapped.atomType
  case .tuple:
    fatalError("Recursive nesting has no single atom type")
  }
}
}

// MARK: - Serialization
Expand All @@ -291,7 +302,7 @@ extension CaptureStructure {
case end = 0
case atom = 1
case namedAtom = 2
case formArray = 3
// case formArray = 3
case formOptional = 4
case beginTuple = 5
case endTuple = 6
Expand All @@ -314,7 +325,6 @@ extension CaptureStructure {
/// encode(〚`T`〛) ==> <version>, 〚`T`〛, .end
/// 〚`T` (atom)〛 ==> .atom
/// 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
/// 〚`[T]`〛 ==> 〚`T`〛, .formArray
/// 〚`T?`〛 ==> 〚`T`〛, .formOptional
/// 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
/// 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
Expand All @@ -334,7 +344,8 @@ extension CaptureStructure {
var offset = MemoryLayout<SerializationVersion>.stride
/// Appends a code to the buffer, advancing the offset to the next position.
///
/// The code is written as its `UInt8` raw value rather than as a `Code`
/// value: the serialized form is defined by the raw values (which are not
/// contiguous), so the enum's in-memory representation must not be what
/// ends up in the buffer.
func append(_ code: Code) {
  buffer.storeBytes(
    of: code.rawValue, toByteOffset: offset, as: UInt8.self)
  offset += MemoryLayout<Code>.stride
}
/// Recursively encode the node to the buffer.
Expand All @@ -353,10 +364,6 @@ extension CaptureStructure {
offset += nameCString.count
case .atom(_, _?):
fatalError("Cannot encode a capture structure with explicit types")
// 〚`[T]`〛 ==> 〚`T`〛, .formArray
case .array(let child):
encode(child)
append(.formArray)
// 〚`T?`〛 ==> 〚`T`〛, .formOptional
case .optional(let child):
encode(child)
Expand Down Expand Up @@ -419,9 +426,6 @@ extension CaptureStructure {
let name = String(cString: stringAddress)
offset += name.utf8CString.count
currentScope.append(.atom(name: name))
case .formArray:
let lastIndex = currentScope.endIndex - 1
currentScope[lastIndex] = .array(currentScope[lastIndex])
case .formOptional:
let lastIndex = currentScope.endIndex - 1
currentScope[lastIndex] = .optional(currentScope[lastIndex])
Expand Down Expand Up @@ -454,11 +458,6 @@ extension CaptureStructure: CustomStringConvertible {
: String(describing: type)
printer.print("Atom(\(name): \(type))")

case let .array(c):
printer.printBlock("Array") { printer in
c._print(&printer)
}

case let .optional(c):
printer.printBlock("Optional") { printer in
c._print(&printer)
Expand Down
152 changes: 59 additions & 93 deletions Sources/_StringProcessing/Capture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,112 +11,78 @@

import _MatchingEngine

// TODO: what here should be in the compile-time module?
/// A structured capture
struct StructuredCapture {
/// The `.optional` height of the result
var optionalCount = 0

enum Capture {
case atom(Any)
indirect case tuple([Capture])
indirect case some(Capture)
case none(childType: AnyType)
indirect case array([Capture], childType: AnyType)
}

extension Capture {
static func none(childType: Any.Type) -> Capture {
.none(childType: AnyType(childType))
}
var storedCapture: StoredCapture?

static func array(_ children: [Capture], childType: Any.Type) -> Capture {
.array(children, childType: AnyType(childType))
var someCount: Int {
storedCapture == nil ? optionalCount - 1 : optionalCount
}
}

extension Capture {
static func tupleOrAtom(_ elements: [Capture]) -> Self {
elements.count == 1 ? elements[0] : .tuple(elements)
}

static var void: Capture {
.tuple([])
}
/// A storage form for a successful capture
struct StoredCapture {
// TODO: drop optional when engine tracks all ranges
var range: Range<String.Index>?

var value: Any {
switch self {
case .atom(let atom):
return atom
case .tuple(let elements):
return TypeConstruction.tuple(
of: elements.map(\.value))
case .array(let elements, let childType):
func helper<T>(_: T.Type) -> Any {
elements.map { $0.value as! T }
}
return _openExistential(childType.base, do: helper)
case .some(let subcapture):
func helper<T>(_ value: T) -> Any {
Optional(value) as Any
}
return _openExistential(subcapture.value, do: helper)
case .none(let childType):
func helper<T>(_: T.Type) -> Any {
nil as T? as Any
}
return _openExistential(childType.base, do: helper)
}
}
// If strongly typed, value is set
var value: Any? = nil
}

private func prepending(_ newElement: Any) -> Self {
switch self {
case .atom, .some, .none, .array:
return .tuple([.atom(newElement), self])
case .tuple(let elements):
return .tuple([.atom(newElement)] + elements)
}
// TODO: Where should this live? Inside TypeConstruction?
/// Builds the type-erased value of a single capture.
///
/// The payload is picked in this order:
/// 1. `value`, when it is non-nil;
/// 2. otherwise the slice `input[range]`, when `range` is non-nil;
/// 3. otherwise a boxed `nil`.
///
/// The payload is then wrapped in `Optional` once per remaining level. In
/// the third case the boxed `nil` already accounts for one level, so it is
/// wrapped `optionalCount - 1` times; otherwise `optionalCount` times.
///
/// - Parameters:
///   - input: The text that `range` indexes into.
///   - range: The captured range, if one was recorded.
///   - value: An already-constructed capture value, if any.
///   - optionalCount: The number of `Optional` levels in the result.
/// - Returns: The capture value boxed as `Any`.
/// - Precondition: The number of wrapping steps must not be negative; in
///   particular `optionalCount >= 1` when both `range` and `value` are nil.
func constructExistentialMatchComponent(
  from input: Substring,
  in range: Range<String.Index>?,
  value: Any?,
  optionalCount: Int
) -> Any {
  let someCount: Int
  var underlying: Any
  if let v = value {
    underlying = v
    someCount = optionalCount
  } else if let r = range {
    underlying = input[r]
    someCount = optionalCount
  } else {
    // Ok since we Any-box every step up the ladder
    underlying = Optional<Any>(nil) as Any
    someCount = optionalCount - 1
  }

  // `0..<someCount` would trap with an unrelated range error otherwise.
  precondition(
    someCount >= 0,
    "Cannot wrap a capture in a negative number of optionals")
  for _ in 0..<someCount {
    underlying = Optional(underlying) as Any
  }
  return underlying
}

extension Capture: CustomStringConvertible {
public var description: String {
var printer = PrettyPrinter()
_print(&printer)
return printer.finish()
extension StructuredCapture {
  /// Produces this capture's type-erased value for a match over `input`.
  ///
  /// Forwards the stored capture's range and value (both nil when there is
  /// no stored capture) together with `optionalCount` to
  /// `constructExistentialMatchComponent`.
  ///
  /// - Parameter input: The text the stored range refers to.
  /// - Returns: The capture value boxed as `Any`.
  func existentialMatchComponent(
    from input: Substring
  ) -> Any {
    let stored = storedCapture
    return constructExistentialMatchComponent(
      from: input,
      in: stored?.range,
      value: stored?.value,
      optionalCount: optionalCount)
  }
}

private func _print(_ printer: inout PrettyPrinter) {
switch self {
case let .atom(n):
printer.print("Atom(\(n))")
case let .tuple(ns):
if ns.isEmpty {
printer.print("Tuple()")
return
}

printer.printBlock("Tuple") { printer in
for n in ns {
n._print(&printer)
}
}

case let .some(n):
printer.printBlock("Some") { printer in
n._print(&printer)
}

case let .none(childType):
printer.print("None(\(childType))")

case let .array(ns, childType):
printer.printBlock("Array(\(childType))") { printer in
for n in ns {
n._print(&printer)
}
}

}
extension Sequence where Element == StructuredCapture {
  // FIXME: This is a stop gap where we still slice the input
  // and traffic through existentials
  /// Builds the type-erased match value for these captures.
  ///
  /// The result is a tuple, built by `TypeConstruction.tuple(of:)`, whose
  /// first element is `input` itself and whose remaining elements are each
  /// capture's `existentialMatchComponent(from:)`, in iteration order.
  ///
  /// - Parameter input: The matched text.
  /// - Returns: The tuple boxed as `Any`.
  func existentialMatch(
    from input: Substring
  ) -> Any {
    var components: [Any] = [input]
    for capture in self {
      components.append(capture.existentialMatchComponent(from: input))
    }
    return TypeConstruction.tuple(of: components)
  }
}

7 changes: 0 additions & 7 deletions Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,6 @@ extension Processor {
/// Reads or writes the value held in register `i`.
///
/// The register's `rawValue` is used as the index into `values`.
subscript(_ i: ValueRegister) -> Any {
  get { values[i.rawValue] }
  set { values[i.rawValue] = newValue }
}
Expand Down
8 changes: 5 additions & 3 deletions Sources/_StringProcessing/Engine/StringProcessor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@ typealias Program = MEProgram<String>

public struct MatchResult {
public var range: Range<String.Index>
var captures: Capture
var captures: [StructuredCapture]

var destructure: (
matched: Range<String.Index>, captures: Capture
matched: Range<String.Index>,
captures: [StructuredCapture]
) {
(range, captures)
}

init(
_ matched: Range<String.Index>, _ captures: Capture
_ matched: Range<String.Index>,
_ captures: [StructuredCapture]
) {
self.range = matched
self.captures = captures
Expand Down
Loading