Skip to content

Commit 0d65e2e

Browse files
authored
Simplify Capture and DynamicCaptures storage (#177)
* Simplify Capture and DynamicCaptures storage. Change to an array of structs instead of recursive indirect enums.
1 parent 17adc2c commit 0d65e2e

File tree

11 files changed

+402
-439
lines changed

11 files changed

+402
-439
lines changed

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
// A tree representing the type of some captures.
1313
public enum CaptureStructure: Equatable {
1414
case atom(name: String? = nil, type: AnyType? = nil)
15-
indirect case array(CaptureStructure)
1615
indirect case optional(CaptureStructure)
1716
indirect case tuple([CaptureStructure])
1817

@@ -259,8 +258,6 @@ extension CaptureStructure {
259258
return atomType
260259
case .atom(_, type: let type?):
261260
return type.base
262-
case .array(let child):
263-
return TypeConstruction.arrayType(of: child.type(withAtomType: atomType))
264261
case .optional(let child):
265262
return TypeConstruction.optionalType(of: child.type(withAtomType: atomType))
266263
case .tuple(let children):
@@ -275,6 +272,20 @@ extension CaptureStructure {
275272
public var type: Any.Type {
276273
type(withAtomType: DefaultAtomType.self)
277274
}
275+
276+
public var atomType: AnyType {
277+
switch self {
278+
case .atom(_, type: nil):
279+
return .init(Substring.self)
280+
case .atom(_, type: let type?):
281+
return type
282+
case .optional(let child):
283+
return child.atomType
284+
case .tuple:
285+
fatalError("Recursive nesting has no single atom type")
286+
}
287+
288+
}
278289
}
279290

280291
// MARK: - Serialization
@@ -285,7 +296,7 @@ extension CaptureStructure {
285296
case end = 0
286297
case atom = 1
287298
case namedAtom = 2
288-
case formArray = 3
299+
// case formArray = 3
289300
case formOptional = 4
290301
case beginTuple = 5
291302
case endTuple = 6
@@ -308,7 +319,6 @@ extension CaptureStructure {
308319
/// encode(〚`T`〛) ==> <version>, 〚`T`〛, .end
309320
/// 〚`T` (atom)〛 ==> .atom
310321
/// 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
311-
/// 〚`[T]`〛 ==> 〚`T`〛, .formArray
312322
/// 〚`T?`〛 ==> 〚`T`〛, .formOptional
313323
/// 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
314324
/// 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
@@ -328,7 +338,8 @@ extension CaptureStructure {
328338
var offset = MemoryLayout<SerializationVersion>.stride
329339
/// Appends a code to the buffer, advancing the offset to the next position.
330340
func append(_ code: Code) {
331-
buffer.storeBytes(of: code, toByteOffset: offset, as: Code.self)
341+
buffer.storeBytes(
342+
of: code.rawValue, toByteOffset: offset, as: UInt8.self)
332343
offset += MemoryLayout<Code>.stride
333344
}
334345
/// Recursively encode the node to the buffer.
@@ -347,10 +358,6 @@ extension CaptureStructure {
347358
offset += nameCString.count
348359
case .atom(_, _?):
349360
fatalError("Cannot encode a capture structure with explicit types")
350-
// 〚`[T]`〛 ==> 〚`T`〛, .formArray
351-
case .array(let child):
352-
encode(child)
353-
append(.formArray)
354361
// 〚`T?`〛 ==> 〚`T`〛, .formOptional
355362
case .optional(let child):
356363
encode(child)
@@ -413,9 +420,6 @@ extension CaptureStructure {
413420
let name = String(cString: stringAddress)
414421
offset += name.utf8CString.count
415422
currentScope.append(.atom(name: name))
416-
case .formArray:
417-
let lastIndex = currentScope.endIndex - 1
418-
currentScope[lastIndex] = .array(currentScope[lastIndex])
419423
case .formOptional:
420424
let lastIndex = currentScope.endIndex - 1
421425
currentScope[lastIndex] = .optional(currentScope[lastIndex])
@@ -448,11 +452,6 @@ extension CaptureStructure: CustomStringConvertible {
448452
: String(describing: type)
449453
printer.print("Atom(\(name): \(type))")
450454

451-
case let .array(c):
452-
printer.printBlock("Array") { printer in
453-
c._print(&printer)
454-
}
455-
456455
case let .optional(c):
457456
printer.printBlock("Optional") { printer in
458457
c._print(&printer)

Sources/_StringProcessing/Capture.swift

Lines changed: 59 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -11,112 +11,78 @@
1111

1212
import _MatchingEngine
1313

14-
// TODO: what here should be in the compile-time module?
14+
/// A structured capture
15+
struct StructuredCapture {
16+
/// The `.optional` nesting depth of the result
17+
var optionalCount = 0
1518

16-
enum Capture {
17-
case atom(Any)
18-
indirect case tuple([Capture])
19-
indirect case some(Capture)
20-
case none(childType: AnyType)
21-
indirect case array([Capture], childType: AnyType)
22-
}
23-
24-
extension Capture {
25-
static func none(childType: Any.Type) -> Capture {
26-
.none(childType: AnyType(childType))
27-
}
19+
var storedCapture: StoredCapture?
2820

29-
static func array(_ children: [Capture], childType: Any.Type) -> Capture {
30-
.array(children, childType: AnyType(childType))
21+
var someCount: Int {
22+
storedCapture == nil ? optionalCount - 1 : optionalCount
3123
}
3224
}
3325

34-
extension Capture {
35-
static func tupleOrAtom(_ elements: [Capture]) -> Self {
36-
elements.count == 1 ? elements[0] : .tuple(elements)
37-
}
38-
39-
static var void: Capture {
40-
.tuple([])
41-
}
26+
/// A storage form for a successful capture
27+
struct StoredCapture {
28+
// TODO: drop optional when engine tracks all ranges
29+
var range: Range<String.Index>?
4230

43-
var value: Any {
44-
switch self {
45-
case .atom(let atom):
46-
return atom
47-
case .tuple(let elements):
48-
return TypeConstruction.tuple(
49-
of: elements.map(\.value))
50-
case .array(let elements, let childType):
51-
func helper<T>(_: T.Type) -> Any {
52-
elements.map { $0.value as! T }
53-
}
54-
return _openExistential(childType.base, do: helper)
55-
case .some(let subcapture):
56-
func helper<T>(_ value: T) -> Any {
57-
Optional(value) as Any
58-
}
59-
return _openExistential(subcapture.value, do: helper)
60-
case .none(let childType):
61-
func helper<T>(_: T.Type) -> Any {
62-
nil as T? as Any
63-
}
64-
return _openExistential(childType.base, do: helper)
65-
}
66-
}
31+
// If strongly typed, value is set
32+
var value: Any? = nil
33+
}
6734

68-
private func prepending(_ newElement: Any) -> Self {
69-
switch self {
70-
case .atom, .some, .none, .array:
71-
return .tuple([.atom(newElement), self])
72-
case .tuple(let elements):
73-
return .tuple([.atom(newElement)] + elements)
74-
}
35+
// TODO: Where should this live? Inside TypeConstruction?
36+
func constructExistentialMatchComponent(
37+
from input: Substring,
38+
in range: Range<String.Index>?,
39+
value: Any?,
40+
optionalCount: Int
41+
) -> Any {
42+
let someCount: Int
43+
var underlying: Any
44+
if let v = value {
45+
underlying = v
46+
someCount = optionalCount
47+
} else if let r = range {
48+
underlying = input[r]
49+
someCount = optionalCount
50+
} else {
51+
// Ok since we Any-box every step up the ladder
52+
underlying = Optional<Any>(nil) as Any
53+
someCount = optionalCount - 1
7554
}
7655

77-
func matchValue(withWholeMatch wholeMatch: Substring) -> Any {
78-
prepending(wholeMatch).value
56+
for _ in 0..<someCount {
57+
underlying = Optional(underlying) as Any
7958
}
59+
return underlying
8060
}
8161

82-
extension Capture: CustomStringConvertible {
83-
public var description: String {
84-
var printer = PrettyPrinter()
85-
_print(&printer)
86-
return printer.finish()
62+
extension StructuredCapture {
63+
func existentialMatchComponent(
64+
from input: Substring
65+
) -> Any {
66+
constructExistentialMatchComponent(
67+
from: input,
68+
in: storedCapture?.range,
69+
value: storedCapture?.value,
70+
optionalCount: optionalCount)
8771
}
72+
}
8873

89-
private func _print(_ printer: inout PrettyPrinter) {
90-
switch self {
91-
case let .atom(n):
92-
printer.print("Atom(\(n))")
93-
case let .tuple(ns):
94-
if ns.isEmpty {
95-
printer.print("Tuple()")
96-
return
97-
}
98-
99-
printer.printBlock("Tuple") { printer in
100-
for n in ns {
101-
n._print(&printer)
102-
}
103-
}
104-
105-
case let .some(n):
106-
printer.printBlock("Some") { printer in
107-
n._print(&printer)
108-
}
109-
110-
case let .none(childType):
111-
printer.print("None(\(childType))")
112-
113-
case let .array(ns, childType):
114-
printer.printBlock("Array(\(childType))") { printer in
115-
for n in ns {
116-
n._print(&printer)
117-
}
118-
}
119-
120-
}
74+
extension Sequence where Element == StructuredCapture {
75+
// FIXME: This is a stopgap where we still slice the input
76+
// and traffic through existentials
77+
func existentialMatch(
78+
from input: Substring
79+
) -> Any {
80+
var caps = Array<Any>()
81+
caps.append(input)
82+
caps.append(contentsOf: self.map {
83+
$0.existentialMatchComponent(from: input)
84+
})
85+
return TypeConstruction.tuple(of: caps)
12186
}
12287
}
88+

Sources/_StringProcessing/Engine/Registers.swift

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,6 @@ extension Processor {
8888
subscript(_ i: ValueRegister) -> Any {
8989
get { values[i.rawValue] }
9090
set {
91-
print("""
92-
values: \(values)
93-
i: \(i)
94-
newValue: \(newValue)
95-
""")
96-
print(values)
97-
print(i)
9891
values[i.rawValue] = newValue
9992
}
10093
}

Sources/_StringProcessing/Engine/StringProcessor.swift

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,18 @@ typealias Program = MEProgram<String>
1414

1515
public struct MatchResult {
1616
public var range: Range<String.Index>
17-
var captures: Capture
17+
var captures: [StructuredCapture]
1818

1919
var destructure: (
20-
matched: Range<String.Index>, captures: Capture
20+
matched: Range<String.Index>,
21+
captures: [StructuredCapture]
2122
) {
2223
(range, captures)
2324
}
2425

2526
init(
26-
_ matched: Range<String.Index>, _ captures: Capture
27+
_ matched: Range<String.Index>,
28+
_ captures: [StructuredCapture]
2729
) {
2830
self.range = matched
2931
self.captures = captures

0 commit comments

Comments
 (0)