Skip to content

Commit 5b413ac

Browse files
committed
Track the whole match as a capture in the matching engine.
1 parent 5e05b61 commit 5b413ac

File tree

13 files changed

+71
-72
lines changed

13 files changed

+71
-72
lines changed

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ extension AST.Node {
125125

126126
public var _captureList: CaptureList {
127127
var caps = CaptureList()
128+
caps.append(.init(type: Substring.self, optionalDepth: 0, .fake))
128129
self._addCaptures(to: &caps, optionalNesting: 0)
129130
return caps
130131
}

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ extension RegexValidator {
7777
}
7878
switch ref.kind {
7979
case .absolute(let i):
80-
guard i <= captures.captures.count else {
80+
guard i < captures.captures.count else {
8181
throw error(.invalidReference(i), at: ref.innerLoc)
8282
}
8383
case .named(let name):

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,33 @@ extension Compiler {
44
struct ByteCodeGen {
55
var options: MatchingOptions
66
var builder = Program.Builder()
7+
let wholeMatchRegister: CaptureRegister
78

89
init(options: MatchingOptions, captureList: CaptureList) {
910
self.options = options
1011
self.builder.captureList = captureList
12+
// The first output element is the whole match.
13+
wholeMatchRegister = self.builder.makeCapture(id: nil, name: nil)
1114
}
15+
}
16+
}
1217

13-
mutating func finish(
14-
) throws -> Program {
15-
builder.buildAccept()
16-
return try builder.assemble()
18+
extension Compiler.ByteCodeGen {
19+
mutating func emitRoot(_ root: DSLTree.Node) throws -> Program {
20+
switch root {
21+
case .matcher(_, let f):
22+
emitMatcher(f, into: wholeMatchRegister)
23+
default:
24+
builder.buildBeginCapture(wholeMatchRegister)
25+
try emitNode(root)
26+
builder.buildEndCapture(wholeMatchRegister)
1727
}
28+
builder.buildAccept()
29+
return try builder.assemble()
1830
}
1931
}
2032

21-
extension Compiler.ByteCodeGen {
33+
fileprivate extension Compiler.ByteCodeGen {
2234
mutating func emitAtom(_ a: DSLTree.Atom) throws {
2335
switch a {
2436
case .any:
@@ -65,8 +77,7 @@ extension Compiler.ByteCodeGen {
6577

6678
switch ref.kind {
6779
case .absolute(let i):
68-
// Backreferences number starting at 1
69-
builder.buildBackreference(.init(i-1))
80+
builder.buildBackreference(.init(i))
7081
case .named(let name):
7182
try builder.buildNamedReference(name)
7283
case .relative:
@@ -729,4 +740,3 @@ extension Compiler.ByteCodeGen {
729740
}
730741
}
731742
}
732-

Sources/_StringProcessing/Capture.swift

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
// TODO: Where should this live? Inside TypeConstruction?
1515
func constructExistentialOutputComponent(
16-
from input: Substring,
16+
from input: String,
1717
in range: Range<String.Index>?,
1818
value: Any?,
1919
optionalCount: Int
@@ -43,7 +43,7 @@ func constructExistentialOutputComponent(
4343
@available(SwiftStdlib 5.7, *)
4444
extension AnyRegexOutput.Element {
4545
func existentialOutputComponent(
46-
from input: Substring
46+
from input: String
4747
) -> Any {
4848
constructExistentialOutputComponent(
4949
from: input,
@@ -64,15 +64,13 @@ extension Sequence where Element == AnyRegexOutput.Element {
6464
// FIXME: This is a stop gap where we still slice the input
6565
// and traffic through existentials
6666
@available(SwiftStdlib 5.7, *)
67-
func existentialOutput(
68-
from input: Substring
69-
) -> Any {
70-
var caps = Array<Any>()
71-
caps.append(input)
72-
caps.append(contentsOf: self.map {
67+
func existentialOutput(from input: String) -> Any {
68+
let elements = map {
7369
$0.existentialOutputComponent(from: input)
74-
})
75-
return TypeConstruction.tuple(of: caps)
70+
}
71+
return elements.count == 1
72+
? elements[0]
73+
: TypeConstruction.tuple(of: elements)
7674
}
7775

7876
func slices(from input: String) -> [Substring?] {

Sources/_StringProcessing/Compiler.swift

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@ class Compiler {
3030
var codegen = ByteCodeGen(
3131
options: options, captureList: tree.root._captureList
3232
)
33-
try codegen.emitNode(tree.root)
34-
let program = try codegen.finish()
35-
return program
33+
return try codegen.emitRoot(tree.root)
3634
}
3735
}
3836

Sources/_StringProcessing/Executor.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ struct Executor {
6262

6363
return .init(
6464
anyRegexOutput: anyRegexOutput,
65-
range: range,
66-
value: value
65+
range: range
6766
)
6867
}
6968

Sources/_StringProcessing/Regex/AnyRegexOutput.swift

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
@available(SwiftStdlib 5.7, *)
1616
public struct AnyRegexOutput {
1717
internal let input: String
18-
internal let _elements: [ElementRepresentation]
18+
internal var _elements: [ElementRepresentation]
1919
}
2020

2121
@available(SwiftStdlib 5.7, *)
@@ -37,7 +37,7 @@ extension AnyRegexOutput {
3737
as type: Output.Type = Output.self
3838
) -> Output? {
3939
let elements = map {
40-
$0.existentialOutputComponent(from: input[...])
40+
$0.existentialOutputComponent(from: input)
4141
}
4242
return TypeConstruction.tuple(of: elements) as? Output
4343
}
@@ -194,8 +194,7 @@ extension Regex.Match where Output == AnyRegexOutput {
194194
public init<Output>(_ match: Regex<Output>.Match) {
195195
self.init(
196196
anyRegexOutput: match.anyRegexOutput,
197-
range: match.range,
198-
value: match.value
197+
range: match.range
199198
)
200199
}
201200
}
@@ -250,12 +249,12 @@ extension AnyRegexOutput {
250249
@available(SwiftStdlib 5.7, *)
251250
extension AnyRegexOutput.ElementRepresentation {
252251
fileprivate func value(forInput input: String) -> Any {
253-
// Ok for now because `existentialMatchComponent`
254-
// wont slice the input if there's no range to slice with
255-
//
256-
// FIXME: This is ugly :-/
257-
let input = bounds.map { input[$0] } ?? ""
258-
252+
// // Ok for now because `existentialMatchComponent`
253+
// // wont slice the input if there's no range to slice with
254+
// //
255+
// // FIXME: This is ugly :-/
256+
// let input = bounds.map { input[$0] } ?? ""
257+
//
259258
return constructExistentialOutputComponent(
260259
from: input,
261260
in: bounds,

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,9 +524,17 @@ extension DSLTree.Node {
524524

525525
var _captureList: CaptureList {
526526
var list = CaptureList()
527+
list.append(.init(type: wholeMatchType, optionalDepth: 0, .fake))
527528
self._addCaptures(to: &list, optionalNesting: 0)
528529
return list
529530
}
531+
532+
var wholeMatchType: Any.Type {
533+
if case .matcher(let type, _) = self {
534+
return type
535+
}
536+
return Substring.self
537+
}
530538
}
531539

532540
extension DSLTree {

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ extension Regex {
2121

2222
/// The range of the overall match.
2323
public let range: Range<String.Index>
24-
25-
let value: Any?
2624
}
2725
}
2826

@@ -31,34 +29,12 @@ extension Regex.Match {
3129
/// The output produced from the match operation.
3230
public var output: Output {
3331
if Output.self == AnyRegexOutput.self {
34-
let wholeMatchCapture = AnyRegexOutput.ElementRepresentation(
35-
optionalDepth: 0,
36-
bounds: range
37-
)
38-
39-
let output = AnyRegexOutput(
40-
input: anyRegexOutput.input,
41-
elements: [wholeMatchCapture] + anyRegexOutput._elements
42-
)
43-
44-
return output as! Output
45-
} else if Output.self == Substring.self {
46-
// FIXME: Plumb whole match (`.0`) through the matching engine.
47-
return anyRegexOutput.input[range] as! Output
48-
} else if anyRegexOutput.isEmpty, value != nil {
49-
// FIXME: This is a workaround for whole-match values not
50-
// being modeled as part of captures. We might want to
51-
// switch to a model where results are alongside captures
52-
return value! as! Output
53-
} else {
54-
guard value == nil else {
55-
fatalError("FIXME: what would this mean?")
56-
}
57-
let typeErasedMatch = anyRegexOutput.existentialOutput(
58-
from: anyRegexOutput.input[range]
59-
)
60-
return typeErasedMatch as! Output
32+
return anyRegexOutput as! Output
6133
}
34+
let typeErasedMatch = anyRegexOutput.existentialOutput(
35+
from: anyRegexOutput.input
36+
)
37+
return typeErasedMatch as! Output
6238
}
6339

6440
/// Accesses a capture by its name or number.
@@ -83,7 +59,7 @@ extension Regex.Match {
8359
}
8460

8561
return element.existentialOutputComponent(
86-
from: anyRegexOutput.input[...]
62+
from: anyRegexOutput.input
8763
) as! Capture
8864
}
8965
}

Sources/_StringProcessing/Utility/TypeVerification.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ extension Regex {
1818
return true
1919
}
2020

21-
var tupleElements: [Any.Type] = [Substring.self]
22-
var labels = " "
21+
var tupleElements: [Any.Type] = []
22+
var labels = ""
2323

2424
for capture in program.tree.root._captureList.captures {
2525
var captureType: Any.Type = capture.type ?? Substring.self
@@ -41,7 +41,7 @@ extension Regex {
4141

4242
// If we have no captures, then our Regex must be Regex<Substring>.
4343
if tupleElements.count == 1 {
44-
return Output.self == Substring.self
44+
return Output.self == program.tree.root.wholeMatchType
4545
}
4646

4747
let createdType = TypeConstruction.tupleType(

Tests/RegexTests/CaptureTests.swift

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ func captureTest(
159159
line: UInt = #line
160160
) {
161161
let ast = try! parse(regex, .semantic, .traditional)
162-
let capList = ast.root._captureList.withoutLocs
162+
var capList = ast.root._captureList.withoutLocs
163+
// Peel off the whole match element.
164+
capList.captures.removeFirst()
163165
guard capList == expected else {
164166
XCTFail("""
165167
Expected:
@@ -173,7 +175,9 @@ func captureTest(
173175
}
174176

175177
// Ensure DSLTree preserves literal captures
176-
let dslCapList = ast.dslTree.root._captureList
178+
var dslCapList = ast.dslTree.root._captureList
179+
// Peel off the whole match element.
180+
dslCapList.captures.removeFirst()
177181
guard dslCapList == capList else {
178182
XCTFail("""
179183
DSLTree did not preserve structure:
@@ -202,7 +206,9 @@ func captureTest(
202206
return
203207
}
204208

205-
let caps = result.anyRegexOutput
209+
var caps = result.anyRegexOutput
210+
// Peel off the whole match element.
211+
caps._elements.removeFirst()
206212
guard caps.count == output.count else {
207213
XCTFail("""
208214
Mismatch capture count:

Tests/RegexTests/MatchTests.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func flatCaptureTest(
7474
) {
7575
for (test, expect) in tests {
7676
do {
77-
guard let (_, caps) = try? _firstMatch(
77+
guard var (_, caps) = try? _firstMatch(
7878
regex,
7979
input: test,
8080
syntax: syntax,
@@ -86,6 +86,8 @@ func flatCaptureTest(
8686
throw MatchError("Match failed")
8787
}
8888
}
89+
// Peel off the whole match.
90+
caps.removeFirst()
8991
guard let expect = expect else {
9092
throw MatchError("""
9193
Match of \(test) succeeded where failure expected in \(regex)

Tests/RegexTests/ParseTests.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ func parseTest(
9494
file: file, line: line)
9595
return
9696
}
97-
let captures = ast.captureList.withoutLocs
97+
var captures = ast.captureList.withoutLocs
98+
// Peel off the whole match.
99+
captures.captures.removeFirst()
98100
guard captures == expectedCaptures else {
99101
XCTFail("""
100102

0 commit comments

Comments
 (0)