Skip to content

Commit a4e0cc9

Browse files
committed
Clean up
Try fixing reverse quant. Add some unit tests
1 parent 7573373 commit a4e0cc9

File tree

10 files changed

+80
-2619
lines changed

10 files changed

+80
-2619
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,14 @@ public struct NegativeLookahead<Output>: _BuiltinRegexComponent {
227227
}
228228
}
229229

230-
// TODO: Write header doc
231-
@available(SwiftStdlib 5.10, *)
230+
/// A regex component that allows a match to continue only if its contents
231+
/// match immediately before the given location.
232+
///
233+
/// A lookbehind is a zero-length assertion that its included regex matches at
234+
/// a particular position. Lookbehinds do not advance the overall matching
235+
/// position in the input string — once a lookbehind succeeds, matching continues
236+
/// in the regex from the same position.
237+
@available(SwiftStdlib 5.7, *) // TODO: How should this be gated?
232238
public struct Lookbehind<Output>: _BuiltinRegexComponent {
233239
public var regex: Regex<Output>
234240

@@ -251,8 +257,14 @@ public struct Lookbehind<Output>: _BuiltinRegexComponent {
251257
}
252258
}
253259

254-
// TODO: Write header doc
255-
@available(SwiftStdlib 5.10, *)
260+
/// A regex component that allows a match to continue only if its contents
261+
/// do not match immediately before the given location.
262+
///
263+
/// A negative lookbehind is a zero-length assertion that its included regex
264+
/// does not match at a particular position. Lookbehinds do not advance the
265+
/// overall matching position in the input string — once a lookbehind succeeds,
266+
/// matching continues in the regex from the same position.
267+
@available(SwiftStdlib 5.7, *) // TODO: How should this be gated?
256268
public struct NegativeLookbehind<Output>: _BuiltinRegexComponent {
257269
public var regex: Regex<Output>
258270

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ extension RegexValidator {
370370
}
371371
switch kind.value {
372372
case .capture, .namedCapture, .nonCapture, .lookahead, .negativeLookahead,
373-
.atomicNonCapturing, .lookbehind:
373+
.atomicNonCapturing, .lookbehind, .negativeLookbehind:
374374
break
375375

376376
case .balancedCapture:
@@ -384,8 +384,8 @@ extension RegexValidator {
384384
case .nonAtomicLookahead:
385385
error(.unsupported("non-atomic lookahead"), at: kind.location)
386386

387-
case .negativeLookbehind, .nonAtomicLookbehind:
388-
error(.unsupported("lookbehind"), at: kind.location)
387+
case .nonAtomicLookbehind:
388+
error(.unsupported("non-atomic lookbehind"), at: kind.location)
389389

390390
case .scriptRun, .atomicScriptRun:
391391
error(.unsupported("script run"), at: kind.location)

Sources/_StringProcessing/Compiler.swift

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,7 @@ func _compileRegex(
103103
case .none:
104104
dsl = ast.dslTree
105105
}
106-
var program = try Compiler(tree: dsl).emit()
107-
program.enableTracing = true
108-
program.enableMetrics = true
106+
let program = try Compiler(tree: dsl).emit()
109107
return Executor(program: program)
110108
}
111109

Sources/_StringProcessing/Engine/MEReverseQuantify.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,15 @@ extension Processor {
5858
signalFailure()
5959
return false
6060
}
61+
6162
currentPosition = previous
63+
64+
// If we've reached the start of the string while `trips` (not yet incremented for this iteration) is still below `payload.minTrips`, fail
65+
if currentPosition == start, trips < payload.minTrips {
66+
signalFailure()
67+
return false
68+
}
69+
6270
trips += 1
6371
}
6472

Sources/_StringProcessing/Engine/Metrics.swift

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
extension Processor {
2-
#if !PROCESSOR_MEASUREMENTS_ENABLED
2+
#if PROCESSOR_MEASUREMENTS_ENABLED
33
struct ProcessorMetrics {
44
var instructionCounts: [Instruction.OpCode: Int] = [:]
55
var backtracks: Int = 0
66
var resets: Int = 0
77
var cycleCount: Int = 0
88

9-
var isTracingEnabled: Bool = true
10-
var shouldMeasureMetrics: Bool = true
9+
var isTracingEnabled: Bool = false
10+
var shouldMeasureMetrics: Bool = false
1111

1212
init(isTracingEnabled: Bool, shouldMeasureMetrics: Bool) {
1313
self.isTracingEnabled = isTracingEnabled
@@ -28,7 +28,7 @@ extension Processor {
2828
extension Processor {
2929

3030
mutating func startCycleMetrics() {
31-
#if !PROCESSOR_MEASUREMENTS_ENABLED
31+
#if PROCESSOR_MEASUREMENTS_ENABLED
3232
if metrics.cycleCount == 0 {
3333
trace()
3434
measureMetrics()
@@ -37,7 +37,7 @@ extension Processor {
3737
}
3838

3939
mutating func endCycleMetrics() {
40-
#if !PROCESSOR_MEASUREMENTS_ENABLED
40+
#if PROCESSOR_MEASUREMENTS_ENABLED
4141
metrics.cycleCount += 1
4242
trace()
4343
measureMetrics()
@@ -49,20 +49,20 @@ extension Processor {
4949
extension Processor.ProcessorMetrics {
5050

5151
mutating func addReset() {
52-
#if !PROCESSOR_MEASUREMENTS_ENABLED
52+
#if PROCESSOR_MEASUREMENTS_ENABLED
5353
self.resets += 1
5454
#endif
5555
}
5656

5757
mutating func addBacktrack() {
58-
#if !PROCESSOR_MEASUREMENTS_ENABLED
58+
#if PROCESSOR_MEASUREMENTS_ENABLED
5959
self.backtracks += 1
6060
#endif
6161
}
6262
}
6363

6464
extension Processor {
65-
#if !PROCESSOR_MEASUREMENTS_ENABLED
65+
#if PROCESSOR_MEASUREMENTS_ENABLED
6666
func printMetrics() {
6767
print("===")
6868
print("Total cycle count: \(metrics.cycleCount)")

Sources/_StringProcessing/Executor.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ struct Executor {
3030
input: input,
3131
subjectBounds: subjectBounds,
3232
searchBounds: searchBounds)
33-
#if !PROCESSOR_MEASUREMENTS_ENABLED
33+
#if PROCESSOR_MEASUREMENTS_ENABLED
3434
defer { if cpu.metrics.shouldMeasureMetrics { cpu.printMetrics() } }
3535
#endif
3636
var low = searchBounds.lowerBound
@@ -60,7 +60,7 @@ struct Executor {
6060
) throws -> Regex<Output>.Match? {
6161
var cpu = engine.makeProcessor(
6262
input: input, bounds: subjectBounds, matchMode: mode)
63-
#if !PROCESSOR_MEASUREMENTS_ENABLED
63+
#if PROCESSOR_MEASUREMENTS_ENABLED
6464
defer { if cpu.metrics.shouldMeasureMetrics { cpu.printMetrics() } }
6565
#endif
6666
return try _match(input, from: subjectBounds.lowerBound, using: &cpu)

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ extension Regex {
106106
/// - Parameter string: The string to match this regular expression against.
107107
/// - Returns: The match, if this regex matches the entirety of `string`;
108108
/// otherwise, `nil`.
109-
// TODO: Reverse
110109
public func wholeMatch(in string: String) throws -> Regex<Output>.Match? {
111110
try _match(string, in: string.startIndex..<string.endIndex, mode: .wholeString)
112111
}
@@ -164,7 +163,6 @@ extension Regex {
164163
///
165164
/// - Parameter string: The string to match this regular expression against.
166165
/// - Returns: The match, if one is found; otherwise, `nil`.
167-
// TODO: Reverse
168166
public func firstMatch(in string: String) throws -> Regex<Output>.Match? {
169167
try _firstMatch(string, in: string.startIndex..<string.endIndex)
170168
}
@@ -199,7 +197,6 @@ extension Regex {
199197
/// against.
200198
/// - Returns: The match, if this regex matches the entirety of `string`;
201199
/// otherwise, `nil`.
202-
// TODO: Reverse
203200
public func wholeMatch(in string: Substring) throws -> Regex<Output>.Match? {
204201
try _match(string.base, in: string.startIndex..<string.endIndex, mode: .wholeString)
205202
}
@@ -259,7 +256,6 @@ extension Regex {
259256
/// - Parameter string: The substring to match this regular expression
260257
/// against.
261258
/// - Returns: The match, if one is found; otherwise, `nil`.
262-
// TODO: Reverse
263259
public func firstMatch(in string: Substring) throws -> Regex<Output>.Match? {
264260
try _firstMatch(string.base, in: string.startIndex..<string.endIndex)
265261
}
@@ -304,7 +300,6 @@ extension BidirectionalCollection where SubSequence == Substring {
304300
/// - Parameter regex: The regular expression to match.
305301
/// - Returns: The match, if one is found. If there is no match, or a
306302
/// transformation in `regex` throws an error, this method returns `nil`.
307-
// TODO: Reverse
308303
public func wholeMatch<R: RegexComponent>(
309304
of regex: R
310305
) -> Regex<R.RegexOutput>.Match? {

Tests/MatchingEngineTests/MatchingEngineTests.swift

Lines changed: 0 additions & 81 deletions
This file was deleted.

Tests/RegexTests/MatchTests.swift

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ func firstMatchTests(
325325
enableTracing: Bool = false,
326326
dumpAST: Bool = false,
327327
xfail: Bool = false,
328+
validateOptimizations: Bool = true,
328329
semanticLevel: RegexSemanticLevel = .graphemeCluster,
329330
file: StaticString = #filePath,
330331
line: UInt = #line
@@ -338,6 +339,7 @@ func firstMatchTests(
338339
enableTracing: enableTracing,
339340
dumpAST: dumpAST,
340341
xfail: xfail,
342+
validateOptimizations: validateOptimizations,
341343
semanticLevel: semanticLevel,
342344
file: file,
343345
line: line)
@@ -1602,27 +1604,57 @@ extension RegexTests {
16021604
(input: "hezllo", match: nil),
16031605
(input: "helloz", match: nil))
16041606

1607+
// MARK: Lookbehinds
16051608
firstMatchTest(
1606-
#"(?<=USD)\d+"#, input: "Price: USD100", match: "100", xfail: true)
1609+
#"(?<=USD)\d+"#, input: "Price: USD100", match: "100")
16071610
firstMatchTest(
1608-
#"(*plb:USD)\d+"#, input: "Price: USD100", match: "100", xfail: true)
1611+
#"(*plb:USD)\d+"#, input: "Price: USD100", match: "100")
16091612
firstMatchTest(
16101613
#"(*positive_lookbehind:USD)\d+"#,
1611-
input: "Price: USD100", match: "100", xfail: true)
1612-
// engines generally enforce that lookbehinds are fixed width
1614+
input: "Price: USD100", match: "100")
1615+
1616+
// TODO: Why is a match not found when unoptimized?
16131617
firstMatchTest(
1614-
#"\d{3}(?<=USD\d{3})"#, input: "Price: USD100", match: "100", xfail: true)
1618+
#"\d{3}(?<=USD\d{3})"#, input: "Price: USD100", match: "100", validateOptimizations: false)
16151619

16161620
firstMatchTest(
1617-
#"(?<!USD)\d+"#, input: "Price: JYP100", match: "100", xfail: true)
1621+
#"(?<!USD)\d+"#, input: "Price: JYP100", match: "100")
16181622
firstMatchTest(
1619-
#"(*nlb:USD)\d+"#, input: "Price: JYP100", match: "100", xfail: true)
1623+
#"(*nlb:USD)\d+"#, input: "Price: JYP100", match: "100")
16201624
firstMatchTest(
16211625
#"(*negative_lookbehind:USD)\d+"#,
1622-
input: "Price: JYP100", match: "100", xfail: true)
1623-
// engines generally enforce that lookbehinds are fixed width
1626+
input: "Price: JYP100", match: "100")
1627+
1628+
firstMatchTest(
1629+
#"\d{3}(?<!USD\d{3})"#, input: "Price: JYP100", match: "100")
1630+
1631+
firstMatchTest(#"(?<=abc)def"#, input: "abcdefg", match: "def", validateOptimizations: false)
1632+
firstMatchTests(
1633+
#"(?<=az|b|c)def"#,
1634+
("azdefg", "def"),
1635+
("bdefg", "def"),
1636+
("cdefg", "def"),
1637+
("123defg", nil),
1638+
validateOptimizations: false
1639+
)
1640+
1641+
// FIXME: quickMatch and thoroughMatch have different results
16241642
firstMatchTest(
1625-
#"\d{3}(?<!USD\d{3})"#, input: "Price: JYP100", match: "100", xfail: true)
1643+
#"(?<=\d{1,3}-.{1,3}-\d{1,3})suffix"#,
1644+
input: "123-_+/-789suffix",
1645+
match: "suffix",
1646+
validateOptimizations: false
1647+
)
1648+
1649+
firstMatchTests(
1650+
#"(?<=^\d{1,3})abc"#,
1651+
("123abc", "abc"),
1652+
("12abc", "abc"),
1653+
("1abc", "abc"),
1654+
("1234abc", nil), // FIXME: Shouldn't match but does because `^` assertions are broken
1655+
("z123abc", nil), // FIXME: Same as above
1656+
validateOptimizations: false
1657+
)
16261658
}
16271659

16281660
func testMatchAnchors() throws {

0 commit comments

Comments
 (0)