Skip to content

Commit 08ce1de

Browse files
committed
Working? Solution
1 parent db96f7c commit 08ce1de

File tree

15 files changed

+576
-87
lines changed

15 files changed

+576
-87
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,52 @@ public struct NegativeLookahead<Output>: _BuiltinRegexComponent {
226226
self.init(_RegexFactory().negativeLookaheadNonCapturing(component()))
227227
}
228228
}
229+
230+
/// A regex component that allows a match to continue only if its contents
/// match the input immediately preceding the current position.
///
/// A lookbehind is a zero-length assertion: it checks the text that ends at
/// the current matching position against the wrapped regex, and is built
/// through the non-capturing lookbehind factory method, so it is not intended
/// to contribute to the text consumed by the surrounding regex.
///
/// Use ``NegativeLookbehind`` for the inverse assertion, which is meant to
/// succeed only when the wrapped regex does *not* match behind the current
/// position.
@available(SwiftStdlib 5.10, *)
public struct Lookbehind<Output>: _BuiltinRegexComponent {
  /// The regex that performs the lookbehind assertion.
  public var regex: Regex<Output>

  /// Wraps an already-constructed lookbehind regex.
  init(_ regex: Regex<Output>) {
    self.regex = regex
  }

  /// Creates a lookbehind from the given regex component.
  ///
  /// - Parameter component: The regex component to test against the input
  ///   preceding the current position.
  public init<R: RegexComponent>(
    _ component: R
  ) where R.RegexOutput == Output {
    self.init(_RegexFactory().lookbehindNonCapturing(component))
  }

  /// Creates a lookbehind from the regex generated by the given builder closure.
  ///
  /// - Parameter component: A builder closure producing the regex component
  ///   to test against the input preceding the current position.
  public init<R: RegexComponent>(
    @RegexComponentBuilder _ component: () -> R
  ) where R.RegexOutput == Output {
    self.init(_RegexFactory().lookbehindNonCapturing(component()))
  }
}
253+
254+
/// A regex component that allows a match to continue only if its contents
/// do not match the input immediately preceding the current position.
///
/// A negative lookbehind is a zero-length assertion: it checks the text that
/// ends at the current matching position against the wrapped regex and is
/// meant to succeed only when that regex fails to match there. It is built
/// through the non-capturing negative-lookbehind factory method, so it is not
/// intended to contribute to the text consumed by the surrounding regex.
///
/// Use ``Lookbehind`` for the positive form of this assertion.
@available(SwiftStdlib 5.10, *)
public struct NegativeLookbehind<Output>: _BuiltinRegexComponent {
  /// The regex that performs the negative lookbehind assertion.
  public var regex: Regex<Output>

  /// Wraps an already-constructed negative lookbehind regex.
  init(_ regex: Regex<Output>) {
    self.regex = regex
  }

  /// Creates a negative lookbehind from the given regex component.
  ///
  /// - Parameter component: The regex component that must not match the
  ///   input preceding the current position.
  public init<R: RegexComponent>(
    _ component: R
  ) where R.RegexOutput == Output {
    self.init(_RegexFactory().negativeLookbehindNonCapturing(component))
  }

  /// Creates a negative lookbehind from the regex generated by the given builder
  /// closure.
  ///
  /// - Parameter component: A builder closure producing the regex component
  ///   that must not match the input preceding the current position.
  public init<R: RegexComponent>(
    @RegexComponentBuilder _ component: () -> R
  ) where R.RegexOutput == Output {
    self.init(_RegexFactory().negativeLookbehindNonCapturing(component()))
  }
}

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ extension RegexValidator {
370370
}
371371
switch kind.value {
372372
case .capture, .namedCapture, .nonCapture, .lookahead, .negativeLookahead,
373-
.atomicNonCapturing:
373+
.atomicNonCapturing, .lookbehind:
374374
break
375375

376376
case .balancedCapture:
@@ -384,7 +384,7 @@ extension RegexValidator {
384384
case .nonAtomicLookahead:
385385
error(.unsupported("non-atomic lookahead"), at: kind.location)
386386

387-
case .lookbehind, .negativeLookbehind, .nonAtomicLookbehind:
387+
case .negativeLookbehind, .nonAtomicLookbehind:
388388
error(.unsupported("lookbehind"), at: kind.location)
389389

390390
case .scriptRun, .atomicScriptRun:

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 88 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import Swift
1616

1717
extension Compiler {
1818
struct ByteCodeGen {
19+
var reverse = false
1920
var options: MatchingOptions
2021
var builder = MEProgram.Builder()
2122
/// A Boolean indicating whether the first matchable atom has been emitted.
@@ -135,6 +136,36 @@ fileprivate extension Compiler.ByteCodeGen {
135136
for c in s { emitCharacter(c) }
136137
}
137138

139+
/// Emits instructions that match the quoted literal `s` back to front, for
/// use while generating the body of a backwards assertion.
///
/// Three strategies are used, mirroring the forward literal emitter:
/// - Under Unicode-scalar semantics every scalar of the literal is emitted,
///   last scalar first, through `emitReverseMatchScalar`.
/// - Under grapheme semantics with optimizations enabled, a non-empty
///   all-ASCII literal is emitted scalar by scalar with a boundary check only
///   on the final emitted scalar.
/// - Otherwise each character is emitted, last character first, through
///   `emitCharacter`.
///
/// - Parameter s: The literal text to match in reverse.
mutating func emitReverseQuotedLiteral(_ s: String) {
  guard options.semanticLevel == .graphemeCluster else {
    // Reversing the whole scalar view reverses both the character order and
    // the scalar order inside each character. The reverse matcher is used
    // because this emitter only runs while generating a backwards assertion.
    for scalar in s.unicodeScalars.reversed() {
      emitReverseMatchScalar(scalar)
    }
    return
  }

  // Fast path for eliding boundary checks for an all ascii quoted literal
  if optimizationsEnabled && s.allSatisfy(\.isASCII) && !s.isEmpty {
    builder.buildReverseUnicodeScalar(1)
    let scalars = s.unicodeScalars
    // The literal's first scalar is the last one emitted when walking
    // backwards, so it is the only one that carries the boundary check.
    // `startIndex` is valid here because the literal is known to be non-empty.
    let finalIdx = scalars.startIndex
    for idx in scalars.indices.reversed() {
      let boundaryCheck = idx == finalIdx
      let scalar = scalars[idx]
      if options.isCaseInsensitive && scalar.properties.isCased {
        builder.buildReverseMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
      } else {
        builder.buildReverseMatchScalar(scalar, boundaryCheck: boundaryCheck)
      }
    }
    return
  }

  // General grapheme path: emit whole characters, last character first.
  builder.buildReverse(1)
  for c in s.reversed() { emitCharacter(c) }
}
168+
138169
mutating func emitBackreference(
139170
_ ref: AST.Reference
140171
) throws {
@@ -189,12 +220,26 @@ fileprivate extension Compiler.ByteCodeGen {
189220
builder.buildMatchScalar(s, boundaryCheck: false)
190221
}
191222
}
192-
223+
224+
/// Emits the instructions that match a single Unicode scalar while matching
/// backwards.
///
/// Only valid under Unicode-scalar semantics (asserted in debug builds). A
/// one-scalar reverse instruction is emitted first, followed by either the
/// case-insensitive or the exact reverse scalar match; no boundary check is
/// requested in either case.
///
/// - Parameter s: The scalar to match.
mutating func emitReverseMatchScalar(_ s: UnicodeScalar) {
  assert(options.semanticLevel == .unicodeScalar)
  builder.buildReverseUnicodeScalar(1)

  // Case folding only applies to cased scalars when the case-insensitive
  // option is active; everything else is matched exactly.
  let matchesIgnoringCase = options.isCaseInsensitive && s.properties.isCased
  guard matchesIgnoringCase else {
    builder.buildReverseMatchScalar(s, boundaryCheck: false)
    return
  }
  builder.buildReverseMatchScalarCaseInsensitive(s, boundaryCheck: false)
}
233+
193234
mutating func emitCharacter(_ c: Character) {
194235
// Unicode scalar mode matches the specific scalars that comprise a character
195236
if options.semanticLevel == .unicodeScalar {
196237
for scalar in c.unicodeScalars {
197-
emitMatchScalar(scalar)
238+
if reverse {
239+
emitReverseMatchScalar(scalar)
240+
} else {
241+
emitMatchScalar(scalar)
242+
}
198243
}
199244
return
200245
}
@@ -208,20 +253,37 @@ fileprivate extension Compiler.ByteCodeGen {
208253
c.unicodeScalars.last!,
209254
boundaryCheck: true)
210255
} else {
211-
builder.buildMatch(c, isCaseInsensitive: true)
256+
if reverse {
257+
builder.buildReverse(1)
258+
builder.buildReverseMatch(c, isCaseInsensitive: true)
259+
} else {
260+
builder.buildMatch(c, isCaseInsensitive: true)
261+
}
212262
}
213263
return
214264
}
215265

216266
if optimizationsEnabled && c.isASCII {
217267
let lastIdx = c.unicodeScalars.indices.last!
218268
for idx in c.unicodeScalars.indices {
219-
builder.buildMatchScalar(c.unicodeScalars[idx], boundaryCheck: idx == lastIdx)
269+
let scalar = c.unicodeScalars[idx]
270+
let boundaryCheck = idx == lastIdx
271+
if reverse {
272+
builder.buildReverseUnicodeScalar(1)
273+
builder.buildReverseMatchScalar(scalar, boundaryCheck: boundaryCheck)
274+
} else {
275+
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
276+
}
220277
}
221278
return
222279
}
223280

224-
builder.buildMatch(c, isCaseInsensitive: false)
281+
if reverse {
282+
builder.buildReverse(1)
283+
builder.buildReverseMatch(c, isCaseInsensitive: false)
284+
} else {
285+
builder.buildMatch(c, isCaseInsensitive: false)
286+
}
225287
}
226288

227289
mutating func emitAny() {
@@ -308,7 +370,7 @@ fileprivate extension Compiler.ByteCodeGen {
308370
try emitNode(node)
309371
}
310372

311-
mutating func emitPositiveLookahead(_ child: DSLTree.Node) throws {
373+
mutating func emitPositiveLookaround(_ child: DSLTree.Node) throws {
312374
/*
313375
save(restoringAt: success)
314376
save(restoringAt: intercept)
@@ -337,7 +399,7 @@ fileprivate extension Compiler.ByteCodeGen {
337399
builder.label(success)
338400
}
339401

340-
mutating func emitNegativeLookahead(_ child: DSLTree.Node) throws {
402+
mutating func emitNegativeLookaround(_ child: DSLTree.Node) throws {
341403
/*
342404
save(restoringAt: success)
343405
save(restoringAt: intercept)
@@ -365,19 +427,18 @@ fileprivate extension Compiler.ByteCodeGen {
365427

366428
builder.label(success)
367429
}
368-
430+
369431
/// Emits a lookaround assertion around `child`.
///
/// The generator's `reverse` flag is set for the duration of the child's
/// emission so that backwards assertions produce reverse-matching
/// instructions, and is put back to its previous value afterwards.
///
/// - Parameters:
///   - kind: `forwards` is `false` for a backwards assertion; `positive`
///     selects between the positive and negative lookaround emitters.
///   - child: The node asserted by the lookaround.
/// - Throws: Any error thrown while emitting `child`.
mutating func emitLookaround(
  _ kind: (forwards: Bool, positive: Bool),
  _ child: DSLTree.Node
) throws {
  // Remember the enclosing direction and restore it on every exit path.
  // Resetting to `false` unconditionally would break a lookaround nested
  // inside a lookbehind, and a thrown error would leave the flag set.
  let enclosingReverse = reverse
  reverse = !kind.forwards
  defer { reverse = enclosingReverse }

  if kind.positive {
    try emitPositiveLookaround(child)
  } else {
    try emitNegativeLookaround(child)
  }
}
382443

383444
mutating func emitAtomicNoncapturingGroup(
@@ -438,15 +499,14 @@ fileprivate extension Compiler.ByteCodeGen {
438499
options.beginScope()
439500
defer { options.endScope() }
440501

441-
if let lookaround = kind.lookaroundKind {
442-
try emitLookaround(lookaround, child)
443-
return
444-
}
445-
446502
switch kind {
447503
case .lookahead, .negativeLookahead,
448504
.lookbehind, .negativeLookbehind:
449-
throw Unreachable("TODO: reason")
505+
guard let lookaround = kind.lookaroundKind else {
506+
throw Unreachable("TODO: reason")
507+
}
508+
509+
try emitLookaround(lookaround, child)
450510

451511
case .capture, .namedCapture, .balancedCapture:
452512
throw Unreachable("These should produce a capture node")
@@ -1161,7 +1221,7 @@ fileprivate extension Compiler.ByteCodeGen {
11611221
return [node]
11621222
}
11631223
}
1164-
let children = children
1224+
var children = children
11651225
.flatMap(flatten)
11661226
.coalescing(with: "", into: DSLTree.Node.quotedLiteral) { str, node in
11671227
switch node {
@@ -1180,6 +1240,9 @@ fileprivate extension Compiler.ByteCodeGen {
11801240
return false
11811241
}
11821242
}
1243+
if reverse {
1244+
children.reverse()
1245+
}
11831246
for child in children {
11841247
try emitConcatenationComponent(child)
11851248
}
@@ -1188,7 +1251,6 @@ fileprivate extension Compiler.ByteCodeGen {
11881251
@discardableResult
11891252
mutating func emitNode(_ node: DSLTree.Node) throws -> ValueRegister? {
11901253
switch node {
1191-
11921254
case let .orderedChoice(children):
11931255
try emitAlternation(children)
11941256

@@ -1256,7 +1318,11 @@ fileprivate extension Compiler.ByteCodeGen {
12561318
try emitAtom(a)
12571319

12581320
case let .quotedLiteral(s):
1259-
emitQuotedLiteral(s)
1321+
if reverse {
1322+
emitReverseQuotedLiteral(s)
1323+
} else {
1324+
emitQuotedLiteral(s)
1325+
}
12601326

12611327
case let .convertedRegexLiteral(n, _):
12621328
return try emitNode(n)

Sources/_StringProcessing/Compiler.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,9 @@ func _compileRegex(
103103
case .none:
104104
dsl = ast.dslTree
105105
}
106-
let program = try Compiler(tree: dsl).emit()
106+
var program = try Compiler(tree: dsl).emit()
107+
program.enableTracing = true
108+
program.enableMetrics = true
107109
return Executor(program: program)
108110
}
109111

0 commit comments

Comments
 (0)