Skip to content

Commit ec7727b

Browse files
authored
Merge pull request #597 from rctcwyvrn/backreference-in-scalar-mode
Fix matching of backreferences in scalar mode
2 parents 1acca94 + 1f76eb9 commit ec7727b

File tree

5 files changed, with 42 additions and 16 deletions

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ fileprivate extension Compiler.ByteCodeGen {
8484
try emitBackreference(ref.ast)
8585

8686
case let .symbolicReference(id):
87-
builder.buildUnresolvedReference(id: id)
87+
builder.buildUnresolvedReference(
88+
id: id, isScalarMode: options.semanticLevel == .unicodeScalar)
8889

8990
case let .changeMatchingOptions(optionSequence):
9091
if !hasEmittedFirstMatchableAtom {
@@ -143,9 +144,11 @@ fileprivate extension Compiler.ByteCodeGen {
143144
guard let i = n.value else {
144145
throw Unreachable("Expected a value")
145146
}
146-
builder.buildBackreference(.init(i))
147+
builder.buildBackreference(
148+
.init(i), isScalarMode: options.semanticLevel == .unicodeScalar)
147149
case .named(let name):
148-
try builder.buildNamedReference(name)
150+
try builder.buildNamedReference(
151+
name, isScalarMode: options.semanticLevel == .unicodeScalar)
149152
case .relative:
150153
throw Unsupported("Backreference kind: \(ref)")
151154
}

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,14 +240,20 @@ extension Instruction.Payload {
240240
interpret()
241241
}
242242

243+
init(capture: CaptureRegister, isScalarMode: Bool) {
244+
self.init(isScalarMode ? 1 : 0, capture)
245+
}
246+
var captureAndMode: (isScalarMode: Bool, CaptureRegister) {
247+
let pair: (UInt64, CaptureRegister) = interpretPair()
248+
return (pair.0 == 1, pair.1)
249+
}
243250
init(capture: CaptureRegister) {
244251
self.init(capture)
245252
}
246253
var capture: CaptureRegister {
247254
interpret()
248255
}
249256

250-
251257
// MARK: Packed operand payloads
252258

253259
init(immediate: UInt64, int: IntRegister) {

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -292,22 +292,23 @@ extension MEProgram.Builder {
292292
}
293293

294294
mutating func buildBackreference(
295-
_ cap: CaptureRegister
295+
_ cap: CaptureRegister,
296+
isScalarMode: Bool
296297
) {
297298
instructions.append(
298-
.init(.backreference, .init(capture: cap)))
299+
.init(.backreference, .init(capture: cap, isScalarMode: isScalarMode)))
299300
}
300301

301-
mutating func buildUnresolvedReference(id: ReferenceID) {
302-
buildBackreference(.init(0))
302+
mutating func buildUnresolvedReference(id: ReferenceID, isScalarMode: Bool) {
303+
buildBackreference(.init(0), isScalarMode: isScalarMode)
303304
unresolvedReferences[id, default: []].append(lastInstructionAddress)
304305
}
305306

306-
mutating func buildNamedReference(_ name: String) throws {
307+
mutating func buildNamedReference(_ name: String, isScalarMode: Bool) throws {
307308
guard let index = captureList.indexOfCapture(named: name) else {
308309
throw RegexCompilationError.uncapturedReference
309310
}
310-
buildBackreference(.init(index))
311+
buildBackreference(.init(index), isScalarMode: isScalarMode)
311312
}
312313

313314
// TODO: Mutating because of fail address fixup, drop when
@@ -456,8 +457,10 @@ fileprivate extension MEProgram.Builder {
456457
throw RegexCompilationError.uncapturedReference
457458
}
458459
for use in uses {
460+
let (isScalarMode, _) = instructions[use.rawValue].payload.captureAndMode
459461
instructions[use.rawValue] =
460-
Instruction(.backreference, .init(capture: .init(offset)))
462+
Instruction(.backreference,
463+
.init(capture: .init(offset), isScalarMode: isScalarMode))
461464
}
462465
}
463466
}

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -231,9 +231,17 @@ extension Processor {
231231

232232
// Match against the current input prefix. Returns whether
233233
// it succeeded vs signaling an error.
234-
mutating func matchSeq<C: Collection>(
235-
_ seq: C
236-
) -> Bool where C.Element == Input.Element {
234+
mutating func matchSeq(
235+
_ seq: Substring,
236+
isScalarMode: Bool
237+
) -> Bool {
238+
if isScalarMode {
239+
for s in seq.unicodeScalars {
240+
guard matchScalar(s, boundaryCheck: false) else { return false }
241+
}
242+
return true
243+
}
244+
237245
for e in seq {
238246
guard match(e) else { return false }
239247
}
@@ -584,8 +592,9 @@ extension Processor {
584592
}
585593

586594
case .backreference:
595+
let (isScalarMode, capture) = payload.captureAndMode
587596
let capNum = Int(
588-
asserting: payload.capture.rawValue)
597+
asserting: capture.rawValue)
589598
guard capNum < storedCaptures.count else {
590599
fatalError("Should this be an assert?")
591600
}
@@ -597,7 +606,7 @@ extension Processor {
597606
signalFailure()
598607
return
599608
}
600-
if matchSeq(input[range]) {
609+
if matchSeq(input[range], isScalarMode: isScalarMode) {
601610
controller.step()
602611
}
603612

Tests/RegexTests/MatchTests.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1643,6 +1643,11 @@ extension RegexTests {
16431643
(input: "123x23", match: "23x23"),
16441644
xfail: true)
16451645

1646+
// Backreferences in scalar mode
1647+
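// "é" and "e\u{301}" are canonically equivalent, so `\1` matches under the default grapheme-cluster semantics; their scalars differ, so in scalar mode the backreference should not match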
1648+
firstMatchTest(#"(.+)\1"#, input: "ée\u{301}", match: "ée\u{301}")
1649+
firstMatchTest(#"(.+)\1"#, input: "ée\u{301}", match: nil, semanticLevel: .unicodeScalar)
1650+
16461651
// Backreferences in lookaheads
16471652
firstMatchTests(
16481653
#"^(?=.*(.)(.)\2\1).+$"#,

0 commit comments

Comments
 (0)