Skip to content

Commit b7b23d3

Browse files
committed
Add instructions for consuming non-newlines and advancing in scalar view
1 parent 1acca94 commit b7b23d3

File tree

6 files changed

+87
-22
lines changed

6 files changed

+87
-22
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -217,27 +217,16 @@ fileprivate extension Compiler.ByteCodeGen {
217217
case .graphemeCluster:
218218
builder.buildAdvance(1)
219219
case .unicodeScalar:
220-
// TODO: builder.buildAdvanceUnicodeScalar(1)
221-
builder.buildConsume { input, bounds in
222-
input.unicodeScalars.index(after: bounds.lowerBound)
223-
}
220+
builder.buildAdvanceUnicodeScalar(1)
224221
}
225222
}
226223

227224
mutating func emitAnyNonNewline() {
228225
switch options.semanticLevel {
229226
case .graphemeCluster:
230-
builder.buildConsume { input, bounds in
231-
input[bounds.lowerBound].isNewline
232-
? nil
233-
: input.index(after: bounds.lowerBound)
234-
}
227+
builder.buildConsumeNonNewline()
235228
case .unicodeScalar:
236-
builder.buildConsume { input, bounds in
237-
input[bounds.lowerBound].isNewline
238-
? nil
239-
: input.unicodeScalars.index(after: bounds.lowerBound)
240-
}
229+
builder.buildConsumeScalarNonNewline()
241230
}
242231
}
243232

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,19 @@ extension Instruction.Payload {
196196
interpret()
197197
}
198198

199-
init(distance: Distance) {
200-
self.init(distance)
199+
init(distance: Distance, isScalarDistance: Bool = false) {
200+
self.init(isScalarDistance ? 1 : 0, distance)
201201
}
202-
var distance: Distance {
203-
interpret()
202+
var distance: (isScalarDistance: Bool, Distance) {
203+
let pair: (UInt64, Distance) = interpretPair()
204+
return (isScalarDistance: pair.0 == 1, pair.1)
205+
}
206+
207+
init(isScalar: Bool) {
208+
self.init(isScalar ? 1 : 0)
209+
}
210+
var isScalar: Bool {
211+
self.rawValue == 1
204212
}
205213

206214
init(bool: BoolRegister) {

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ extension Instruction {
122122
/// - If it is inverted
123123
/// - If it strictly matches only ascii values
124124
case matchBuiltin
125+
126+
/// Matches any non-newline character
127+
/// Operand: Whether we are matching in unicode scalar mode
128+
case matchAnyNonNewline
125129

126130
// MARK: Extension points
127131

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,19 @@ extension MEProgram.Builder {
142142
mutating func buildAdvance(_ n: Distance) {
143143
instructions.append(.init(.advance, .init(distance: n)))
144144
}
145+
146+
mutating func buildAdvanceUnicodeScalar(_ n: Distance) {
147+
instructions.append(
148+
.init(.advance, .init(distance: n, isScalarDistance: true)))
149+
}
150+
151+
mutating func buildConsumeNonNewline() {
152+
instructions.append(.init(.matchAnyNonNewline, .init(isScalar: false)))
153+
}
154+
155+
mutating func buildConsumeScalarNonNewline() {
156+
instructions.append(.init(.matchAnyNonNewline, .init(isScalar: true)))
157+
}
145158

146159
mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) {
147160
instructions.append(.init(

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,18 @@ extension Processor {
181181
currentPosition = idx
182182
return true
183183
}
184+
185+
// Advances the current position by `n` scalars in the unicode scalar view
186+
mutating func consumeScalar(_ n: Distance) -> Bool {
187+
guard let idx = input.unicodeScalars.index(
188+
currentPosition, offsetBy: n.rawValue, limitedBy: end
189+
) else {
190+
signalFailure()
191+
return false
192+
}
193+
currentPosition = idx
194+
return true
195+
}
184196

185197
/// Continue matching at the specified index.
186198
///
@@ -321,6 +333,26 @@ extension Processor {
321333
return true
322334
}
323335

336+
// Matches the next character if it is not a newline
337+
mutating func matchAnyNonNewline() -> Bool {
338+
guard let c = load(), !c.isNewline else {
339+
signalFailure()
340+
return false
341+
}
342+
_uncheckedForcedConsumeOne()
343+
return true
344+
}
345+
346+
// Matches the next scalar if it is not a newline
347+
mutating func matchAnyNonNewlineScalar() -> Bool {
348+
guard let s = loadScalar(), !s.isNewline else {
349+
signalFailure()
350+
return false
351+
}
352+
input.unicodeScalars.formIndex(after: &currentPosition)
353+
return true
354+
}
355+
324356
mutating func signalFailure() {
325357
guard !savePoints.isEmpty else {
326358
state = .fail
@@ -469,10 +501,26 @@ extension Processor {
469501
signalFailure()
470502

471503
case .advance:
472-
if consume(payload.distance) {
473-
controller.step()
504+
let (isScalar, distance) = payload.distance
505+
if isScalar {
506+
if consumeScalar(distance) {
507+
controller.step()
508+
}
509+
} else {
510+
if consume(distance) {
511+
controller.step()
512+
}
513+
}
514+
case .matchAnyNonNewline:
515+
if payload.isScalar {
516+
if matchAnyNonNewlineScalar() {
517+
controller.step()
518+
}
519+
} else {
520+
if matchAnyNonNewline() {
521+
controller.step()
522+
}
474523
}
475-
476524
case .match:
477525
let (isCaseInsensitive, reg) = payload.elementPayload
478526
if isCaseInsensitive {

Tests/RegexTests/CompileTests.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ enum DecodedInstr {
3737
case matchScalarCaseInsensitive
3838
case matchScalarUnchecked
3939
case matchBitsetScalar
40+
case matchAnyNonNewline
4041
case matchBitset
4142
case matchBuiltin
4243
case consumeBy
@@ -116,7 +117,9 @@ extension DecodedInstr {
116117
return .matchBitset
117118
}
118119
case .consumeBy:
119-
return consumeBy
120+
return .consumeBy
121+
case .matchAnyNonNewline:
122+
return .matchAnyNonNewline
120123
case .assertBy:
121124
return .assertBy
122125
case .matchBy:

0 commit comments

Comments
 (0)