Skip to content

Commit 7573373

Browse files
committed
Various fixes and improvements
1 parent 626f567 commit 7573373

File tree

13 files changed

+2640
-78
lines changed

13 files changed

+2640
-78
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ fileprivate extension Compiler.ByteCodeGen {
148148

149149
// Fast path for eliding boundary checks for an all ascii quoted literal
150150
if optimizationsEnabled && s.allSatisfy(\.isASCII) && !s.isEmpty {
151-
builder.buildReverseUnicodeScalar(1)
152151
let lastIdx = s.unicodeScalars.indices.first!
153152
for idx in s.unicodeScalars.indices.reversed() {
154153
let boundaryCheck = idx == lastIdx
@@ -162,7 +161,6 @@ fileprivate extension Compiler.ByteCodeGen {
162161
return
163162
}
164163

165-
builder.buildReverse(1)
166164
for c in s.reversed() { emitCharacter(c) }
167165
}
168166

@@ -223,7 +221,6 @@ fileprivate extension Compiler.ByteCodeGen {
223221

224222
mutating func emitReverseMatchScalar(_ s: UnicodeScalar) {
225223
assert(options.semanticLevel == .unicodeScalar)
226-
builder.buildReverseUnicodeScalar(1)
227224
if options.isCaseInsensitive && s.properties.isCased {
228225
builder.buildReverseMatchScalarCaseInsensitive(s, boundaryCheck: false)
229226
} else {
@@ -254,7 +251,6 @@ fileprivate extension Compiler.ByteCodeGen {
254251
boundaryCheck: true)
255252
} else {
256253
if reverse {
257-
builder.buildReverse(1)
258254
builder.buildReverseMatch(c, isCaseInsensitive: true)
259255
} else {
260256
builder.buildMatch(c, isCaseInsensitive: true)
@@ -269,7 +265,6 @@ fileprivate extension Compiler.ByteCodeGen {
269265
let scalar = c.unicodeScalars[idx]
270266
let boundaryCheck = idx == lastIdx
271267
if reverse {
272-
builder.buildReverseUnicodeScalar(1)
273268
builder.buildReverseMatchScalar(scalar, boundaryCheck: boundaryCheck)
274269
} else {
275270
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
@@ -279,7 +274,6 @@ fileprivate extension Compiler.ByteCodeGen {
279274
}
280275

281276
if reverse {
282-
builder.buildReverse(1)
283277
builder.buildReverseMatch(c, isCaseInsensitive: false)
284278
} else {
285279
builder.buildMatch(c, isCaseInsensitive: false)
@@ -388,6 +382,9 @@ fileprivate extension Compiler.ByteCodeGen {
388382

389383
builder.buildSave(success)
390384
builder.buildSave(intercept)
385+
if reverse {
386+
builder.buildReverse(1)
387+
}
391388
try emitNode(child)
392389
builder.buildClearThrough(intercept)
393390
builder.buildFail(preservingCaptures: true) // Lookahead succeeds here
@@ -417,6 +414,9 @@ fileprivate extension Compiler.ByteCodeGen {
417414

418415
builder.buildSave(success)
419416
builder.buildSave(intercept)
417+
if reverse {
418+
builder.buildReverse(1)
419+
}
420420
try emitNode(child)
421421
builder.buildClearThrough(intercept)
422422
builder.buildClear()
@@ -795,7 +795,6 @@ fileprivate extension Compiler.ByteCodeGen {
795795
return false
796796
}
797797
if reverse {
798-
builder.buildReverse(1)
799798
builder.buildReverseQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
800799
} else {
801800
builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
@@ -809,15 +808,13 @@ fileprivate extension Compiler.ByteCodeGen {
809808
return false
810809
}
811810
if reverse {
812-
builder.buildReverse(1)
813811
builder.buildReverseQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
814812
} else {
815813
builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
816814
}
817815

818816
case .any:
819817
if reverse {
820-
builder.buildReverse(1)
821818
builder.buildReverseQuantifyAny(
822819
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
823820
} else {
@@ -826,7 +823,6 @@ fileprivate extension Compiler.ByteCodeGen {
826823
}
827824
case .anyNonNewline:
828825
if reverse {
829-
builder.buildReverse(1)
830826
builder.buildReverseQuantifyAny(
831827
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
832828
} else {
@@ -835,7 +831,6 @@ fileprivate extension Compiler.ByteCodeGen {
835831
}
836832
case .dot:
837833
if reverse {
838-
builder.buildReverse(1)
839834
builder.buildReverseQuantifyAny(
840835
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
841836
} else {
@@ -847,7 +842,6 @@ fileprivate extension Compiler.ByteCodeGen {
847842
// Custom character class that consumes a single grapheme
848843
let model = cc.asRuntimeModel(options)
849844
if reverse {
850-
builder.buildReverse(1)
851845
builder.buildReverseQuantify(
852846
model: model,
853847
kind,
@@ -914,7 +908,6 @@ fileprivate extension Compiler.ByteCodeGen {
914908
guard let bitset = ccc.asAsciiBitset(options) else {
915909
return false
916910
}
917-
builder.buildReverse(1)
918911
builder.buildReverseQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
919912

920913
case .atom(let atom):
@@ -924,26 +917,21 @@ fileprivate extension Compiler.ByteCodeGen {
924917
guard let val = c._singleScalarAsciiValue else {
925918
return false
926919
}
927-
builder.buildReverse(1)
928920
builder.buildReverseQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
929921

930922
case .any:
931-
builder.buildReverse(1)
932923
builder.buildReverseQuantifyAny(
933924
matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
934925
case .anyNonNewline:
935-
builder.buildReverse(1)
936926
builder.buildReverseQuantifyAny(
937927
matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
938928
case .dot:
939-
builder.buildReverse(1)
940929
builder.buildReverseQuantifyAny(
941930
matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)
942931

943932
case .characterClass(let cc):
944933
// Custom character class that consumes a single grapheme
945934
let model = cc.asRuntimeModel(options)
946-
builder.buildReverse(1)
947935
builder.buildReverseQuantify(
948936
model: model,
949937
kind,

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ extension Instruction {
262262
/// Quantifies the stored instruction in an inner loop instead of looping through instructions in processor
263263
/// Only quantifies specific nodes
264264
///
265-
/// quantify(_:QuantifyPayload)
265+
/// reverseQuantify(_:QuantifyPayload)
266266
///
267267
case reverseQuantify
268268
/// Begin the given capture

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,18 @@ extension MEProgram.Builder {
171171
instructions.append(.init(.matchAnyNonNewline, .init(isScalar: false)))
172172
}
173173

174+
mutating func buildReverseConsumeNonNewline() {
175+
instructions.append(.init(.reverseMatchAnyNonNewline, .init(isScalar: false)))
176+
}
177+
174178
mutating func buildConsumeScalarNonNewline() {
175179
instructions.append(.init(.matchAnyNonNewline, .init(isScalar: true)))
176180
}
177181

182+
mutating func buildReverseConsumeScalarNonNewline() {
183+
instructions.append(.init(.reverseMatchAnyNonNewline, .init(isScalar: true)))
184+
}
185+
178186
mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) {
179187
instructions.append(.init(
180188
.match, .init(element: elements.store(e), isCaseInsensitive: isCaseInsensitive)))

Sources/_StringProcessing/Engine/MEBuiltins.swift

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -186,34 +186,34 @@ extension String {
186186
/// boundary of the returned character.
187187
///
188188
/// This function handles loading a character from a string while respecting
189-
/// an end boundary, even if that end boundary is sub-character or sub-scalar.
189+
/// a start boundary, even if that start boundary is sub-character or sub-scalar.
190190
///
191-
/// - If `pos` is at or past `end`, this function returns `nil`.
192-
/// - If `end` is between `pos` and the next grapheme cluster boundary (i.e.,
193-
/// `end` is before `self.index(after: pos)`, then the returned character
191+
/// - If `pos` is at or before `start`, this function returns `nil`.
192+
/// - If `start` is between `pos` and the previous grapheme cluster boundary (i.e.,
193+
/// `start` is after `self.index(before: pos)`), then the returned character
194194
/// is smaller than the one that would be produced by `self[pos]` and the
195-
/// returned index is at the end of that character.
196-
/// - If `end` is between `pos` and the next grapheme cluster boundary, and
195+
/// returned index is at the start of that character.
196+
/// - If `start` is between `pos` and the previous grapheme cluster boundary, and
197197
/// is not on a Unicode scalar boundary, the partial scalar is dropped. This
198198
/// can result in a `nil` return or a character that includes only part of
199199
/// the `self[pos]` character.
200200
///
201201
/// - Parameters:
202202
/// - pos: The position to load a character from.
203-
/// - end: The limit for the character at `pos`.
204-
/// - Returns: The character at `pos`, bounded by `end`, if it exists, along
203+
/// - start: The limit for the character at `pos`.
204+
/// - Returns: The character at `pos`, bounded by `start`, if it exists, along
205205
/// with the lower bound of that character. The lower bound is always
206206
/// scalar-aligned.
207207
func characterAndStart(at pos: String.Index, limitedBy start: String.Index) -> (Character, String.Index)? {
208208
// FIXME: Sink into the stdlib to avoid multiple boundary calculations
209209
guard pos > start else { return nil }
210-
let next = index(before: pos)
211-
if next >= start {
212-
return (self[pos], next)
210+
let previous = index(before: pos)
211+
if previous >= start {
212+
return (self[pos], previous)
213213
}
214214

215-
// `end` must be a sub-character position that is between `pos` and the
216-
// next grapheme boundary. This is okay if `end` is on a Unicode scalar
215+
// `start` must be a sub-character position that is between `pos` and the
216+
// previous grapheme boundary. This is okay if `start` is on a Unicode scalar
217217
// boundary, but if it's in the middle of a scalar's code units, there
218218
// may not be a character to return at all after rounding down. Use
219219
// `Substring`'s rounding to determine what we can return.
@@ -338,7 +338,7 @@ extension String {
338338
guard currentPosition >= start else { return nil }
339339
let scalar = unicodeScalars[currentPosition]
340340
guard !scalar.isNewline else { return nil }
341-
return unicodeScalars.index(after: currentPosition)
341+
return unicodeScalars.index(before: currentPosition)
342342
}
343343

344344
guard let (char, previous) = characterAndStart(at: currentPosition, limitedBy: start),

Sources/_StringProcessing/Engine/MEReverseQuantify.swift

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ extension Processor {
4444
}
4545
}
4646

47-
/// Generic quantify instruction interpreter
47+
/// Generic bounded reverseQuantify instruction interpreter
4848
/// - Handles .eager and .possessive
4949
/// - Handles arbitrary minTrips and maxExtraTrips
5050
mutating func runReverseQuantify(_ payload: QuantifyPayload) -> Bool {
@@ -62,11 +62,14 @@ extension Processor {
6262
trips += 1
6363
}
6464

65+
// If we don't have any more trips to take:
6566
if maxExtraTrips == 0 {
6667
// We're done
6768
return true
6869
}
6970

71+
// We've already consumed the minimum number of characters.
72+
// If we can't get another match, the reverse quantify was successful
7073
guard let previous = _doReverseQuantifyMatch(payload) else {
7174
return true
7275
}
@@ -118,12 +121,12 @@ extension Processor {
118121

119122
// Create a quantified save point for every part of the input matched up
120123
// to the final position.
121-
let rangeStart = currentPosition
122-
var rangeEnd = currentPosition
124+
var rangeStart = currentPosition
125+
let rangeEnd = currentPosition
123126
currentPosition = previous
124127
while true {
125128
guard let previous = _doReverseQuantifyMatch(payload) else { break }
126-
rangeEnd = currentPosition
129+
rangeStart = currentPosition
127130
currentPosition = previous
128131
}
129132

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -301,16 +301,16 @@ extension Processor {
301301
mutating func reverseMatch(
302302
_ e: Element, isCaseInsensitive: Bool
303303
) -> Bool {
304-
guard let next = input.match(
304+
guard let previous = input.reverseMatch(
305305
e,
306306
at: currentPosition,
307-
limitedBy: end,
307+
limitedBy: start,
308308
isCaseInsensitive: isCaseInsensitive
309309
) else {
310310
signalFailure()
311311
return false
312312
}
313-
currentPosition = next
313+
currentPosition = previous
314314
return true
315315
}
316316

@@ -358,7 +358,7 @@ extension Processor {
358358
boundaryCheck: Bool,
359359
isCaseInsensitive: Bool
360360
) -> Bool {
361-
guard let next = input.reverseMatchScalar(
361+
guard let previous = input.reverseMatchScalar(
362362
s,
363363
at: currentPosition,
364364
limitedBy: start,
@@ -368,7 +368,7 @@ extension Processor {
368368
signalFailure()
369369
return false
370370
}
371-
currentPosition = next
371+
currentPosition = previous
372372
return true
373373
}
374374

@@ -474,10 +474,6 @@ extension Processor {
474474

475475
controller.pc = pc
476476
currentPosition = pos ?? currentPosition
477-
// if (input.startIndex..<input.endIndex).contains(currentPosition) {
478-
// print("Restoring to: \(input[currentPosition]), \(pc)")
479-
// print("Remaining save points: \(savePoints.map(\.destructure.pc))")
480-
// }
481477
callStack.removeLast(callStack.count - stackEnd.rawValue)
482478
registers.ints = intRegisters
483479
registers.positions = posRegisters
@@ -819,18 +815,6 @@ extension Processor {
819815
storedCaptures[capNum].registerValue(value)
820816
controller.step()
821817
}
822-
823-
// print("==== State after executing \(instructions[controller.pc].description)")
824-
// print("Pos: \(input.distance(from: input.startIndex, to: currentPosition))")
825-
// print("Inst: \(currentPC)")
826-
// let savePointsDescription = savePoints.map { sp in
827-
// let posDescription = if let pos = sp.pos {
828-
// input.distance(from: input.startIndex, to: pos).description
829-
// } else { "" }
830-
//
831-
// return "Pos: \(posDescription), Inst: \(sp.pc)"
832-
// }
833-
// print("Save points: \(savePointsDescription)")
834818
}
835819
}
836820

@@ -984,7 +968,7 @@ extension String {
984968
// meanings in some code paths.
985969
let isInverted = bitset.isInverted
986970

987-
// TODO: More fodder for refactoring `_quickASCIICharacter`, see the comment
971+
// TODO: More fodder for refactoring `_quickASCIICharacter`, see the comment
988972
// there
989973
guard let (asciiByte, next, isCRLF) = _quickASCIICharacter(
990974
at: pos,

Sources/_StringProcessing/Engine/Tracing.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,33 @@ extension Instruction: CustomStringConvertible {
6565
} else {
6666
return "match char[\(reg)]"
6767
}
68+
case .reverseMatch:
69+
let (isCaseInsensitive, reg) = payload.elementPayload
70+
if isCaseInsensitive {
71+
return "reverseMatchCaseInsensitive char[\(reg)]"
72+
} else {
73+
return "reverseMatch char[\(reg)]"
74+
}
6875
case .matchBitset:
6976
let (isScalar, reg) = payload.bitsetPayload
7077
if isScalar {
7178
return "matchBitsetScalar bitset[\(reg)]"
7279
} else {
7380
return "matchBitset bitset[\(reg)]"
7481
}
82+
case .reverseMatchBitset:
83+
let (isScalar, reg) = payload.bitsetPayload
84+
if isScalar {
85+
return "reverseMatchBitsetScalar bitset[\(reg)]"
86+
} else {
87+
return "reverseMatchBitset bitset[\(reg)]"
88+
}
7589
case .matchBuiltin:
7690
let payload = payload.characterClassPayload
7791
return "matchBuiltin \(payload.cc) (\(payload.isInverted))"
92+
case .reverseMatchBuiltin:
93+
let payload = payload.characterClassPayload
94+
return "\(opcode) \(payload.cc) (\(payload.isInverted))"
7895
case .matchBy:
7996
let (matcherReg, valReg) = payload.pairedMatcherValue
8097
return "\(opcode) match[\(matcherReg)] -> val[\(valReg)]"
@@ -101,6 +118,9 @@ extension Instruction: CustomStringConvertible {
101118
case .quantify:
102119
let payload = payload.quantify
103120
return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.maxExtraTrips?.description ?? "unbounded" )"
121+
case .reverseQuantify:
122+
let payload = payload.quantify
123+
return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.maxExtraTrips?.description ?? "unbounded" )"
104124
case .save:
105125
let resumeAddr = payload.addr
106126
return "\(opcode) \(resumeAddr)"

0 commit comments

Comments
 (0)