Skip to content

Commit 8c35c9d

Browse files
committed
wip: fast path for Ascii character class matching
1 parent 7c16250 commit 8c35c9d

File tree

3 files changed

+53
-18
lines changed

3 files changed

+53
-18
lines changed

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ extension Processor {
44

55
switch payload.type {
66
case .bitset:
7-
return input.matchBitset(
7+
return input.matchASCIIBitset(
88
registers[payload.bitset],
99
at: currentPosition,
1010
limitedBy: end,

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ extension Processor {
291291
_ bitset: DSLTree.CustomCharacterClass.AsciiBitset,
292292
isScalarSemantics: Bool
293293
) -> Bool {
294-
guard let next = input.matchBitset(
294+
guard let next = input.matchASCIIBitset(
295295
bitset,
296296
at: currentPosition,
297297
limitedBy: end,
@@ -727,30 +727,59 @@ extension String {
727727
return idx
728728
}
729729

730-
func matchBitset(
730+
// TODO: effects? release none? consuming self?
731+
private func _getNextIndex(
732+
at pos: Index, isScalarSemantics: Bool, returnNil: Bool
733+
) -> Index? {
734+
assert(pos < endIndex)
735+
if returnNil { return nil }
736+
if isScalarSemantics {
737+
return self.unicodeScalars.index(after: pos)
738+
}
739+
return self.index(after: pos)
740+
}
741+
742+
func matchASCIIBitset(
731743
_ bitset: DSLTree.CustomCharacterClass.AsciiBitset,
732744
at pos: Index,
733745
limitedBy end: Index,
734746
isScalarSemantics: Bool
735747
) -> Index? {
736-
// TODO: extremely quick-check-able
737-
// TODO: can be sped up with string internals
738748
assert(end <= endIndex)
739749

740750
guard pos < end else { return nil }
741751

742-
let idx: String.Index
743-
if isScalarSemantics {
744-
guard bitset.matches(unicodeScalars[pos]) else { return nil }
745-
idx = unicodeScalars.index(after: pos)
746-
} else {
747-
guard bitset.matches(self[pos]) else { return nil }
748-
idx = index(after: pos)
752+
// TODO: Inversion should be tracked and handled in only one place.
753+
let isInverted = bitset.isInverted
754+
755+
// TODO: Want something more specialized, so overhaul/refactor _quickASCIICharacter
756+
guard let (byte, next, isCRLF) = _quickASCIICharacter(at: pos) else {
757+
// FIXME: what if following index is beyond end?
758+
if isScalarSemantics {
759+
return bitset.matches(self.unicodeScalars[pos]) ? self.unicodeScalars.index(after: pos) : nil
760+
}
761+
762+
return bitset.matches(self[pos]) ? self.index(after: pos) : nil
749763
}
750764

751-
guard idx <= end else { return nil }
752-
return idx
753-
}
765+
// TODO: refactor, this checks the inversion property for us
766+
guard bitset.matches(byte) else {
767+
return nil
768+
}
754769

770+
// CR-LF should only match `[\r]` in scalar semantic mode or if inverted
771+
if isCRLF {
772+
// TODO: what if next is past `end` because CRLF?
773+
// FIXME: quickASCIICharacter probably needs a limitedBy argument
774+
if isScalarSemantics {
775+
return self.unicodeScalars.index(before: next)
776+
}
777+
if isInverted {
778+
return next
779+
}
780+
return nil
781+
}
755782

783+
return next
784+
}
756785
}

Sources/_StringProcessing/Utility/AsciiBitset.swift

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,24 @@ extension DSLTree.CustomCharacterClass {
4949
}
5050
}
5151

52-
private func matches(_ val: UInt8) -> Bool {
52+
// Whether the val matches (without checking for inversion) the bitset
53+
internal func _matchesWithoutInversion(_ val: UInt8) -> Bool {
5354
if val < 64 {
5455
return (a >> val) & 1 == 1
5556
} else {
5657
return (b >> (val - 64)) & 1 == 1
5758
}
5859
}
5960

61+
internal func matches(_ byte: UInt8) -> Bool {
62+
guard byte < 128 else { return isInverted }
63+
return _matchesWithoutInversion(byte) == !isInverted
64+
}
65+
6066
internal func matches(_ char: Character) -> Bool {
6167
let matched: Bool
6268
if let val = char._singleScalarAsciiValue {
63-
matched = matches(val)
69+
matched = _matchesWithoutInversion(val)
6470
} else {
6571
matched = false
6672
}
@@ -75,7 +81,7 @@ extension DSLTree.CustomCharacterClass {
7581
let matched: Bool
7682
if scalar.isASCII {
7783
let val = UInt8(ascii: scalar)
78-
matched = matches(val)
84+
matched = _matchesWithoutInversion(val)
7985
} else {
8086
matched = false
8187
}

0 commit comments

Comments
 (0)