@@ -291,7 +291,7 @@ extension Processor {
291
291
_ bitset: DSLTree . CustomCharacterClass . AsciiBitset ,
292
292
isScalarSemantics: Bool
293
293
) -> Bool {
294
- guard let next = input. matchBitset (
294
+ guard let next = input. matchASCIIBitset (
295
295
bitset,
296
296
at: currentPosition,
297
297
limitedBy: end,
@@ -727,30 +727,59 @@ extension String {
727
727
return idx
728
728
}
729
729
730
- func matchBitset(
730
// TODO: effects? release none? consuming self?
/// Returns the index immediately after `pos`, or `nil` when asked to.
///
/// - Parameters:
///   - pos: The position to advance from. Asserted (debug builds only) to be
///     strictly before `endIndex`.
///   - isScalarSemantics: When `true`, advances by one Unicode scalar through
///     the `unicodeScalars` view; otherwise advances by one `Character`.
///   - returnNil: When `true`, no advance is performed and `nil` is returned.
/// - Returns: The advanced index, or `nil` if `returnNil` is `true`.
private func _getNextIndex(
  at pos: Index, isScalarSemantics: Bool, returnNil: Bool
) -> Index? {
  assert(pos < endIndex)
  guard !returnNil else { return nil }
  return isScalarSemantics
    ? unicodeScalars.index(after: pos)
    : index(after: pos)
}
741
+
742
/// Matches the element at `pos` against an ASCII bitset and returns the index
/// just past the matched element.
///
/// A fast path is tried first via `_quickASCIICharacter(at:)`; when that
/// returns `nil`, the scalar or `Character` at `pos` is tested against the
/// bitset directly.
///
/// - Parameters:
///   - bitset: The bitset to test against. Its `isInverted` flag affects how
///     a CR-LF pair is treated under grapheme semantics.
///   - pos: The position to match at.
///   - end: The exclusive upper bound of the match. Asserted (debug builds
///     only) to be no greater than `endIndex`.
///   - isScalarSemantics: When `true`, matching advances by one Unicode
///     scalar; otherwise by one `Character`.
/// - Returns: The index after the matched element, or `nil` if `pos` is not
///   before `end`, the element does not match, or the matched element would
///   extend past `end`.
func matchASCIIBitset(
  _ bitset: DSLTree.CustomCharacterClass.AsciiBitset,
  at pos: Index,
  limitedBy end: Index,
  isScalarSemantics: Bool
) -> Index? {
  assert(end <= endIndex)

  guard pos < end else { return nil }

  // TODO: Inversion should be tracked and handled in only one place.
  let isInverted = bitset.isInverted

  // TODO: Want something more specialized, so overhaul/refactor
  // `_quickASCIICharacter`.
  guard let (byte, next, isCRLF) = _quickASCIICharacter(at: pos) else {
    // Slow path: test the whole scalar or character at `pos`.
    let idx: Index
    if isScalarSemantics {
      guard bitset.matches(unicodeScalars[pos]) else { return nil }
      idx = unicodeScalars.index(after: pos)
    } else {
      guard bitset.matches(self[pos]) else { return nil }
      idx = index(after: pos)
    }
    // The matched element must not extend beyond `end` (e.g. a grapheme
    // cluster that straddles the limit).
    return idx <= end ? idx : nil
  }

  // TODO: refactor, this checks the inversion property for us
  guard bitset.matches(byte) else { return nil }

  let idx: Index
  if isCRLF {
    // CR-LF should only match `[\r]` in scalar semantic mode or if inverted.
    // NOTE(review): this presumes `next` lies just past the LF, so stepping
    // back one scalar lands between CR and LF; confirm against
    // `_quickASCIICharacter`.
    if isScalarSemantics {
      idx = unicodeScalars.index(before: next)
    } else if isInverted {
      idx = next
    } else {
      return nil
    }
  } else {
    idx = next
  }

  // FIXME: `_quickASCIICharacter` probably needs a `limitedBy` argument.
  // Until it has one, reject results past `end` here (e.g. `end` falling
  // between the CR and the LF of a CR-LF pair).
  return idx <= end ? idx : nil
}
756
785
}
0 commit comments