9
9
//
10
10
//===----------------------------------------------------------------------===//
11
11
12
- /// An implementation of the Boyer-Moore algorithm, for string-specific
12
+ /// An implementation of the Boyer-Moore-Horspool algorithm, for string-specific
13
13
/// searching.
14
14
@usableFromInline
15
15
struct SubstringSearcher : Sequence , IteratorProtocol {
@@ -22,60 +22,152 @@ struct SubstringSearcher: Sequence, IteratorProtocol {
22
22
@usableFromInline
23
23
let patternCount : Int
24
24
@usableFromInline
25
- var endOfSearch : String . Index ?
25
+ var endOfNextPotentialMatch : String . Index ?
26
26
27
27
/// Creates a searcher that finds occurrences of `pattern` within `text`.
///
/// - Parameters:
///   - text: The text to search.
///   - pattern: The characters to look for.
@usableFromInline
init(text: Substring, pattern: Substring) {
  let characterCount = pattern.count

  self.text = text
  self.pattern = pattern
  patternCount = characterCount

  // Map each pattern character to the offset of its last occurrence in the
  // pattern: when a character repeats, the later offset replaces the earlier.
  badCharacterOffsets = Dictionary(zip(pattern, 0...)) { _, latest in latest }

  // The first candidate match ends `characterCount` characters into the
  // text; this is `nil` when the text is shorter than the pattern.
  endOfNextPotentialMatch = text.index(
    text.startIndex, offsetBy: characterCount, limitedBy: text.endIndex)
}
37
37
38
38
/// Finds the first match whose end is at or after `end`.
///
/// This method does not mutate the searcher, so it can be called from both
/// the iterator and the collection view.
///
/// - Parameter end: The end index of the first candidate match. For a
///   non-empty pattern this must be at least `patternCount` characters past
///   `text.startIndex`.
/// - Returns: A pair of the matched range (`nil` when no match remains) and
///   the end index of the next candidate match (`nil` when the search is
///   exhausted).
@inlinable
func nextRange(searchFromEnd end: String.Index)
  -> (result: Range<String.Index>?, nextEnd: String.Index?)
{
  // Empty pattern matches at every position.
  if patternCount == 0 {
    return (
      end..<end,
      end == text.endIndex ? nil : text.index(after: end))
  }

  // Iterate over candidate alignments instead of recursing: Swift does not
  // guarantee tail-call elimination, so one stack frame per failed
  // candidate could exhaust the stack on long inputs.
  var candidateEnd = end
  while true {
    var patternOffset = patternCount - 1
    var patternCursor = pattern.index(before: pattern.endIndex)
    var textCursor = text.index(before: candidateEnd)

    // Compare backwards from `candidateEnd` toward the start of the pattern.
    // `patternCursor` never moves before `pattern.startIndex`, because the
    // success check below returns first.
    while pattern[patternCursor] == text[textCursor] {
      patternOffset -= 1

      // Success! Every pattern character matched.
      if patternCursor == pattern.startIndex {
        // The next candidate ends one full pattern length later, so
        // reported matches never overlap.
        return (
          textCursor..<candidateEnd,
          text.index(
            candidateEnd, offsetBy: patternCount, limitedBy: text.endIndex))
      }

      precondition(textCursor > text.startIndex)
      text.formIndex(before: &textCursor)
      pattern.formIndex(before: &patternCursor)
    }

    // Match failed - calculate the end index of the next possible
    // candidate, based on the `badCharacterOffsets` table and the
    // current position in the pattern. Always advance by at least one.
    let shiftOffset = Swift.max(
      1,
      patternOffset - (badCharacterOffsets[text[textCursor]] ?? 0))
    guard let nextEnd = text.index(
      candidateEnd, offsetBy: shiftOffset, limitedBy: text.endIndex)
    else {
      // The next candidate would run past the end of the text.
      return (nil, nil)
    }
    candidateEnd = nextEnd
  }
}
83
+
84
/// Returns the range of the next match and advances the searcher, or
/// returns `nil` once no candidate position remains.
@inlinable
mutating func next() -> Range<String.Index>? {
  guard let end = endOfNextPotentialMatch else { return nil }

  // Remember where the following search should resume before handing back
  // the match that was just found.
  let outcome = nextRange(searchFromEnd: end)
  endOfNextPotentialMatch = outcome.nextEnd
  return outcome.result
}
80
130
}
81
131
132
extension SubstringSearcher {
  /// A collection view over the match ranges produced by a
  /// `SubstringSearcher`.
  ///
  /// Each index carries the range of its match together with the position
  /// from which the search for the following match resumes.
  struct Coll: Collection {
    /// The searcher used to compute successive matches.
    var iterator: SubstringSearcher

    /// The index of the first match, or `endIndex` when there is none.
    var startIndex: Index

    /// The past-the-end index: no range and no continuation point.
    var endIndex: Index { Index() }

    /// Creates the collection, eagerly searching for the first match.
    init(iterator: SubstringSearcher) {
      var iterator = iterator
      let firstRange = iterator.next()
      // `next()` leaves the continuation point for the following search in
      // the iterator. It must be recorded in the index, otherwise
      // `index(after: startIndex)` has nowhere to resume from.
      self.startIndex = Index(
        range: firstRange,
        endOfNextPotentialMatch: iterator.endOfNextPotentialMatch)
      self.iterator = iterator
    }

    /// A position in the collection of matches.
    struct Index: Comparable {
      /// The range of the match at this position; `nil` for `endIndex`.
      var range: Range<String.Index>?

      /// The end of the next candidate match; `nil` when the search is
      /// exhausted after this match.
      var endOfNextPotentialMatch: String.Index?

      /// Orders indices by the lower bound of their match; an index without
      /// a range (the end index) sorts after every index that has one.
      static func < (lhs: Index, rhs: Index) -> Bool {
        switch (lhs.range, rhs.range) {
        case (nil, _): false
        case (_, nil): true
        case let (lhs?, rhs?):
          lhs.lowerBound < rhs.lowerBound
        }
      }
    }

    /// Returns the range of the match at `index`.
    ///
    /// - Precondition: `index` is not `endIndex`.
    subscript(index: Index) -> Range<String.Index> {
      guard let range = index.range else {
        preconditionFailure("Can't subscript using endIndex")
      }
      return range
    }

    /// Returns the index of the match following the one at `index`, or
    /// `endIndex` when there are no further matches.
    ///
    /// - Precondition: `index` is not `endIndex`.
    func index(after index: Index) -> Index {
      precondition(index.range != nil, "Can't advance past endIndex")
      // The last match can legitimately have no continuation point (for
      // example, when it ends at the end of the text).
      guard let end = index.endOfNextPotentialMatch else { return endIndex }
      let (range, next) = iterator.nextRange(searchFromEnd: end)
      return Index(range: range, endOfNextPotentialMatch: next)
    }
  }

  /// A collection of the ranges of all matches, searching from this
  /// searcher's current position.
  var collection: Coll {
    .init(iterator: self)
  }
}
0 commit comments