Skip to content

Commit 714c2a0

Browse files
committed
...
1 parent 2fadbbe commit 714c2a0

File tree

1 file changed

+131
-39
lines changed

1 file changed

+131
-39
lines changed

Sources/_StringProcessing/Algorithms/Algorithms/SubstringSearcher.swift

Lines changed: 131 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
/// An implementation of the Boyer-Moore algorithm, for string-specific
12+
/// An implementation of the Boyer-Moore-Horspool algorithm, for string-specific
1313
/// searching.
1414
@usableFromInline
1515
struct SubstringSearcher: Sequence, IteratorProtocol {
@@ -22,60 +22,152 @@ struct SubstringSearcher: Sequence, IteratorProtocol {
2222
@usableFromInline
2323
let patternCount: Int
2424
@usableFromInline
25-
var endOfSearch: String.Index?
25+
var endOfNextPotentialMatch: String.Index?
2626

2727
@usableFromInline
2828
/// Creates a searcher that looks for `pattern` within `text`.
///
/// The initializer records the pattern's character count, positions the
/// first candidate match so that it ends `patternCount` characters past the
/// start of `text` (or records `nil` when `text` is shorter than that), and
/// builds the table mapping each pattern character to its last offset in
/// the pattern.
init(text: Substring, pattern: Substring) {
  let count = pattern.count

  self.text = text
  self.pattern = pattern
  self.patternCount = count

  // `nil` here means no candidate alignment fits inside `text` at all.
  self.endOfNextPotentialMatch = text.index(
    text.startIndex, offsetBy: count, limitedBy: text.endIndex)

  // When a character repeats in the pattern, keep the offset of its last
  // occurrence.
  self.badCharacterOffsets = Dictionary(zip(pattern, 0...)) { _, latest in
    latest
  }
}
3737

3838
@inlinable
39-
mutating func next() -> Range<String.Index>? {
40-
while let end = endOfSearch {
41-
// Empty pattern matches at every position.
42-
if patternCount == 0 {
43-
endOfSearch = end == text.endIndex ? nil : text.index(after: end)
44-
return end..<end
45-
}
46-
47-
var patternOffset = patternCount - 1
48-
var patternCursor = pattern.index(before: pattern.endIndex)
49-
var textCursor = text.index(before: end)
39+
/// Finds the first match of `pattern` in `text` among candidate alignments
/// whose end index is `end` or later.
///
/// - Parameter end: The end index of the first candidate alignment to try.
///   For a non-empty pattern this must lie at least `patternCount`
///   characters after `text.startIndex`.
/// - Returns: A pair of the matched range and the end index at which the
///   following search should resume. `result` is `nil` when no further
///   match exists; `nextEnd` is `nil` when no further candidate alignment
///   fits inside `text`.
func nextRange(searchFromEnd end: String.Index)
  -> (result: Range<String.Index>?, nextEnd: String.Index?)
{
  // Empty pattern matches at every position.
  if patternCount == 0 {
    return (
      end..<end,
      end == text.endIndex ? nil : text.index(after: end))
  }

  // Iterate over candidate alignments instead of recursing: Swift makes no
  // tail-call guarantee, so one stack frame per failed alignment could
  // exhaust the stack on a long text with few matches.
  var end = end
  while true {
    var patternOffset = patternCount - 1
    var patternCursor = pattern.index(before: pattern.endIndex)
    var textCursor = text.index(before: end)

    // Search backwards from `end` to the start of the pattern. The loop
    // returns before `patternCursor` could move past `pattern.startIndex`,
    // so no separate lower-bound check is needed.
    while pattern[patternCursor] == text[textCursor] {
      patternOffset -= 1

      // Success!
      if patternCursor == pattern.startIndex {
        // Resume the following search one full pattern length further on.
        return (
          textCursor..<end,
          text.index(end, offsetBy: patternCount, limitedBy: text.endIndex))
      }

      precondition(textCursor > text.startIndex)
      text.formIndex(before: &textCursor)
      pattern.formIndex(before: &patternCursor)
    }

    // Match failed - calculate the end index of the next possible
    // candidate, based on the `badCharacterOffsets` table and the
    // current position in the pattern. Always advance by at least one.
    let shiftOffset = Swift.max(
      1,
      patternOffset - (badCharacterOffsets[text[textCursor]] ?? 0))
    guard let nextEnd = text.index(
      end, offsetBy: shiftOffset, limitedBy: text.endIndex)
    else {
      return (nil, nil)
    }
    end = nextEnd
  }
}
83+
84+
@inlinable
85+
/// Returns the range of the next match, or `nil` once the search is
/// exhausted.
///
/// The search resumes from the stored `endOfNextPotentialMatch`, which is
/// then replaced by the resume position reported by
/// `nextRange(searchFromEnd:)`.
mutating func next() -> Range<String.Index>? {
  guard let end = endOfNextPotentialMatch else { return nil }
  let (result, nextEnd) = nextRange(searchFromEnd: end)
  endOfNextPotentialMatch = nextEnd
  return result
}
80130
}
81131

132+
extension SubstringSearcher {
  /// A collection view over the ranges matched by a `SubstringSearcher`.
  ///
  /// Each index carries the matched range together with the position from
  /// which the search for the following match resumes, so advancing an
  /// index does not mutate the stored searcher.
  struct Coll: Collection {
    var iterator: SubstringSearcher
    var startIndex: Index

    /// The past-the-end index: no range and no resume position.
    var endIndex: Index { Index() }

    init(iterator: SubstringSearcher) {
      var iterator = iterator
      let firstRange = iterator.next()
      // Capture the resume position left behind by `next()`; without it
      // the start index could never be advanced.
      self.startIndex = Index(
        range: firstRange,
        endOfNextPotentialMatch: iterator.endOfNextPotentialMatch)
      self.iterator = iterator
    }

    struct Index: Comparable {
      /// The matched range, or `nil` for the past-the-end index.
      var range: Range<String.Index>?
      /// Where the search for the following match resumes, or `nil` when
      /// no further candidate fits in the text.
      var endOfNextPotentialMatch: String.Index?

      /// Orders indices by the lower bound of their ranges; an index
      /// without a range sorts after every index that has one.
      static func < (lhs: Index, rhs: Index) -> Bool {
        switch (lhs.range, rhs.range) {
        case (nil, _):
          return false
        case (_, nil):
          return true
        case let (lhs?, rhs?):
          return lhs.lowerBound < rhs.lowerBound
        }
      }
    }

    subscript(index: Index) -> Range<String.Index> {
      guard let range = index.range else {
        preconditionFailure("Can't subscript using endIndex")
      }
      return range
    }

    func index(after index: Index) -> Index {
      precondition(index.range != nil, "Can't advance past endIndex")
      // The last match may leave no room for another candidate; that is
      // the end of the collection rather than an error.
      guard let end = index.endOfNextPotentialMatch else { return endIndex }
      let (range, next) = iterator.nextRange(searchFromEnd: end)
      return Index(range: range, endOfNextPotentialMatch: next)
    }
  }

  /// A collection of all the match ranges this searcher would produce.
  var collection: Coll {
    .init(iterator: self)
  }
}

0 commit comments

Comments
 (0)