Skip to content

Commit 007a1aa

Browse files
committed
API improvements
1 parent d855df4 commit 007a1aa

File tree

3 files changed

+113
-140
lines changed

3 files changed

+113
-140
lines changed

pydatastructs/strings/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from . import (
44
trie,
5-
string_matching_algorithms
5+
algorithms
66
)
77

88
from .trie import (
@@ -11,8 +11,8 @@
1111

1212
__all__.extend(trie.__all__)
1313

14-
from .string_matching_algorithms import (
15-
find_string
14+
from .algorithms import (
15+
find
1616
)
1717

18-
__all__.extend(string_matching_algorithms.__all__)
18+
__all__.extend(algorithms.__all__)

pydatastructs/strings/algorithms.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
from pydatastructs.linear_data_structures.arrays import (
2+
DynamicOneDimensionalArray, OneDimensionalArray)
3+
4+
__all__ = [
5+
'find'
6+
]
7+
8+
def find(text, query, algorithm):
    """
    Finds occurrence of a query string within the text string.

    Parameters
    ==========

    text: str
        The string on which query is to be performed.
    query: str
        The string which is to be searched in the text.
    algorithm: str
        The algorithm which should be used for
        searching.
        Currently the following algorithms are
        supported,
        'kmp' -> Knuth-Morris-Pratt as given in [1].

    Returns
    =======

    DynamicOneDimensionalArray
        An array of starting positions of the portions
        in the text which match with the given query.

    Raises
    ======

    NotImplementedError
        If this module defines no ``_<algorithm>`` implementation
        for the requested algorithm name.

    Examples
    ========

    >>> from pydatastructs.strings.algorithms import find
    >>> text = "abcdefabcabe"
    >>> pos = find(text, "ab", algorithm="kmp")
    >>> str(pos)
    "['0', '6', '9']"
    >>> pos = find(text, "abc", algorithm="kmp")
    >>> str(pos)
    "['0', '6']"
    >>> pos = find(text, "abe", algorithm="kmp")
    >>> str(pos)
    "['9']"
    >>> pos = find(text, "abed", algorithm="kmp")
    >>> str(pos)
    '[]'

    References
    ==========

    .. [1] https://www.inf.hs-flensburg.de/lang/algorithmen/pattern/kmpen.htm
    """
    # Implementations are module-level callables named ``_<algorithm>``.
    # Looking them up in this module's own namespace avoids re-importing
    # the module by its absolute path from inside one of its functions.
    implementation = globals().get("_" + algorithm)
    if implementation is None:
        raise NotImplementedError(
            "Currently %s algorithm for searching strings "
            "inside a text isn't implemented yet."
            % (algorithm))
    return implementation(text, query)
64+
65+
66+
def _knuth_morris_pratt(text, query):
    """
    Searches ``text`` for ``query`` using the Knuth-Morris-Pratt algorithm.

    Parameters
    ==========

    text: str
        The string to be searched.
    query: str
        The pattern to look for in ``text``.

    Returns
    =======

    DynamicOneDimensionalArray
        Starting positions of every match of ``query`` in ``text``.
        Empty when either ``text`` or ``query`` is empty.
    """
    # An empty query has no first character to compare, so the table
    # construction and the matching loop would both index out of range.
    # An empty text cannot contain a match either, so both cases return
    # an empty result up front.
    if len(text) == 0 or len(query) == 0:
        return DynamicOneDimensionalArray(int, 0)
    kmp_table = _build_kmp_table(query)
    return _do_match(text, query, kmp_table)


# Short name used by ``find(..., algorithm="kmp")``.
_kmp = _knuth_morris_pratt
71+
72+
def _build_kmp_table(query):
    """
    Builds the fallback table that the matching loop consults on a mismatch.

    The table has ``len(query) + 1`` entries and its first entry is ``-1``.
    Entry ``i`` is the query index from which comparison resumes after a
    mismatch at query index ``i``; the last entry is used after a full match.
    """
    size = len(query)
    table = OneDimensionalArray(int, size + 1)
    table[0] = -1

    # ``idx`` is the entry being filled; ``candidate`` is the query index
    # that is compared against ``query[idx]``.
    idx, candidate = 1, 0
    while idx < size:
        if query[idx] != query[candidate]:
            table[idx] = candidate
            # Follow earlier fallbacks until the characters agree or the
            # chain is exhausted (candidate becomes -1).
            while candidate >= 0 and query[idx] != query[candidate]:
                candidate = table[candidate]
        else:
            # Same character: a mismatch here would also mismatch at
            # ``candidate``, so reuse its fallback directly.
            table[idx] = table[candidate]
        idx += 1
        candidate += 1

    # Entry past the end of the query: where to resume after a full match.
    table[idx] = candidate
    return table
89+
90+
91+
92+
def _do_match(string, query, kmp_table):
    """
    Scans ``string`` for ``query`` using a precomputed fallback table.

    Returns a DynamicOneDimensionalArray holding the start index of every
    match found, in the order they are encountered.
    """
    matches = DynamicOneDimensionalArray(int, 0)
    text_len, query_len = len(string), len(query)
    text_idx = query_idx = 0

    while text_idx < text_len:
        if query[query_idx] != string[text_idx]:
            # Mismatch: fall back within the query without re-reading text.
            query_idx = kmp_table[query_idx]
            if query_idx < 0:
                # No shorter prefix applies; move on to the next text
                # character and restart the query from its beginning.
                text_idx += 1
                query_idx += 1
            continue

        text_idx += 1
        query_idx += 1
        if query_idx == query_len:
            # Whole query matched; record where it started, then fall back
            # so that overlapping matches are still found.
            matches.append(text_idx - query_idx)
            query_idx = kmp_table[query_idx]

    return matches

pydatastructs/strings/string_matching_algorithms.py

Lines changed: 0 additions & 136 deletions
This file was deleted.

0 commit comments

Comments
 (0)