codezonediitj · czgdp1807 · Oct 10, 2021 · Jun 5, 2021 · Oct 10, 2021 · Oct 10, 2021
diff --git a/pydatastructs/strings/__init__.py b/pydatastructs/strings/__init__.py
@@ -1,8 +1,18 @@
 __all__ = []
 
-from . import trie
+from . import (
+    trie,
+    algorithms
+)
+
 from .trie import (
     Trie
 )
 
 __all__.extend(trie.__all__)
+
+from .algorithms import (
+    find
+)
+
+__all__.extend(algorithms.__all__)
diff --git a/pydatastructs/strings/algorithms.py b/pydatastructs/strings/algorithms.py
@@ -0,0 +1,109 @@
+from pydatastructs.linear_data_structures.arrays import (
+    DynamicOneDimensionalArray, OneDimensionalArray)
+
+__all__ = [
+    'find'
+]
+
+def find(text, query, algorithm):
+    """
+    Finds occurrence of a query string within the text string.
+
+    Parameters
+    ==========
+
+    text: str
+        The string on which query is to be performed.
+    query: str
+        The string which is to be searched in the text.
+    algorithm: str
+        The algorithm which should be used for
+        searching.
+        Currently the following algorithms are
+        supported,
+        'kmp' -> Knuth-Morris-Pratt as given in [1].
+
+    Returns
+    =======
+
+    DynamicOneDimensionalArray
+        An array of starting positions of the portions
+        in the text which match with the given query.
+
+    Examples
+    ========
+
+    >>> from pydatastructs.strings.algorithms import find
+    >>> text = "abcdefabcabe"
+    >>> pos = find(text, "ab", algorithm="kmp")
+    >>> str(pos)
+    "['0', '6', '9']"
+    >>> pos = find(text, "abc", algorithm="kmp")
+    >>> str(pos)
+    "['0', '6']"
+    >>> pos = find(text, "abe", algorithm="kmp")
+    >>> str(pos)
+    "['9']"
+    >>> pos = find(text, "abed", algorithm="kmp")
+    >>> str(pos)
+    '[]'
+
+    References
+    ==========
+
+    .. [1] https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
+    """
+    import pydatastructs.strings.algorithms as algorithms
+    func = "_" + algorithm
+    if not hasattr(algorithms, func):
+        raise NotImplementedError(
+        "Currently %s algoithm for searching strings "
+        "inside a text isn't implemented yet."
+        %(algorithm))
+    return getattr(algorithms, func)(text, query)
+
+
+def _knuth_morris_pratt(text, query):
+    kmp_table = _build_kmp_table(query)
+    return _do_match(text, query, kmp_table)
+
+_kmp = _knuth_morris_pratt
+
+def _build_kmp_table(query):
+    pos, cnd = 1, 0
+    kmp_table = OneDimensionalArray(int, len(query) + 1)
+
+    kmp_table[0] = -1
+
+    while pos < len(query):
+        if query[pos] == query[cnd]:
+            kmp_table[pos] = kmp_table[cnd]
+        else:
+            kmp_table[pos] = cnd
+            while cnd >= 0 and query[pos] != query[cnd]:
+                cnd = kmp_table[cnd]
+        pos, cnd = pos + 1, cnd + 1
+    kmp_table[pos] = cnd
+
+    return kmp_table
+
+
+
+def _do_match(string, query, kmp_table):
+    j, k = 0, 0
+    positions = DynamicOneDimensionalArray(int, 0)
+
+    while j < len(string):
+        if query[k] == string[j]:
+            j = j + 1
+            k = k + 1
+            if k == len(query):
+                positions.append(j - k)
+                k = kmp_table[k]
+        else:
+            k = kmp_table[k]
+            if k < 0:
+                j = j + 1
+                k = k + 1
+
+    return positions
diff --git a/pydatastructs/strings/tests/test_algorithms.py b/pydatastructs/strings/tests/test_algorithms.py
@@ -0,0 +1,64 @@
+from pydatastructs.strings import find
+
+import random, string
+
+def test_kmp():
+    _test_common_string_matching('kmp')
+
+
+def _test_common_string_matching(algorithm):
+    true_text_pattern_dictionary = {
+        "Knuth-Morris-Pratt": "-Morris-",
+        "abcabcabcabdabcabdabcabca": "abcabdabcabca",
+        "aefcdfaecdaefaefcdaefeaefcdcdeae": "aefcdaefeaefcd",
+        "aaaaaaaa": "aaa",
+        "fullstringmatch": "fullstringmatch"
+    }
+    for test_case_key in true_text_pattern_dictionary:
+        text = test_case_key
+        query = true_text_pattern_dictionary[test_case_key]
+        positions = find(text, query, algorithm)
+        for i in range(positions._last_pos_filled):
+            p = positions[i]
+            assert text[p:p + len(query)] == query
+
+    false_text_pattern_dictionary = {
+        "Knuth-Morris-Pratt": "-Pratt-",
+        "abcabcabcabdabcabdabcabca": "qwertyuiopzxcvbnm",
+        "aefcdfaecdaefaefcdaefeaefcdcdeae": "cdaefaefe",
+        "fullstringmatch": "fullstrinmatch"
+    }
+
+    for test_case_key in false_text_pattern_dictionary:
+        text = test_case_key
+        query = false_text_pattern_dictionary[test_case_key]
+        positions = find(text, query, algorithm)
+        assert positions.size == 0
+
+    random.seed(1000)
+
+    def gen_random_string(length):
+        ascii = string.ascii_uppercase
+        digits = string.digits
+        return ''.join(random.choices(ascii + digits, k=length))
+
+    for _ in range(100):
+        query = gen_random_string(random.randint(3, 10))
+        num_times = random.randint(1, 10)
+        freq = 0
+        text = ""
+        while freq < num_times:
+            rand_str = gen_random_string(random.randint(5, 10))
+            if rand_str != query:
+                freq += 1
+                text += query + rand_str + query
+        positions = find(text, query, algorithm="kmp")
+        assert positions._num == num_times * 2
+        for i in range(positions._last_pos_filled):
+            p = positions[i]
+            assert text[p:p + len(query)] == query
+
+        text = gen_random_string(len(query))
+        if text != query:
+            positions = find(text, query, algorithm="kmp")
+            assert positions.size == 0
diff --git a/pydatastructs/utils/tests/test_code_quality.py b/pydatastructs/utils/tests/test_code_quality.py
@@ -22,7 +22,7 @@ def test_trailing_white_spaces():
         while line != "":
             if line.endswith(" \n") or line.endswith("\t\n") \
                 or line.endswith(" ") or line.endswith("\t"):
-                assert False, "%s contains trailing whitespace at line number %d: %s"\
+                assert False, "%s:%d : %s"\
                                %(file_path, line_number, line)
             line = file.readline()
             line_number += 1