diff --git a/README.md b/README.md index 09e9b6dc9..803c88b0e 100644 --- a/README.md +++ b/README.md @@ -101,11 +101,11 @@ Keep contributing!! Thanks to these wonderful people ✨✨: - - - + + +
- - - -
+ + + +
diff --git a/docs/requirements.txt b/docs/requirements.txt index b3ecdb274..6526a0ef8 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,3 @@ sphinx==4.2.0 -sphinx-readable-theme==1.3.0 \ No newline at end of file +sphinx-readable-theme==1.3.0 +myst_nb==0.13.1 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 1b4721a6e..f096ee613 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,7 +26,7 @@ master_doc = 'index' # The full version, including alpha/beta/rc tags -release = '0.0.1-dev' +release = '1.0.1-dev' # -- General configuration --------------------------------------------------- @@ -36,9 +36,12 @@ # ones. extensions = [ 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon' + 'sphinx.ext.napoleon', + 'myst_nb' ] +jupyter_execute_notebooks = "off" + napoleon_numpy_docstring = True # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/index.rst b/docs/source/index.rst index cdcaef0a4..9a12f0d64 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -40,6 +40,21 @@ the other, Make sure that your python version is at least ``3.8``. +Why PyDataStructs? +================== + +1. **Single package for all your data structures and algorithms** - We have implemented, and are still + implementing, many popular and useful data structures and algorithms. + +2. **Consistent and Clean Interface** - The APIs we have provided are **consistent** with each other, + **clean** and **easy to use**. We make sure of that before adding any new data structure or algorithm. + +3. **Well Tested** - We thoroughly test our code before making any new addition to PyDataStructs. + **99 percent** of the lines of our code have already been tested by us. + +So, **you can easily rely on PyDataStructs** for any data structure or algorithm you want to use +**without worrying about implementing** it **from scratch**. Everything is just a few calls away. + Why do we use Python? 
===================== @@ -59,6 +74,7 @@ Contents :maxdepth: 1 tutorials.rst + pydatastructs_sphinx_graphs contributing.rst authors.rst pydatastructs/pydatastructs.rst diff --git a/docs/source/pydatastructs_sphinx_graphs.ipynb b/docs/source/pydatastructs_sphinx_graphs.ipynb new file mode 100644 index 000000000..724c0a07c --- /dev/null +++ b/docs/source/pydatastructs_sphinx_graphs.ipynb @@ -0,0 +1,243 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "pydatastructs-sphinx-graphs.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "2qB4MTFoYSdW" + }, + "source": [ + "Comparing Dijkstra and Bellman Ford Shortest Paths Algorithms using PyDataStructs\n", + "====================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zu6G_1RLYitv" + }, + "source": [ + "Dataset\n", + "-------\n", + "\n", + "We have used the [California road network](https://snap.stanford.edu/data/roadNet-CA.html) from the [Stanford Network Analysis Project](https://snap.stanford.edu/index.html). The intent of this demo is to show how **pydatastructs** can be used for research and analysis purposes.\n", + "\n", + "The above dataset is a road network of California as the name suggests. The intersections and endpoints in this network are represented as vertices and the roads between them are represented as undirected edges. The data is read from a `txt` file where each line contains two numbers representing the two endpoints of an edge in the graph. We have used a varying number of these edges to analyse how each algorithm responds to the varying scale of the shortest path problem at hand." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YoaukKUfaF66" + }, + "source": [ + "Results\n", + "-------\n", + "\n", + "We observed that for low inverse density (total number of possible edges divided by number of edges present) graphs, both algorithms take similar amounts of time. However, the Dijkstra algorithm performs significantly better with high inverse density graphs as compared to the Bellman Ford algorithm." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_qWIIix_Twd5" + }, + "source": [ + "# Import modules and APIs for Graphs\n", + "from pydatastructs import Graph, AdjacencyListGraphNode\n", + "from pydatastructs import shortest_paths, topological_sort\n", + "\n", + "# Import utility modules\n", + "import timeit, random, functools, matplotlib.pyplot as plt" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8TDkoIgcXJr6" + }, + "source": [ + "def create_Graph(num_edges, file_path, ignore_lines=4):\n", + " \"\"\"\n", + " Creates pydatastructs.Graph object.\n", + "\n", + " Parameters\n", + " ==========\n", + "\n", + " num_edges: int\n", + " Number of edges that should be present in the\n", + " pydatastructs.Graph object.\n", + " file_path: str\n", + " The path to the file containing California\n", + " road network dataset.\n", + " ignore_lines: int\n", + " Number of initial lines that should be ignored.\n", + " Optional, by default 4 because the first 4 lines\n", + " contain documentation of the dataset which is not\n", + " required to generate the pydatastructs.Graph object.\n", + " \n", + " Returns\n", + " =======\n", + "\n", + " G: pydatastructs.Graph\n", + " \"\"\"\n", + " f = open(file_path, 'r')\n", + " for _ in range(ignore_lines):\n", + " f.readline()\n", + " G = Graph()\n", + " inp = f.readline().split()\n", + " for _ in range(num_edges):\n", + " u, v = inp\n", + " G.add_vertex(AdjacencyListGraphNode(u))\n", + " G.add_vertex(AdjacencyListGraphNode(v))\n", + " G.add_edge(u, 
v, random.randint(1, 1000)) \n", + " inp = f.readline().split()\n", + " return G" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "qRS4Rz-ZRZ51" + }, + "source": [ + "def generate_data(file_name, min_num_edges, max_num_edges, increment):\n", + " \"\"\"\n", + " Generates computation time data for Dijkstra and Bellman ford\n", + " algorithms using pydatastructs.shortest_paths.\n", + "\n", + " Parameters\n", + " ==========\n", + "\n", + " file_name: str\n", + " The path to the file containing California\n", + " road network dataset.\n", + " min_num_edges: int\n", + " The minimum number of edges to be used for\n", + " comparison of algorithms.\n", + " max_num_edges: int\n", + " The maximum number of edges to be used for comparison\n", + " of algorithms.\n", + " increment: int\n", + " The value to be used to increment the scale of the\n", + " shortest path problem. For example if using 50 edges,\n", + " and increment value is 10, then in the next iteration,\n", + " 60 edges will be used and in the next to next iteration,\n", + " 70 edges will be used and so on until we hit the max_num_edges\n", + " value.\n", + "\n", + " Returns\n", + " =======\n", + "\n", + " graph_data, data_dijkstra, data_bellman_ford: (list, list, list)\n", + " graph_data contains tuples of number of vertices and number\n", + " of edges.\n", + " data_dijkstra contains the computation time values for each\n", + " graph when Dijkstra algorithm is used.\n", + " data_bellman_ford contains the computation time values for each\n", + " graph when Bellman ford algorithm is used. 
\n", + " \"\"\"\n", + " data_dijkstra, data_bellman_ford, graph_data = [], [], []\n", + " for edge in range(min_num_edges, max_num_edges + 1, increment):\n", + " G = create_Graph(edge, file_name)\n", + " t = timeit.Timer(functools.partial(shortest_paths, G, 'dijkstra', '1'))\n", + " t_djk = t.repeat(1, 1)\n", + " t = timeit.Timer(functools.partial(shortest_paths, G, 'bellman_ford', '1'))\n", + " t_bf = t.repeat(1, 1)\n", + " graph_data.append((len(G.vertices), len(G.edge_weights)))\n", + " data_dijkstra.append(t_djk[0])\n", + " data_bellman_ford.append(t_bf[0])\n", + " return graph_data, data_dijkstra, data_bellman_ford" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "GTeSF1ChA2Bz" + }, + "source": [ + "def plot_data(graph_data, data_dijkstra, data_bellman_ford):\n", + " \"\"\"\n", + " Utility function to plot the computation time values\n", + " for Dijkstra and Bellman ford algorithms versus\n", + " the inverse density of the input graph.\n", + " \"\"\"\n", + " idensity, time_dijkstra, time_bellman_ford = [], [], []\n", + " for datum_graph, datum_djk, datum_bf in zip(graph_data, data_dijkstra, data_bellman_ford):\n", + " num_edges, num_vertices = datum_graph[1], datum_graph[0]\n", + " idensity.append((num_vertices*(num_vertices - 1))/(2*num_edges))\n", + " time_dijkstra.append(datum_djk)\n", + " time_bellman_ford.append(datum_bf)\n", + " plt.xlabel(\"Inverse Density of Input Graph\")\n", + " plt.ylabel(\"Computation Time (s)\")\n", + " plt.plot(idensity, time_dijkstra, label=\"Dijkstra\")\n", + " plt.plot(idensity, time_bellman_ford, label=\"Bellman Ford\")\n", + " plt.legend(loc=\"best\")\n", + " plt.show()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "UXqC736NXfs2" + }, + "source": [ + "graph_data, data_djk, data_bf = generate_data('roadNet-CA.txt', 50, 2000, 50)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "EEEPGwOpV_DC", + "outputId": "8e84de7b-c905-4cd1-e075-514982c2ef22" + }, + "source": [ + "plot_data(graph_data, data_djk, data_bf)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index 704b4b19c..519b9ac6a 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -4,7 +4,7 @@ Tutorials We provide the following tutorials to show how ``pydatastructs`` APIs can help in solving complicated data structures and algorithms problems easily. For now the problems are abstract. However, we plan -to add some examples showing usage of ``pydatastructs`` on real world +to add some more examples showing usage of ``pydatastructs`` on real world data sets such as `Stanford Large Network Dataset Collection `_ and `Urban Dictionary Words And Definitions `_. If you are interested in playing around with the above datasets using our API, diff --git a/pydatastructs/__init__.py b/pydatastructs/__init__.py index 5568ebd3d..41f351945 100644 --- a/pydatastructs/__init__.py +++ b/pydatastructs/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.1-dev" +__version__ = "1.0.1-dev" from .linear_data_structures import * from .trees import * diff --git a/pydatastructs/graphs/adjacency_list.py b/pydatastructs/graphs/adjacency_list.py index a06e35533..541ff3a1e 100644 --- a/pydatastructs/graphs/adjacency_list.py +++ b/pydatastructs/graphs/adjacency_list.py @@ -26,8 +26,8 @@ def __new__(cls, *vertices): @classmethod def methods(self): return ['is_adjacent', 'neighbors', - 'add_vertex', 'remove_vertex', 'add_edge', - 'get_edge', 'remove_edge', '__new__'] + 'add_vertex', 'remove_vertex', 'add_edge', + 'get_edge', 'remove_edge', '__new__'] def is_adjacent(self, node1, node2): node1 = self.__getattribute__(node1) @@ -52,8 +52,21 @@ def remove_vertex(self, name): node_obj.adjacent.remove(name) def add_edge(self, source, target, cost=None): + source, target = str(source), str(target) + error_msg = ("Vertex %s is not present in the graph." + "Call Graph.add_vertex to add a new" + "vertex. 
Graph.add_edge is only responsible" + "for adding edges and it will not add new" + "vertices on its own. This is done to maintain" + "clear separation between the functionality of" + "these two methods.") + if not hasattr(self, source): + raise ValueError(error_msg % (source)) + if not hasattr(self, target): + raise ValueError(error_msg % (target)) + source, target = self.__getattribute__(source), \ - self.__getattribute__(target) + self.__getattribute__(target) source.add_adjacent_node(target.name) if cost is not None: self.edge_weights[source.name + "_" + target.name] = \ diff --git a/pydatastructs/graphs/adjacency_matrix.py b/pydatastructs/graphs/adjacency_matrix.py index a75efabdf..64a39494c 100644 --- a/pydatastructs/graphs/adjacency_matrix.py +++ b/pydatastructs/graphs/adjacency_matrix.py @@ -57,6 +57,18 @@ def remove_vertex(self, node): def add_edge(self, source, target, cost=None): source, target = str(source), str(target) + error_msg = ("Vertex %s is not present in the graph." + "Call Graph.add_vertex to add a new" + "vertex. Graph.add_edge is only responsible" + "for adding edges and it will not add new" + "vertices on its own. This is done to maintain" + "clear separation between the functionality of" + "these two methods.") + if source not in self.matrix: + raise ValueError(error_msg % (source)) + if target not in self.matrix: + raise ValueError(error_msg % (target)) + self.matrix[source][target] = True if cost is not None: self.edge_weights[source + "_" + target] = \ diff --git a/pydatastructs/graphs/graph.py b/pydatastructs/graphs/graph.py index f19fc8e97..0c1b195a8 100644 --- a/pydatastructs/graphs/graph.py +++ b/pydatastructs/graphs/graph.py @@ -49,6 +49,16 @@ class Graph(object): ========== .. 
[1] https://en.wikipedia.org/wiki/Graph_(abstract_data_type) + + Note + ==== + + Make sure to create nodes (AdjacencyListGraphNode or AdjacencyMatrixGraphNode) + and add them to your graph using Graph.add_vertex before adding edges whose + end points require either of the nodes that you added. In other words, + Graph.add_edge doesn't add new nodes on its own if the input + nodes are not already present in the Graph. + """ __slots__ = ['_impl'] @@ -89,7 +99,8 @@ def neighbors(self, node): def add_vertex(self, node): """ - Adds the input vertex to the node. + Adds the input vertex to the graph, or does nothing + if the input vertex is already in the graph. """ raise NotImplementedError( "This is an abstract method.") @@ -97,7 +108,7 @@ def add_vertex(self, node): def remove_vertex(self, node): """ Removes the input vertex along with all the edges - pointing towards to it. + pointing towards it. """ raise NotImplementedError( "This is an abstract method.") diff --git a/pydatastructs/graphs/tests/test_adjacency_list.py b/pydatastructs/graphs/tests/test_adjacency_list.py index d1635fc83..3dcef8a7a 100644 --- a/pydatastructs/graphs/tests/test_adjacency_list.py +++ b/pydatastructs/graphs/tests/test_adjacency_list.py @@ -1,5 +1,6 @@ from pydatastructs.graphs import Graph from pydatastructs.utils import AdjacencyListGraphNode +from pydatastructs.utils.raises_util import raises def test_adjacency_list(): v_1 = AdjacencyListGraphNode('v_1', 1) @@ -38,3 +39,6 @@ def test_adjacency_list(): g.remove_vertex('v') assert g.is_adjacent('v_2', 'v') is False assert g.is_adjacent('v_3', 'v') is False + + assert raises(ValueError, lambda: g.add_edge('u', 'v')) + assert raises(ValueError, lambda: g.add_edge('v', 'x')) diff --git a/pydatastructs/graphs/tests/test_adjacency_matrix.py b/pydatastructs/graphs/tests/test_adjacency_matrix.py index 3df819611..c0f7b7c06 100644 --- a/pydatastructs/graphs/tests/test_adjacency_matrix.py +++ b/pydatastructs/graphs/tests/test_adjacency_matrix.py @@ -1,5 
+1,6 @@ from pydatastructs.graphs import Graph from pydatastructs.utils import AdjacencyMatrixGraphNode +from pydatastructs.utils.raises_util import raises def test_AdjacencyMatrix(): v_0 = AdjacencyMatrixGraphNode(0, 0) @@ -25,3 +26,6 @@ def test_AdjacencyMatrix(): assert neighbors == [v_1] g.remove_edge(0, 1) assert g.is_adjacent(0, 1) is False + + assert raises(ValueError, lambda: g.add_edge('u', 'v')) + assert raises(ValueError, lambda: g.add_edge('v', 'x')) diff --git a/pydatastructs/linear_data_structures/arrays.py b/pydatastructs/linear_data_structures/arrays.py index 32fa0ee42..e4bc24223 100644 --- a/pydatastructs/linear_data_structures/arrays.py +++ b/pydatastructs/linear_data_structures/arrays.py @@ -15,7 +15,8 @@ def __str__(self) -> str: class OneDimensionalArray(Array): ''' - Represents one dimensional arrays. + Represents one dimensional static arrays of + fixed size. Parameters ========== @@ -269,7 +270,8 @@ class DynamicArray(Array): class DynamicOneDimensionalArray(DynamicArray, OneDimensionalArray): """ - Represents dynamic one dimensional arrays. + Represents resizable and dynamic one + dimensional arrays. Parameters ========== diff --git a/pydatastructs/miscellaneous_data_structures/algorithms.py b/pydatastructs/miscellaneous_data_structures/algorithms.py index 5f2204a8a..1a4ffc56a 100644 --- a/pydatastructs/miscellaneous_data_structures/algorithms.py +++ b/pydatastructs/miscellaneous_data_structures/algorithms.py @@ -53,11 +53,11 @@ class RangeQueryStatic: >>> from pydatastructs import minimum >>> arr = OneDimensionalArray(int, [4, 6, 1, 5, 7, 3]) >>> RMQ = RangeQueryStatic(arr, minimum) - >>> RMQ.query(3, 5) + >>> RMQ.query(3, 4) 5 - >>> RMQ.query(0, 5) + >>> RMQ.query(0, 4) 1 - >>> RMQ.query(0, 3) + >>> RMQ.query(0, 2) 1 Note @@ -97,9 +97,7 @@ def query(start, end): start: int The starting index of the range. end: int - The index just before which the range ends. 
- This means that this index will be excluded - from the range for generating results. + The ending index of the range. """ raise NotImplementedError( "This is an abstract method.") @@ -121,7 +119,7 @@ def methods(cls): return ['query'] def query(self, start, end): - _check_range_query_inputs((start, end), self.bounds) + _check_range_query_inputs((start, end + 1), self.bounds) return self.sparse_table.query(start, end) @@ -140,14 +138,14 @@ def methods(cls): return ['query'] def query(self, start, end): - _check_range_query_inputs((start, end), (0, len(self.array))) + _check_range_query_inputs((start, end + 1), (0, len(self.array))) - rsize = end - start + rsize = end - start + 1 if rsize == 1: return self.func((self.array[start],)) query_ans = self.func((self.array[start], self.array[start + 1])) - for i in range(start + 2, end): + for i in range(start + 2, end + 1): query_ans = self.func((query_ans, self.array[i])) return query_ans diff --git a/pydatastructs/miscellaneous_data_structures/sparse_table.py b/pydatastructs/miscellaneous_data_structures/sparse_table.py index 02000724d..972612eb1 100644 --- a/pydatastructs/miscellaneous_data_structures/sparse_table.py +++ b/pydatastructs/miscellaneous_data_structures/sparse_table.py @@ -86,11 +86,8 @@ def query(self, start, end): start: int The starting index of the range. end: int - The index just before which the range ends. - This means that this index will be excluded - from the range for generating results. + The ending index of the range. 
""" - end -= 1 j = int(math.log2(end - start + 1)) + 1 answer = None while j >= 0: diff --git a/pydatastructs/miscellaneous_data_structures/tests/test_range_query_static.py b/pydatastructs/miscellaneous_data_structures/tests/test_range_query_static.py index 86212ab11..e898653c9 100644 --- a/pydatastructs/miscellaneous_data_structures/tests/test_range_query_static.py +++ b/pydatastructs/miscellaneous_data_structures/tests/test_range_query_static.py @@ -12,9 +12,9 @@ def _test_RangeQueryStatic_common(func, gen_expected): array = OneDimensionalArray(int, [1]) rq = RangeQueryStatic(array, func) - assert rq.query(0, 1) == 1 - raises(ValueError, lambda: rq.query(0, 0)) - raises(IndexError, lambda: rq.query(0, 2)) + assert rq.query(0, 0) == 1 + raises(ValueError, lambda: rq.query(0, -1)) + raises(IndexError, lambda: rq.query(0, 1)) array_sizes = [3, 6, 12, 24, 48, 96] random.seed(0) @@ -38,18 +38,18 @@ def _test_RangeQueryStatic_common(func, gen_expected): def test_RangeQueryStatic_minimum(): def _gen_minimum_expected(data, i, j): - return min(data[i:j]) + return min(data[i:j + 1]) _test_RangeQueryStatic_common(minimum, _gen_minimum_expected) def test_RangeQueryStatic_greatest_common_divisor(): def _gen_gcd_expected(data, i, j): - if j - i == 1: + if j == i: return data[i] else: expected_gcd = math.gcd(data[i], data[i + 1]) - for idx in range(i + 2, j): + for idx in range(i + 2, j + 1): expected_gcd = math.gcd(expected_gcd, data[idx]) return expected_gcd @@ -58,6 +58,6 @@ def _gen_gcd_expected(data, i, j): def test_RangeQueryStatic_summation(): def _gen_summation_expected(data, i, j): - return sum(data[i:j]) + return sum(data[i:j + 1]) return _test_RangeQueryStatic_common(summation, _gen_summation_expected) diff --git a/setup.py b/setup.py index 2d23ecdd0..45f293265 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,8 @@ long_description = fh.read() setuptools.setup( - name="pydatastructs", - version="0.0.1-dev", + name="cz-pydatastructs", + version="1.0.1-dev", 
author="PyDataStructs Development Team", author_email="pydatastructs@googlegroups.com", description="A python package for data structures", @@ -17,7 +17,6 @@ "Programming Language :: Python :: 3", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", - "Topic :: Education :: Data Structures", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Software Development :: Libraries"