diff --git a/docs/source/pydatastructs_urban_dictionary_example.ipynb b/docs/source/pydatastructs_urban_dictionary_example.ipynb new file mode 100644 index 00000000..3212a2fb --- /dev/null +++ b/docs/source/pydatastructs_urban_dictionary_example.ipynb @@ -0,0 +1,907 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysis of the PyDataStructs string matching API\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset\n", + "\n", + "We use the [Urban Dictionary Words and Definitions](https://www.kaggle.com/datasets/therohk/urban-dictionary-words-dataset) dataset from [Urban Dictionary](https://www.urbandictionary.com/). The intent of this demo is to show how the different **pydatastructs** string matching algorithms can be used and how their performance compares.\n", + "\n", + "The Urban Dictionary Words and Definitions dataset contains a collection of crowdsourced slang terms and their meanings from Urban Dictionary, a popular user-driven dictionary for contemporary language and internet jargon. This dataset includes word entries, corresponding definitions, upvote/downvote counts, and other metadata, making it valuable for natural language processing (NLP) tasks, sentiment analysis, and linguistic research. 
Due to its informal nature, the dataset captures evolving slang, cultural references, and colloquial expressions, providing insights into modern language trends.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import API for string searching\n", + "from pydatastructs import find\n", + "\n", + "# Import util modules\n", + "import time\n", + "import pandas as pd\n", + "from pandas.core.frame import DataFrame\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Load dataset\n", + "df: DataFrame = pd.read_csv(\"urbandict-word-defs.csv\", low_memory=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset Shape:\n", + "(1048525, 12)\n", + "Dataset Columns:\n", + "Index(['word_id', 'word', 'up_votes', 'down_votes', 'author', 'definition',\n", + " 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10',\n", + " 'Unnamed: 11'],\n", + " dtype='object')\n", + "Dataset Info:\n", + "\n", + "RangeIndex: 1048525 entries, 0 to 1048524\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 word_id 1048525 non-null int64 \n", + " 1 word 1048500 non-null object\n", + " 2 up_votes 1048497 non-null object\n", + " 3 down_votes 1048521 non-null object\n", + " 4 author 1048524 non-null object\n", + " 5 definition 1048362 non-null object\n", + " 6 Unnamed: 6 153 non-null object\n", + " 7 Unnamed: 7 34 non-null object\n", + " 8 Unnamed: 8 6 non-null object\n", + " 9 Unnamed: 9 2 non-null object\n", + " 10 Unnamed: 10 1 non-null object\n", + " 11 Unnamed: 11 1 non-null object\n", + "dtypes: int64(1), object(11)\n", + "memory usage: 96.0+ MB\n", + "None\n", + "Dataset Description:\n", + " 
word_id\n", + "count 1.048525e+06\n", + "mean 1.411658e+06\n", + "std 8.498497e+05\n", + "min 7.000000e+00\n", + "25% 6.826870e+05\n", + "50% 1.398224e+06\n", + "75% 2.115287e+06\n", + "max 2.856896e+06\n", + "Dataset Sample:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
word_idwordup_votesdown_votesauthordefinitionUnnamed: 6Unnamed: 7Unnamed: 8Unnamed: 9Unnamed: 10Unnamed: 11
07Janky296255dc397b2fUndesirable; less-than optimum.NaNNaNNaNNaNNaNNaN
18slumpin'1637dc397b2flow down and funky, but [knee deep] enough to ...NaNNaNNaNNaNNaNNaN
29yayeeyay1927dc397b2faffirmation; suggestion of encouragement, appr...NaNNaNNaNNaNNaNNaN
312hard-core16296d1610749anything out of our league that can be good or...NaNNaNNaNNaNNaNNaN
413brutal124540ece1efanything that makes you sweatNaNNaNNaNNaNNaNNaN
\n", + "
"
+      ],
+      "text/plain": [
+       " word_id word up_votes down_votes author \\\n",
+       "0 7 Janky 296 255 dc397b2f \n",
+       "1 8 slumpin' 16 37 dc397b2f \n",
+       "2 9 yayeeyay 19 27 dc397b2f \n",
+       "3 12 hard-core 162 96 d1610749 \n",
+       "4 13 brutal 12 45 40ece1ef \n",
+       "\n",
+       " definition Unnamed: 6 Unnamed: 7 \\\n",
+       "0 Undesirable; less-than optimum. NaN NaN \n",
+       "1 low down and funky, but [knee deep] enough to ... NaN NaN \n",
+       "2 affirmation; suggestion of encouragement, appr... NaN NaN \n",
+       "3 anything out of our league that can be good or... NaN NaN \n",
+       "4 anything that makes you sweat NaN NaN \n",
+       "\n",
+       " Unnamed: 8 Unnamed: 9 Unnamed: 10 Unnamed: 11 \n",
+       "0 NaN NaN NaN NaN \n",
+       "1 NaN NaN NaN NaN \n",
+       "2 NaN NaN NaN NaN \n",
+       "3 NaN NaN NaN NaN \n",
+       "4 NaN NaN NaN NaN "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Display some info about dataset\n",
+    "print(\"Dataset Shape:\")\n",
+    "print(df.shape)\n",
+    "print(\"Dataset Columns:\")\n",
+    "print(df.columns)\n",
+    "print(\"Dataset Info:\")\n",
+    "# DataFrame.info() prints its report itself and returns None, so wrapping\n",
+    "# it in print() only adds a stray 'None' line to the output.\n",
+    "df.info()\n",
+    "print(\"Dataset Description:\")\n",
+    "print(df.describe())\n",
+    "print(\"Dataset Sample:\")\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Search Methodologies\n",
+    "\n",
+    "We employ four different search techniques to analyze word queries within the dataset. They all use the pydatastructs backend and API.\n",
+    "The four algorithms are as follows:\n",
+    "\n",
+    "1. Knuth-Morris-Pratt\n",
+    "2. Rabin-Karp\n",
+    "3. Boyer-Moore\n",
+    "4. Z-Function\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Searcher:\n",
+    "    \"\"\"\n",
+    "    Provides static methods for performing string search using various algorithms\n",
+    "    from the pydatastructs library. It also includes a method to evaluate the\n",
+    "    performance of these search algorithms.\n",
+    "\n",
+    "    Methods\n",
+    "    -------\n",
+    "    pydatastructs_rabin_karp_search(text: str, pattern: str) -> int\n",
+    "        Uses the Rabin-Karp algorithm to count occurrences of a pattern in a given text.\n",
+    "        Still reachable under its original, misspelled name\n",
+    "        ``pydatastrcuts_rabin_karp_serach`` for backward compatibility.\n",
+    "\n",
+    "    pydatastructs_kmp_search(text: str, pattern: str) -> int\n",
+    "        Uses the Knuth-Morris-Pratt algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "    pydatastructs_boyer_moore_search(text: str, pattern: str) -> int\n",
+    "        Uses the Boyer-Moore algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "    pydatastructs_z_function_search(text: str, pattern: str) -> int\n",
+    "        Uses the Z-Function algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "    evaluate_search_performance(search_func, text: str, query: str) -> tuple[int, float]\n",
+    "        Evaluates the performance of a given search function by measuring the execution time\n",
+    "        and the number of results found for a specific query in the text.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def _count_matches(text: str, pattern: str, algorithm: str) -> int:\n",
+    "        \"\"\"\n",
+    "        Runs pydatastructs ``find`` with the given algorithm and counts the matches.\n",
+    "\n",
+    "        Parameters\n",
+    "        ----------\n",
+    "        text : str\n",
+    "            The text in which to search for the pattern.\n",
+    "        pattern : str\n",
+    "            The pattern to search for in the text.\n",
+    "        algorithm : str\n",
+    "            Algorithm name passed through to ``find``\n",
+    "            ('rabin_karp', 'kmp', 'boyer_moore' or 'z_function').\n",
+    "\n",
+    "        Returns\n",
+    "        -------\n",
+    "        int\n",
+    "            The number of match positions reported by ``find``.\n",
+    "        \"\"\"\n",
+    "        positions = find(text, pattern, algorithm=algorithm)\n",
+    "        # find() returns a DynamicOneDimensionalArray. Its len() is the allocated\n",
+    "        # capacity (0, 1, 3, 7, 15, ...), not the number of stored positions, which\n",
+    "        # is why an earlier run only ever reported counts of the form 2**k - 1.\n",
+    "        # Unused slots are left as None, so count the filled slots instead.\n",
+    "        return sum(1 for index in range(len(positions))\n",
+    "                   if positions[index] is not None)\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def pydatastructs_rabin_karp_search(text: str, pattern: str) -> int:\n",
+    "        \"\"\"\n",
+    "        Uses the Rabin-Karp algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "        Parameters\n",
+    "        ----------\n",
+    "        text : str\n",
+    "            The text in which to search for the pattern.\n",
+    "        pattern : str\n",
+    "            The pattern to search for in the text.\n",
+    "\n",
+    "        Returns\n",
+    "        -------\n",
+    "        int\n",
+    "            The number of occurrences of the pattern found in the text.\n",
+    "        \"\"\"\n",
+    "        return Searcher._count_matches(text, pattern, 'rabin_karp')\n",
+    "\n",
+    "    # Backward-compatible alias for the original, misspelled method name.\n",
+    "    pydatastrcuts_rabin_karp_serach = pydatastructs_rabin_karp_search\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def pydatastructs_kmp_search(text: str, pattern: str) -> int:\n",
+    "        \"\"\"\n",
+    "        Uses the Knuth-Morris-Pratt algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "        Parameters\n",
+    "        ----------\n",
+    "        text : str\n",
+    "            The text in which to search for the pattern.\n",
+    "        pattern : str\n",
+    "            The pattern to search for in the text.\n",
+    "\n",
+    "        Returns\n",
+    "        -------\n",
+    "        int\n",
+    "            The number of occurrences of the pattern found in the text.\n",
+    "        \"\"\"\n",
+    "        return Searcher._count_matches(text, pattern, 'kmp')\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def pydatastructs_boyer_moore_search(text: str, pattern: str) -> int:\n",
+    "        \"\"\"\n",
+    "        Uses the Boyer-Moore algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "        Parameters\n",
+    "        ----------\n",
+    "        text : str\n",
+    "            The text in which to search for the pattern.\n",
+    "        pattern : str\n",
+    "            The pattern to search for in the text.\n",
+    "\n",
+    "        Returns\n",
+    "        -------\n",
+    "        int\n",
+    "            The number of occurrences of the pattern found in the text.\n",
+    "        \"\"\"\n",
+    "        return Searcher._count_matches(text, pattern, 'boyer_moore')\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def pydatastructs_z_function_search(text: str, pattern: str) -> int:\n",
+    "        \"\"\"\n",
+    "        Uses the Z-Function algorithm to count occurrences of a pattern in a given text.\n",
+    "\n",
+    "        Parameters\n",
+    "        ----------\n",
+    "        text : str\n",
+    "            The text in which to search for the pattern.\n",
+    "        pattern : str\n",
+    "            The pattern to search for in the text.\n",
+    "\n",
+    "        Returns\n",
+    "        -------\n",
+    "        int\n",
+    "            The number of occurrences of the pattern found in the text.\n",
+    "        \"\"\"\n",
+    "        return Searcher._count_matches(text, pattern, 'z_function')\n",
+    "\n",
+    "    # Evaluate performance of search functions\n",
+    "    @staticmethod\n",
+    "    def evaluate_search_performance(search_func, text: str, query: str) -> tuple[int, float]:\n",
+    "        \"\"\"\n",
+    "        Evaluates the performance of a given search function by measuring the execution time\n",
+    "        and the number of results found for a specific query in the text.\n",
+    "\n",
+    "        Parameters\n",
+    "        ----------\n",
+    "        search_func : function\n",
+    "            The search function to evaluate. It should take two arguments: text and query.\n",
+    "        text : str\n",
+    "            The text in which to search for the query.\n",
+    "        query : str\n",
+    "            The query to search for in the text.\n",
+    "\n",
+    "        Returns\n",
+    "        -------\n",
+    "        tuple[int, float]\n",
+    "            A tuple containing the number of occurrences of the query found in the text\n",
+    "            and the execution time of the search function in seconds.\n",
+    "        \"\"\"\n",
+    "        # perf_counter() is monotonic and high-resolution; time.time() follows the\n",
+    "        # wall clock, which can be adjusted while a measurement is running.\n",
+    "        start_time = time.perf_counter()\n",
+    "        result = search_func(text, query)\n",
+    "        execution_time = time.perf_counter() - start_time\n",
+    "        return result, execution_time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Best and worst case words for each algorithm\n",
+    "\n",
+    "boyer_moore_best: list[str] = [\"xylophone\"]  # Unique letters allow large skips\n",
+    "boyer_moore_worst: list[str] = [\"aaaaa\"]  # Repetitive patterns force O(mn)\n",
+    "\n",
+    "kmp_best: list[str] = [\"banana\"]  # Repeating prefixes make KMP efficient\n",
+    "# No repeating substrings, so KMP has no major optimizations\n",
+    "kmp_worst: list[str] = [\"xyzabc\"]\n",
+    "\n",
+    "# Quickly identifies repeated prefix-suffix patterns\n",
+    "z_function_best: list[str] = [\"abracadabra\"]\n",
+    "# No repeating substrings, making Z-function similar to KMP\n",
+    "z_function_worst: list[str] = [\"qwerty\"]\n",
+    "\n",
+    "# Great for multi-pattern searches\n",
+    "rabin_karp_best: list[str] = [\"hello\", \"world\", \"search\"]\n",
+    "# Hash collisions can degrade performance to O(mn)\n",
+    "rabin_karp_worst: list[str] = [\"abcdefghij\"]\n",
+    "\n",
+    "# Test queries\n",
+    "test_queries = boyer_moore_best + boyer_moore_worst + kmp_best + kmp_worst + \\\n",
+    "    z_function_best + z_function_worst + rabin_karp_best + rabin_karp_worst\n",
+    "\n",
+    "\n",
+    "# Display name of each algorithm -> the Searcher method that runs it\n",
+    "methods = {\n",
+    "    \"Rabin-Karp\": Searcher.pydatastructs_rabin_karp_search,\n",
+    "    \"Knuth-Morris-Pratt\": Searcher.pydatastructs_kmp_search,\n",
+    "    \"Boyer-Moore\": Searcher.pydatastructs_boyer_moore_search,\n",
+    "    \"Z-Function\": Searcher.pydatastructs_z_function_search\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run evaluation: every algorithm against every query on the same text.\n",
+    "# Each result row is [method name, query, match count, seconds].\n",
+    "performance_results = []\n",
+    "text = df['word'].str.cat(sep=' ')  # Concatenate all words in the dataset\n",
+    "for method_name, method_func in methods.items():\n",
+    "    for query in test_queries:\n",
+    "        num_results, exec_time = Searcher.evaluate_search_performance(\n",
+    "            method_func, text, query)\n",
+    "        performance_results.append(\n",
+    "            [method_name, query, num_results, exec_time])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Results\n",
+    "\n",
+    "The comparison of the **Boyer-Moore, Knuth-Morris-Pratt (KMP), Z-function, and Rabin-Karp** string search algorithms highlights their strengths and weaknesses across different scenarios.\n",
+    "\n",
+    "- **Boyer-Moore** excels in average-case performance, often achieving **sublinear time**, making it ideal for long patterns in large alphabets. However, it suffers from **O(mn) worst-case performance** on repetitive text.\n",
+    "- **KMP** and **Z-function** guarantee **O(n + m) worst-case complexity**, making them more reliable for structured pattern matching, though they lack the efficiency of Boyer-Moore in general cases.\n",
+    "- **Rabin-Karp**, leveraging hashing, performs well in **O(n + m) average-case** but degrades to **O(nm) worst-case** due to hash collisions, making it more suitable for multiple-pattern searches.\n",
+    "\n",
+    "Ultimately, the choice depends on the text structure, pattern length, and performance guarantees required for the application.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MethodQueryResults FoundExecution Time (s)
0Rabin-Karpxylophone716.368275
1Rabin-Karpaaaaa25530.503066
2Rabin-Karpbanana51163.199950
3Rabin-Karpxyzabc062.661349
4Rabin-Karpabracadabra158.821800
5Rabin-Karpqwerty6314.416847
6Rabin-Karphello25516.551707
7Rabin-Karpworld102315.397617
8Rabin-Karpsearch12756.959028
9Rabin-Karpabcdefghij1522.469995
10Knuth-Morris-Prattxylophone73.466889
11Knuth-Morris-Prattaaaaa2553.379942
12Knuth-Morris-Prattbanana5113.340413
13Knuth-Morris-Prattxyzabc03.506472
14Knuth-Morris-Prattabracadabra13.423525
15Knuth-Morris-Prattqwerty633.244726
16Knuth-Morris-Pratthello2553.567353
17Knuth-Morris-Prattworld10232.514962
18Knuth-Morris-Prattsearch1272.727760
19Knuth-Morris-Prattabcdefghij152.416548
20Boyer-Moorexylophone70.409917
21Boyer-Mooreaaaaa2550.582294
22Boyer-Moorebanana5110.765548
23Boyer-Moorexyzabc00.721526
24Boyer-Mooreabracadabra10.285021
25Boyer-Mooreqwerty631.034254
26Boyer-Moorehello2550.670151
27Boyer-Mooreworld10230.705552
28Boyer-Mooresearch1270.847801
29Boyer-Mooreabcdefghij150.395121
30Z-Functionxylophone726.620959
31Z-Functionaaaaa25522.801260
32Z-Functionbanana51122.610493
33Z-Functionxyzabc021.820343
34Z-Functionabracadabra120.470267
35Z-Functionqwerty6324.497443
36Z-Functionhello25525.262055
37Z-Functionworld102324.878788
38Z-Functionsearch12724.227696
39Z-Functionabcdefghij1524.152845
\n", + "
"
+      ],
+      "text/plain": [
+       " Method Query Results Found Execution Time (s)\n",
+       "0 Rabin-Karp xylophone 7 16.368275\n",
+       "1 Rabin-Karp aaaaa 255 30.503066\n",
+       "2 Rabin-Karp banana 511 63.199950\n",
+       "3 Rabin-Karp xyzabc 0 62.661349\n",
+       "4 Rabin-Karp abracadabra 1 58.821800\n",
+       "5 Rabin-Karp qwerty 63 14.416847\n",
+       "6 Rabin-Karp hello 255 16.551707\n",
+       "7 Rabin-Karp world 1023 15.397617\n",
+       "8 Rabin-Karp search 127 56.959028\n",
+       "9 Rabin-Karp abcdefghij 15 22.469995\n",
+       "10 Knuth-Morris-Pratt xylophone 7 3.466889\n",
+       "11 Knuth-Morris-Pratt aaaaa 255 3.379942\n",
+       "12 Knuth-Morris-Pratt banana 511 3.340413\n",
+       "13 Knuth-Morris-Pratt xyzabc 0 3.506472\n",
+       "14 Knuth-Morris-Pratt abracadabra 1 3.423525\n",
+       "15 Knuth-Morris-Pratt qwerty 63 3.244726\n",
+       "16 Knuth-Morris-Pratt hello 255 3.567353\n",
+       "17 Knuth-Morris-Pratt world 1023 2.514962\n",
+       "18 Knuth-Morris-Pratt search 127 2.727760\n",
+       "19 Knuth-Morris-Pratt abcdefghij 15 2.416548\n",
+       "20 Boyer-Moore xylophone 7 0.409917\n",
+       "21 Boyer-Moore aaaaa 255 0.582294\n",
+       "22 Boyer-Moore banana 511 0.765548\n",
+       "23 Boyer-Moore xyzabc 0 0.721526\n",
+       "24 Boyer-Moore abracadabra 1 0.285021\n",
+       "25 Boyer-Moore qwerty 63 1.034254\n",
+       "26 Boyer-Moore hello 255 0.670151\n",
+       "27 Boyer-Moore world 1023 0.705552\n",
+       "28 Boyer-Moore search 127 0.847801\n",
+       "29 Boyer-Moore abcdefghij 15 0.395121\n",
+       "30 Z-Function xylophone 7 26.620959\n",
+       "31 Z-Function aaaaa 255 22.801260\n",
+       "32 Z-Function banana 511 22.610493\n",
+       "33 Z-Function xyzabc 0 21.820343\n",
+       "34 Z-Function abracadabra 1 20.470267\n",
+       "35 Z-Function qwerty 63 24.497443\n",
+       "36 Z-Function hello 255 25.262055\n",
+       "37 Z-Function world 1023 24.878788\n",
+       "38 Z-Function search 127 24.227696\n",
+       "39 Z-Function abcdefghij 15 24.152845"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Tabulate the benchmark rows: one row per (method, query) pair\n",
+    "performance_df = pd.DataFrame(\n",
+    "    data=performance_results,\n",
+    "    columns=[\"Method\", \"Query\", \"Results Found\", \"Execution Time (s)\"],\n",
+    ")\n",
+    "display(performance_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "
"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Use seaborn's whitegrid theme for the chart\n",
+    "sns.set_theme(context=\"notebook\", style=\"whitegrid\")\n",
+    "\n",
+    "# Grouped bars on an explicit Axes: one group per query, one bar per algorithm\n",
+    "fig, ax = plt.subplots(figsize=(12, 6))\n",
+    "sns.barplot(data=performance_df, x=\"Query\", y=\"Execution Time (s)\",\n",
+    "            hue=\"Method\", ax=ax)\n",
+    "\n",
+    "# Title and axis labels\n",
+    "ax.set_title(\"Comparison of Execution Times for Different Search Algorithms\")\n",
+    "ax.set_xlabel(\"Query\")\n",
+    "ax.set_ylabel(\"Execution Time (s)\")\n",
+    "\n",
+    "# Render the figure\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}