
Commit 61e046a
committed 10 Sep 2023

Installer sequence change.

1 parent f0304b6

File tree: 6 files changed, +50 -52 lines changed

ai chatbot/README.md
Lines changed: 7 additions & 11 deletions

@@ -10,24 +10,20 @@ https://code-boxx.com/core-boxx-ai-chatbot/
 
 ## RECOMMENDED
 * An Nvidia graphics card with at least 8GB VRAM is highly recommended.
-* You CAN run on CPU, but that will be painfully slow.
+* You can TRY to run on CPU-only, but it is painfully slow.
 
 ## INSTALLATION
 * Copy/unzip this module into your existing Core Boxx project folder.
 * Put documents you want the AI to "learn" into `chatbot/docs`, accepted file types - `csv pdf txt epub html md odt doc docx ppt pptx`.
 * Start install - *BE WARNED, SEVERAL GIGABYTES WORTH OF DOWNLOAD!*
 * GPU - Run `0-setup.bat` (Windows) `0-setup.sh` (Linux).
-* CPU - Run `0-setup.bat CPU` (Windows) `0-setup.sh CPU` (Linux). You will need to manually download your own model, see "changing models" below.
-* Access `http://your-site.com/ai/` for the demo.
+* CPU - Run `0-setup.bat CPU` (Windows) `0-setup.sh CPU` (Linux).
+* You will need to [choose and download an AI model](https://code-boxx.com/core-boxx-ai-chatbot/#sec-choose).
+* Run `2-bot.bat / 2-bot.sh`, access `http://your-site.com/ai/` for the demo.
 
-## CHANGING MODELS
-* This module runs on [llama.cpp](https://github.com/ggerganov/llama.cpp).
-* Just put your downloaded `GGML/GGUF` model into `chatbot/models`.
-* Change `model_name` in `a_settings.py` to the model file name.
-
-## NOTES
-* To rebuild the documents database, simply add/remove documents from `chatbot/docs` and run `1-create.bat / 1-create.sh`.
-* To launch the bot, simply run `2-bot.bat / 2-bot.sh`.
+## REBUILD THE DATABASE
+* Simply add/remove documents from `chatbot/docs`.
+* Run `1-create.bat / 1-create.sh`.
 
 ## LICENSE
 Copyright by Code Boxx
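
The installer's database step is `python b_create.py` (see the setup script diffs below), and `1-create` presumably re-runs that same step. As a rough orientation, a rebuild amounts to something like the minimal sketch below, assuming the langchain pieces `d_bot.py` already imports; the loader and splitter choices here are illustrative guesses, not confirmed by this commit:

# sketch of a docs-database rebuild in the spirit of b_create.py
# (DirectoryLoader / RecursiveCharacterTextSplitter are assumptions)
import a_settings as set
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma

docs = DirectoryLoader(set.path_docs).load()                     # read everything in chatbot/docs
chunks = RecursiveCharacterTextSplitter().split_documents(docs)  # cut into retrievable chunks
Chroma.from_documents(                                           # embed + persist into chatbot/db
  chunks,
  HuggingFaceInstructEmbeddings(**set.embed_args),               # assumes embed_args fits this constructor
  persist_directory = set.path_db
)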

ai chatbot/chatbot/0-setup.bat
Lines changed: 1 addition & 5 deletions

@@ -14,8 +14,4 @@ if "%1"=="CPU" (
 )
 pip install --no-cache-dir --upgrade --force-reinstall llama-cpp-python
 python b_create.py
-if "%1"=="CPU" (
-  echo "Install complete - Please download your own model before running 2-bot.bat"
-) else (
-  python d_bot.py
-)
+echo "Install complete - Please download your own model before running 2-bot.bat"

ai chatbot/chatbot/0-setup.sh
Lines changed: 1 addition & 5 deletions

@@ -11,8 +11,4 @@ else
   CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --no-cache-dir --upgrade --force-reinstall llama-cpp-python
 fi
 python b_create.py
-if [[ $1 == "CPU" ]]
-then
-  echo "Install complete - Please download your own model before running 2-bot.sh"
-else
-  python d_bot.py
+echo "Install complete - Please download your own model before running 2-bot.sh"

ai chatbot/chatbot/a_settings.py
Lines changed: 39 additions & 25 deletions

@@ -1,50 +1,70 @@
-# (A) LOAD MODULES
-import os, torch
-
-# (B) MODEL
+# (A) MODEL
 # hugging face url path, or model file inside models/
-model_name = "TheBloke/vicuna-7B-v1.5-GPTQ"
-#model_name = "llama-2-7b.Q5_K_M.gguf"
+#model_name = "TheBloke/vicuna-7B-v1.5-GPTQ"
+model_name = "llama-2-7b-chat.Q5_K_M.gguf"
 
-# (C) AUTO - PATH
+# (B) AUTO - PATH
+import os
 path_base = os.path.dirname(os.path.realpath(__file__))
 path_models = os.path.join(path_base, "models")
 path_db = os.path.join(path_base, "db")
 path_docs = os.path.join(path_base, "docs")
 
+# (C) AUTO - CPU OR GPU
+import torch
+if not any((torch.cuda.is_available(), torch.backends.mps.is_available())):
+  gpu = False
+else:
+  gpu = True
+
 # (D) LLAMA CPP
 if os.path.isfile(os.path.join(path_models, model_name)):
+  # (D1) LLAMA MODEL FILE
   model_file = os.path.join(path_models, model_name)
+
+  # (D2) LLAMA MODEL SETTINGS
+  # https://api.python.langchain.com/en/latest/llms/langchain.llms.llamacpp.LlamaCpp.html
+  # FACTUAL
   model_args = {
-    "max_tokens" : 2000,
+    "repeat_penalty" : 1.176,
     "temperature" : 0.7,
     "top_k" : 40,
-    "top_p" : 1,
+    "top_p" : 0.1,
+    "n_ctx" : 3000,
+    "max_tokens" : 3000,
     "n_gpu_layers" : 40,
     "n_batch" : 512,
     "streaming" : False,
    "verbose" : False
  }
+  """ CREATIVE
+  "repeat_penalty" : 1.1,
+  "temperature" : 0.75,
+  "top_k" : 0,
+  "top_p" : 0.7,
+  """
 
 # (E) HF TRANSFORMER
 else:
+  # (E1) TRANSFORMER ENVIRONMENT VARIABLES
   os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "true"
   os.environ["TRANSFORMERS_CACHE"] = path_models
+
+  # (E2) MODEL VARIABLES
+  # https://huggingface.co/docs/transformers/main_classes/text_generation
   model_args = {
     "do_sample" : True,
-    "max_new_tokens" : 2000,
-    "batch_size" : 1,
     "temperature" : 0.7,
     "top_k" : 40,
     "top_p" : 1,
-    "num_return_sequences" : 1
+    "max_new_tokens" : 3000
   }
 
-# (F) AUTO - CPU OR GPU
-if not any((torch.cuda.is_available(), torch.backends.mps.is_available())):
-  gpu = False
-else:
-  gpu = True
+# (F) PROMPT TEMPLATE
+prompt_template = """SYSTEM: Use the following context section and only that context to answer the question at the end. Do not use your internal knowledge. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+CONTEXT: {context}
+USER: {question}
+ANSWER:"""

@@ -65,17 +85,11 @@
   "verbose" : True
 }
 
-# (J) PROMPT TEMPLATE
-prompt_template = """SYSTEM: Use the following context section and only that context to answer the question at the end. Do not use your internal knowledge. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-CONTEXT: {context}
-USER: {question}
-ANSWER:"""
-
-# (K) HTTP ENDPOINT
+# (J) HTTP ENDPOINT
 http_allow = ["http://localhost"]
 http_host = "localhost"
 http_port = 8008
 
-# (L) JWT
+# (K) JWT
 jwt_algo = ""
 jwt_secret = ""
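
The settings file only declares values; going by the LlamaCpp docs URL it cites, the llama.cpp branch is presumably consumed roughly like this (a sketch of assumed wiring; the actual consumer code is not in this diff):

# sketch: feeding the llama.cpp settings into langchain's LlamaCpp wrapper
# (assumed wiring, not shown in this commit)
from langchain.llms import LlamaCpp
import a_settings as set

llm = LlamaCpp(model_path = set.model_file, **set.model_args)  # n_ctx, n_gpu_layers, etc.
print(llm(set.prompt_template.format(context = "Core Boxx is a PHP boilerplate.", question = "What is Core Boxx?")))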

ai chatbot/chatbot/c_oto_rodo.py
Lines changed: 0 additions & 1 deletion

@@ -44,7 +44,6 @@ def max_mem():
 
 # (C3) INIT MODEL PARAMS
 params = {
-  "low_cpu_mem_usage": True,
   "device_map" : "auto"
 }
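
With `low_cpu_mem_usage` dropped, `device_map` alone steers model placement. In transformers such a params dict is typically splatted into `from_pretrained`, along these lines (a sketch; the actual load call in `c_oto_rodo.py` is outside this hunk):

# sketch: how a params dict like this usually reaches transformers
# (the exact call in c_oto_rodo.py is not shown in this diff; device_map needs accelerate installed)
from transformers import AutoModelForCausalLM

params = {"device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained("TheBloke/vicuna-7B-v1.5-GPTQ", **params)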

ai chatbot/chatbot/d_bot.py
Lines changed: 2 additions & 5 deletions

@@ -1,15 +1,12 @@
 # (A) LOAD SETTINGS & MODULES
-# (A1) SETTINGS & LANGCHAIN
 import a_settings as set
 import c_oto_rodo as oto
 from langchain import PromptTemplate
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.chains import RetrievalQA
-
-# (A2) FLASK
-# import jwt # @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
 from flask import Flask, Response, request
+# import jwt # @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
 
 # (B) CHAIN
 chain = RetrievalQA.from_chain_type(

@@ -65,7 +62,7 @@ def bot():
   else:
     ans = "Where's the question, yo?"
   response = Response(ans, status = 200)
-  response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"] )
+  response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"])
   response.headers.add("Access-Control-Allow-Credentials", "true")
 
 # (D2) ORIGIN NOT ALLOWED
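
The whitespace fix above sits inside the usual origin allow-list pattern; pieced together with `http_allow` from `a_settings.py`, the surrounding route plausibly looks like this (a reconstruction for context, not verbatim project code):

# reconstruction of the endpoint around the fixed line (for context only)
from flask import Flask, Response, request
import a_settings as set

app = Flask(__name__)

@app.route("/", methods = ["POST"])
def bot():
  # (D1) origin allowed - answer + CORS headers
  if request.environ.get("HTTP_ORIGIN") in set.http_allow:
    ans = "..."  # answer produced by the RetrievalQA chain
    response = Response(ans, status = 200)
    response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"])
    response.headers.add("Access-Control-Allow-Credentials", "true")
  # (D2) origin not allowed
  else:
    response = Response("Origin not allowed", status = 403)
  return response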
