9 Sep 2023

code-boxx · code-boxx · commit 9b5e227b9315 · 2023-09-09T15:31:13.000+08:00
Installer now defaults to "GPU"; "borrowed" the accelerate snippet from Oobabooga. Should load and run a little faster now.
diff --git a/ai chatbot/README.md b/ai chatbot/README.md
@@ -5,13 +5,12 @@ https://code-boxx.com/core-boxx-ai-chatbot/
 * [Core Boxx](https://github.com/code-boxx/Core-Boxx-PHP-Framework/tree/main/core)
 * [Python](https://www.python.org/) At the time of writing, 3.9~3.10 works fine.
 * [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/downloads/?q=build+tools)
+* A decent graphics card. Even if you tweak and run with CPU-only, it will be painfully slow...
 
 ## INSTALLATION
 * Copy/unzip this module into your existing Core Boxx project folder.
 * Put documents you want the AI to "learn" into `chatbot/docs`, accepted file types - `csv pdf txt epub html md odt doc docx ppt pptx`.
-* Run setup - *BE WARNED, SEVERAL GIGABYTES WORTH OF DOWNLOAD!*
-  * Windows - Run `0-setup.bat` for "CPU only", or `0-setup.bat GPU` if you have an Nvidia graphics card.
-  * Linux - Run `0-setup.sh` for "CPU only", or `0-setup.sh GPU` if you have an Nvidia graphics card.
+* Run `0-setup.bat` (Windows) or `0-setup.sh` (Linux) - *BE WARNED, SEVERAL GIGABYTES WORTH OF DOWNLOAD!*
 * Access `http://your-site.com/ai/` for the demo.
 
 ## NOTES
diff --git a/ai chatbot/chatbot/0-setup.bat b/ai chatbot/chatbot/0-setup.bat
@@ -1,11 +1,11 @@
-php 0-setup.php %1
+php 0-setup.php
 virtualenv venv
 call venv\Scripts\activate
 pip install langchain transformers optimum auto-gptq chromadb sentence_transformers Flask pyjwt
-if "%1"=="GPU" (
-  pip install torch torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu117
-) else (
+if "%1"=="CPU" (
   pip install torch torchvision torchaudio --force-reinstall
+) else (
+  pip install torch torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu117
 )
-python create.py
-python bot.py
+python b_create.py
+python d_bot.py
diff --git a/ai chatbot/chatbot/0-setup.php b/ai chatbot/chatbot/0-setup.php
@@ -1,33 +1,30 @@
 <?php
-// (A) KOA SUTATO
-require dirname(__DIR__) . DIRECTORY_SEPARATOR . "lib" . DIRECTORY_SEPARATOR . "CORE-Go.php";
+// (A) LOAD CORE CONFIG
+require dirname(__DIR__) . DIRECTORY_SEPARATOR . "lib" . DIRECTORY_SEPARATOR . "CORE-Config.php";
 
 // (B) NEW CHATBOT PATH
 define("PATH_CHATBOT", PATH_BASE . "chatbot" . DIRECTORY_SEPARATOR);
 
-// (C) BACKUP CHATBOT/SETTINGS.PY
-if (!copy(PATH_CHATBOT . "settings.py", PATH_CHATBOT . "settings.old")) {
-  exit("Failed to backup settings file - " . PATH_CHATBOT . "settings.old");
+// (C) BACKUP CHATBOT/A_SETTINGS.PY
+if (!copy(PATH_CHATBOT . "a_settings.py", PATH_CHATBOT . "a_settings.old")) {
+  exit("Failed to backup settings file - " . PATH_CHATBOT . "a_settings.old");
 }
 
-// (D) COPY HOST SETTINGS FROM CORE-CONFIG.PHP TO SETTINGS.PY
+// (D) COPY SETTINGS FROM CORE-CONFIG.PHP TO A_SETTINGS.PY
 $replace = [
-  "model_name" => isset($argv[1]) && $argv[1]=="GPU"
-    ? '"TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"'
-    : '"TheBloke/Wizard-Vicuna-7B-Uncensored-GGML"',
   "http_allow" => "[\"http://".HOST_NAME."\", \"https://".HOST_NAME."\"]",
   "http_host" => "\"".HOST_NAME."\"",
   "jwt_algo" => "\"".JWT_ALGO."\"",
   "jwt_secret" => "\"".JWT_SECRET."\""
 ];
-$cfg = file(PATH_CHATBOT . "settings.py") or exit("Cannot read". PATH_CHATBOT ."settings.py");
+$cfg = file(PATH_CHATBOT . "a_settings.py") or exit("Cannot read". PATH_CHATBOT ."a_settings.py");
 foreach ($cfg as $j=>$line) { foreach ($replace as $k=>$v) { if (strpos($line, $k) !== false) {
   $cfg[$j] = "$k = $v # CHANGED BY INSTALLER\r\n";
   unset($replace[$k]);
   if (count($replace)==0) { break; }
 }}}
-try { file_put_contents(PATH_CHATBOT . "settings.py", implode("", $cfg)); }
-catch (Exception $ex) { exit("Error writing to ". PATH_CHATBOT . "settings.py"); }
+try { file_put_contents(PATH_CHATBOT . "a_settings.py", implode("", $cfg)); }
+catch (Exception $ex) { exit("Error writing to ". PATH_CHATBOT . "a_settings.py"); }
 
 // (E) ADD AI TO CORE-CONFIG.PHP
 try {
diff --git a/ai chatbot/chatbot/0-setup.sh b/ai chatbot/chatbot/0-setup.sh
@@ -1,12 +1,12 @@
-php 0-setup.php $1
+php 0-setup.php
 virtualenv venv
 source "venv/bin/activate"
 pip install langchain transformers optimum auto-gptq chromadb sentence_transformers Flask pyjwt
-if [[ $1 == "GPU" ]]
+if [[ $1 == "CPU" ]]
 then
-  pip3 install torch torchvision torchaudio --force-reinstall
+  pip install torch torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cpu
 else
-  pip3 install torch torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cpu
+  pip install torch torchvision torchaudio --force-reinstall
 fi
-python create.py
-python bot.py
+python b_create.py
+python d_bot.py
diff --git a/ai chatbot/chatbot/1-create.bat b/ai chatbot/chatbot/1-create.bat
@@ -1,4 +1,4 @@
 @echo off
 call venv\Scripts\activate
-python create.py
+python b_create.py
 deactivate
diff --git a/ai chatbot/chatbot/1-create.sh b/ai chatbot/chatbot/1-create.sh
@@ -1,3 +1,3 @@
 source "venv/bin/activate"
-python create.py
+python b_create.py
 deactivate
diff --git a/ai chatbot/chatbot/2-bot.bat b/ai chatbot/chatbot/2-bot.bat
@@ -1,4 +1,4 @@
 @echo off
 call venv\Scripts\activate
-python bot.py
+python d_bot.py
 deactivate
diff --git a/ai chatbot/chatbot/2-bot.sh b/ai chatbot/chatbot/2-bot.sh
@@ -1,3 +1,3 @@
 source "venv/bin/activate"
-python bot.py
+python d_bot.py
 deactivate
diff --git a/ai chatbot/chatbot/a_settings.py b/ai chatbot/chatbot/a_settings.py
@@ -1,42 +1,50 @@
-# (A) PATHS
+# (A) PATH
 import os
 path_base = os.path.dirname(os.path.realpath(__file__))
 path_models = os.path.join(path_base, "models")
 path_db = os.path.join(path_base, "db")
 path_docs = os.path.join(path_base, "docs")
+
+# (B) ENVIRONMENT VARIABLES
+os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "true"
 os.environ["TRANSFORMERS_CACHE"] = path_models
 
-# (B) MODEL
-model_name = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
-model_sample = True
-model_max_tokens = 1024
-model_batch_size = 1
-model_temperature = 0.7
-model_top_p = 1
-model_top_k = 40
-model_sequences = 1
+# (C) MODEL SETTINGS
+model_name = "TheBloke/vicuna-7B-v1.5-GPTQ"
+model_args = {
+  "do_sample" : True,
+  "max_new_tokens" : 3000,
+  "batch_size" : 1,
+  "temperature" : 0.7,
+  "top_k" : 40,
+  "top_p" : 1,
+  "num_return_sequences" : 1
+}
+
+# (D) CHAIN SETTINGS
+chain_args = {
+  "chain_type" : "stuff",
+  "return_source_documents" : True,
+  "verbose" : True
+}
 
-# (C) PROMPT TEMPLATE
+# (E) PROMPT TEMPLATE
 prompt_template = """SYSTEM: Use the following context section and only that context to answer the question at the end. Do not use your internal knowledge. If you don't know the answer, just say that you don't know, don't try to make up an answer.
 CONTEXT: {context}
 USER: {question}
 ANSWER:"""
 
-# (D) CHAIN
-chain_verbose = True
-chain_type = "stuff"
-chain_kwargs = 4
-chain_source = True
-
-# (E) DATABASE
-doc_chunks = 512
-doc_overlap = 30
+# (F) DATABASE - DOCUMENT SPLITTER
+db_split = {
+  "chunk_size" : 512,
+  "chunk_overlap" : 30
+}
 
-# (F) HTTP ENDPOINT
+# (G) HTTP ENDPOINT
 http_allow = ["http://localhost"]
 http_host = "localhost"
 http_port = 8008
 
-# (G) JWT
+# (H) JWT
 jwt_algo = ""
 jwt_secret = ""
diff --git a/ai chatbot/chatbot/b_create.py b/ai chatbot/chatbot/b_create.py
@@ -1,5 +1,5 @@
 # (A) LOAD SETTINGS & MODULES
-import settings as set
+import a_settings as set
 import os, glob
 from pathlib import Path
 from langchain.vectorstores import Chroma
@@ -65,9 +65,7 @@ def rmdir(folder):
 db.persist()
 
 # (D2) ADD DOCUMENTS
-splitter = RecursiveCharacterTextSplitter(
-  chunk_size = set.doc_chunks, chunk_overlap = set.doc_overlap
-)
+splitter = RecursiveCharacterTextSplitter(**set.db_split)
 for doc in all:
   print("Adding - " + doc)
   name, ext = os.path.splitext(doc)
diff --git a/ai chatbot/chatbot/c_tf.py b/ai chatbot/chatbot/c_tf.py
@@ -0,0 +1,62 @@
+# (A) LOAD SETTINGS & MODULES
+import a_settings as set
+import torch, psutil
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline
+from accelerate import infer_auto_device_map, init_empty_weights
+
+# (B) HELPER - AUTO MAX MEMORY CALCULATION (BUILDS THE max_memory DICT GIVEN TO infer_auto_device_map IN C3)
+# credits : https://github.com/oobabooga/text-generation-webui/blob/main/modules/models.py
+def max_mem():  # takes no arguments; returns { 0 : "<n>GiB", "cpu" : "<n>GiB" }
+  # (B1) GPU MEMORY - DEVICE INDEX 0 ONLY
+  total = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))  # presumably bytes -> MiB; NOTE(review): requires CUDA, yet (C3) is also reached when only MPS is available - confirm
+  suggestion = round((total - 1000) / 1000) * 1000  # reserve 1000 as headroom, then round to a multiple of 1000
+  if total - suggestion < 800:  # rounding left less than 800 spare
+    suggestion -= 1000  # so give up one more step of 1000
+  suggestion = int(round(suggestion / 1000))  # NOTE(review): divides by 1000 (not 1024) but is labelled GiB below - confirm intended
+  max = { 0 : f"{suggestion}GiB" }  # keyed by the device index queried above; NOTE(review): local name shadows the builtin max()
+
+  # (B2) CPU MEMORY - SAME FORMULA AS B1, BASED ON CURRENTLY AVAILABLE SYSTEM MEMORY
+  total = (psutil.virtual_memory().available / (1024 * 1024))  # presumably bytes -> MiB
+  suggestion = round((total - 1000) / 1000) * 1000  # reserve 1000 as headroom, then round to a multiple of 1000
+  if total - suggestion < 800:  # rounding left less than 800 spare
+    suggestion -= 1000  # so give up one more step of 1000
+  suggestion = int(round(suggestion / 1000))  # NOTE(review): can be 0 or negative when little memory is available (e.g. total = 500 gives -1)
+  max["cpu"] = f"{suggestion}GiB"  # same string format as the GPU entry
+
+  # (B3) RETURN CALCULATED MEMORY
+  return max
+
+# (C) LOAD MODEL - RUNS AT IMPORT TIME; d_bot.py IMPORTS THIS MODULE AND USES pipe
+# (C1) INIT PARAMS - KEYWORD ARGUMENTS FORWARDED TO from_pretrained() IN C4
+params = {
+  "low_cpu_mem_usage": True,
+  "device_map" : "auto"  # default; replaced in C3 by a calculated map
+}
+
+# (C2) CPU ONLY - NEITHER CUDA NOR MPS IS REPORTED AS AVAILABLE
+if not any((torch.cuda.is_available(), torch.backends.mps.is_available())):
+  params["torch_dtype"] = torch.float32  # request float32 weights on this path
+
+# (C3) GPU ACCELERATED - CUDA AND/OR MPS AVAILABLE
+else:
+  config = AutoConfig.from_pretrained(set.model_name)  # model name comes from a_settings.py
+  with init_empty_weights():  # presumably builds the model without allocating real weights - confirm against the accelerate docs
+    model = AutoModelForCausalLM.from_config(config)  # temporary model, used only to infer the device map
+  model.tie_weights()
+  params["device_map"] = infer_auto_device_map(  # overrides the "auto" default from C1
+    model,
+    dtype = config.torch_dtype,
+    max_memory = max_mem(),  # NOTE(review): max_mem() queries torch.cuda unconditionally - confirm this path on MPS-only machines
+    no_split_module_classes = model._no_split_modules  # private attribute of the model class
+  )
+
+# (C4) LOAD MODEL - REBINDS model, REPLACING THE TEMPORARY ONE CREATED IN C3
+model = AutoModelForCausalLM.from_pretrained(set.model_name, **params)
+
+# (D) PIPE - TEXT GENERATION PIPELINE, REFERENCED BY d_bot.py AS tf.pipe
+pipe = pipeline(
+  task = "text-generation",
+  model = model,
+  tokenizer = AutoTokenizer.from_pretrained(set.model_name),  # tokenizer loaded from the same model name
+  ** set.model_args  # generation settings dict defined in a_settings.py
+)
diff --git a/ai chatbot/chatbot/d_bot.py b/ai chatbot/chatbot/d_bot.py
@@ -1,55 +1,37 @@
 # (A) LOAD SETTINGS & MODULES
-import settings as set
+# (A1) SETTINGS & TRANSFORMER
+import a_settings as set
+import c_tf as tf
+
+# (A2) FLASK
+# @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
+# import jwt 
 from flask import Flask, Response, request
-import torch, jwt
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+# (A3) LANGCHAIN
 from langchain import PromptTemplate, HuggingFacePipeline
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 
-# (B) TOKENIZER + MODEL + DATABASE
-tokenizer = AutoTokenizer.from_pretrained(set.model_name)
-model = AutoModelForCausalLM.from_pretrained(
-  set.model_name,
-  torch_dtype = torch.float16,
-  device_map = "auto"
-)
-db = Chroma(
-  persist_directory = set.path_db,
-  embedding_function = HuggingFaceEmbeddings()
-)
-
-# (C) PIPE + CHAIN
-pipe = pipeline(
-  task = "text-generation",
-  model = model,
-  tokenizer = tokenizer,
-  eos_token_id = tokenizer.eos_token_id,
-  do_sample = set.model_sample,
-  max_new_tokens = set.model_max_tokens,
-  batch_size = set.model_batch_size,
-  temperature = set.model_temperature,
-  top_k = set.model_top_k,
-  top_p = set.model_top_p,
-  num_return_sequences = set.model_sequences
-)
+# (B) CHAIN
 chain = RetrievalQA.from_chain_type(
-  chain_type = set.chain_type,
-  llm = HuggingFacePipeline(pipeline = pipe),
-  retriever = db.as_retriever(search_kwargs = {"k": set.chain_kwargs}),
+  llm = HuggingFacePipeline(pipeline = tf.pipe),
+  retriever = Chroma(
+    persist_directory = set.path_db,
+    embedding_function = HuggingFaceEmbeddings()
+  ).as_retriever(),
   chain_type_kwargs = {
     "prompt": PromptTemplate (
       template = set.prompt_template,
       input_variables = ["question", "context"]
     )
   },
-  return_source_documents = set.chain_source,
-  verbose = set.chain_verbose
+  ** set.chain_args
 )
 
 """ @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
-# (D) VERIFY USER
+# (C) VERIFY USER
 def jwtVerify(cookies):
   try:
     token = jwt.decode(
@@ -66,19 +48,19 @@ def jwtVerify(cookies):
     return False
 """
 
-# (E) FLASK
+# (D) FLASK
 app = Flask(__name__)
 @app.route("/", methods = ["POST"])
 def bot():
-  # (E1) CORS
+  # (D1) CORS
   if "HTTP_ORIGIN" in request.environ and request.environ["HTTP_ORIGIN"] in set.http_allow:
-    # (E1-1) ALLOW ONLY REGISTERED USERS
+    # (D1-1) ALLOW ONLY REGISTERED USERS
     """ @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
     if jwtVerify(request.cookies) is False:
       return Response("Not Allowed", status = 405)
     """
 
-    # (E1-2) ANSWER THE QUESTION
+    # (D1-2) ANSWER THE QUESTION
     data = dict(request.form)
     if "query" in data:
       ans = chain(data["query"])
@@ -89,12 +71,12 @@ def bot():
     response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"] )
     response.headers.add("Access-Control-Allow-Credentials", "true")
 
-  # (E2) ORIGIN NOT ALLOWED
+  # (D2) ORIGIN NOT ALLOWED
   else:
     response = Response("Not Allowed", status = 405)
   return response
 
-# (F) GO!
+# (E) GO!
 if __name__ == "__main__":
   app.run(
     host = set.http_host,