
Commit 61e046a
committed 10 Sep 2023

Installer sequence change.

1 parent f0304b6

File tree: 6 files changed, +50 -52 lines changed

ai chatbot/README.md
Lines changed: 7 additions & 11 deletions

@@ -10,24 +10,20 @@ https://code-boxx.com/core-boxx-ai-chatbot/
 
 ## RECOMMENDED
 * An Nvidia graphics card with at least 8GB VRAM is highly recommended.
-* You CAN run on CPU, but that will be painfully slow.
+* You can TRY to run on CPU-only, but it is painfully slow.
 
 ## INSTALLATION
 * Copy/unzip this module into your existing Core Boxx project folder.
 * Put documents you want the AI to "learn" into `chatbot/docs`, accepted file types - `csv pdf txt epub html md odt doc docx ppt pptx`.
 * Start install - *BE WARNED, SEVERAL GIGABYTES WORTH OF DOWNLOAD!*
 * GPU - Run `0-setup.bat` (Windows) `0-setup.sh` (Linux).
-* CPU - Run `0-setup.bat CPU` (Windows) `0-setup.sh CPU` (Linux). You will need to manually download your own model, see "changing models" below.
-* Access `http://your-site.com/ai/` for the demo.
+* CPU - Run `0-setup.bat CPU` (Windows) `0-setup.sh CPU` (Linux).
+* You will need to [choose and download an AI model](https://code-boxx.com/core-boxx-ai-chatbot/#sec-choose).
+* Run `2-bot.bat / 2-bot.sh`, access `http://your-site.com/ai/` for the demo.
 
-## CHANGING MODELS
-* This module runs on [llama.cpp](https://github.com/ggerganov/llama.cpp).
-* Just put your downloaded `GGML/GGUF` model into `chatbot/models`.
-* Change `model_name` in `a_settings.py` to the model file name.
-
-## NOTES
-* To rebuild the documents database, simply add/remove documents from `chatbot/docs` and run `1-create.bat / 1-create.sh`.
-* To launch the bot, simply run `2-bot.bat / 2-bot.sh`.
+## REBUILD THE DATABASE
+* Simply add/remove documents from `chatbot/docs`.
+* Run `1-create.bat / 1-create.sh`.
 
 ## LICENSE
 Copyright by Code Boxx
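
The installer's database step is `python b_create.py` (see the setup script diffs below), and `1-create` presumably re-runs that same step. As a rough orientation, a rebuild amounts to something like the minimal sketch below, assuming the langchain pieces `d_bot.py` already imports; the loader and splitter choices here are illustrative guesses, not confirmed by this commit:

# sketch of a docs-database rebuild in the spirit of b_create.py
# (DirectoryLoader / RecursiveCharacterTextSplitter are assumptions)
import a_settings as set
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma

docs = DirectoryLoader(set.path_docs).load()                     # read everything in chatbot/docs
chunks = RecursiveCharacterTextSplitter().split_documents(docs)  # cut into retrievable chunks
Chroma.from_documents(                                           # embed + persist into chatbot/db
  chunks,
  HuggingFaceInstructEmbeddings(**set.embed_args),               # assumes embed_args fits this constructor
  persist_directory = set.path_db
)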

ai chatbot/chatbot/0-setup.bat
Lines changed: 1 addition & 5 deletions

@@ -14,8 +14,4 @@ if "%1"=="CPU" (
 )
 pip install --no-cache-dir --upgrade --force-reinstall llama-cpp-python
 python b_create.py
-if "%1"=="CPU" (
-  echo "Install complete - Please download your own model before running 2-bot.bat"
-) else (
-  python d_bot.py
-)
+echo "Install complete - Please download your own model before running 2-bot.bat"

ai chatbot/chatbot/0-setup.sh
Lines changed: 1 addition & 5 deletions

@@ -11,8 +11,4 @@ else
   CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --no-cache-dir --upgrade --force-reinstall llama-cpp-python
 fi
 python b_create.py
-if [[ $1 == "CPU" ]]
-then
-  echo "Install complete - Please download your own model before running 2-bot.sh"
-else
-  python d_bot.py
+echo "Install complete - Please download your own model before running 2-bot.sh"

ai chatbot/chatbot/a_settings.py
Lines changed: 39 additions & 25 deletions

@@ -1,50 +1,70 @@
-# (A) LOAD MODULES
-import os, torch
-
-# (B) MODEL
+# (A) MODEL
 # hugging face url path, or model file inside models/
-model_name = "TheBloke/vicuna-7B-v1.5-GPTQ"
-#model_name = "llama-2-7b.Q5_K_M.gguf"
+#model_name = "TheBloke/vicuna-7B-v1.5-GPTQ"
+model_name = "llama-2-7b-chat.Q5_K_M.gguf"
 
-# (C) AUTO - PATH
+# (B) AUTO - PATH
+import os
 path_base = os.path.dirname(os.path.realpath(__file__))
 path_models = os.path.join(path_base, "models")
 path_db = os.path.join(path_base, "db")
 path_docs = os.path.join(path_base, "docs")
 
+# (C) AUTO - CPU OR GPU
+import torch
+if not any((torch.cuda.is_available(), torch.backends.mps.is_available())):
+  gpu = False
+else:
+  gpu = True
+
 # (D) LLAMA CPP
 if os.path.isfile(os.path.join(path_models, model_name)):
+  # (D1) LLAMA MODEL FILE
   model_file = os.path.join(path_models, model_name)
+
+  # (D2) LLAMA MODEL SETTINGS
+  # https://api.python.langchain.com/en/latest/llms/langchain.llms.llamacpp.LlamaCpp.html
+  # FACTUAL
   model_args = {
-    "max_tokens" : 2000,
+    "repeat_penalty" : 1.176,
     "temperature" : 0.7,
     "top_k" : 40,
-    "top_p" : 1,
+    "top_p" : 0.1,
+    "n_ctx" : 3000,
+    "max_tokens" : 3000,
     "n_gpu_layers" : 40,
     "n_batch" : 512,
     "streaming" : False,
    "verbose" : False
  }
+  """ CREATIVE
+  "repeat_penalty" : 1.1,
+  "temperature" : 0.75,
+  "top_k" : 0,
+  "top_p" : 0.7,
+  """
 
 # (E) HF TRANSFORMER
 else:
+  # (E1) TRANSFORMER ENVIRONMENT VARIABLES
   os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "true"
   os.environ["TRANSFORMERS_CACHE"] = path_models
+
+  # (E2) MODEL VARIABLES
+  # https://huggingface.co/docs/transformers/main_classes/text_generation
   model_args = {
     "do_sample" : True,
-    "max_new_tokens" : 2000,
-    "batch_size" : 1,
     "temperature" : 0.7,
     "top_k" : 40,
     "top_p" : 1,
-    "num_return_sequences" : 1
+    "max_new_tokens" : 3000
   }
 
-# (F) AUTO - CPU OR GPU
-if not any((torch.cuda.is_available(), torch.backends.mps.is_available())):
-  gpu = False
-else:
-  gpu = True
+# (F) PROMPT TEMPLATE
+prompt_template = """SYSTEM: Use the following context section and only that context to answer the question at the end. Do not use your internal knowledge. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+CONTEXT: {context}
+USER: {question}
+ANSWER:"""

@@ -65,17 +85,11 @@
   "verbose" : True
 }
 
-# (J) PROMPT TEMPLATE
-prompt_template = """SYSTEM: Use the following context section and only that context to answer the question at the end. Do not use your internal knowledge. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-CONTEXT: {context}
-USER: {question}
-ANSWER:"""
-
-# (K) HTTP ENDPOINT
+# (J) HTTP ENDPOINT
 http_allow = ["http://localhost"]
 http_host = "localhost"
 http_port = 8008
 
-# (L) JWT
+# (K) JWT
 jwt_algo = ""
 jwt_secret = ""
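
The settings file only declares values; going by the LlamaCpp docs URL it cites, the llama.cpp branch is presumably consumed roughly like this (a sketch of assumed wiring; the actual consumer code is not in this diff):

# sketch: feeding the llama.cpp settings into langchain's LlamaCpp wrapper
# (assumed wiring, not shown in this commit)
from langchain.llms import LlamaCpp
import a_settings as set

llm = LlamaCpp(model_path = set.model_file, **set.model_args)  # n_ctx, n_gpu_layers, etc.
print(llm(set.prompt_template.format(context = "Core Boxx is a PHP boilerplate.", question = "What is Core Boxx?")))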

ai chatbot/chatbot/c_oto_rodo.py
Lines changed: 0 additions & 1 deletion

@@ -44,7 +44,6 @@ def max_mem():
 
 # (C3) INIT MODEL PARAMS
 params = {
-  "low_cpu_mem_usage": True,
   "device_map" : "auto"
 }
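
With `low_cpu_mem_usage` dropped, `device_map` alone steers model placement. In transformers such a params dict is typically splatted into `from_pretrained`, along these lines (a sketch; the actual load call in `c_oto_rodo.py` is outside this hunk):

# sketch: how a params dict like this usually reaches transformers
# (the exact call in c_oto_rodo.py is not shown in this diff; device_map needs accelerate installed)
from transformers import AutoModelForCausalLM

params = {"device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained("TheBloke/vicuna-7B-v1.5-GPTQ", **params)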

ai chatbot/chatbot/d_bot.py
Lines changed: 2 additions & 5 deletions

@@ -1,15 +1,12 @@
 # (A) LOAD SETTINGS & MODULES
-# (A1) SETTINGS & LANGCHAIN
 import a_settings as set
 import c_oto_rodo as oto
 from langchain import PromptTemplate
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.chains import RetrievalQA
-
-# (A2) FLASK
-# import jwt # @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
 from flask import Flask, Response, request
+# import jwt # @TODO - ENABLE THIS TO OPEN FOR REGISTERED USERS ONLY
 
 # (B) CHAIN
 chain = RetrievalQA.from_chain_type(

@@ -65,7 +62,7 @@ def bot():
   else:
     ans = "Where's the question, yo?"
   response = Response(ans, status = 200)
-  response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"] )
+  response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"])
   response.headers.add("Access-Control-Allow-Credentials", "true")
 
 # (D2) ORIGIN NOT ALLOWED
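
The whitespace fix above sits inside the usual origin allow-list pattern; pieced together with `http_allow` from `a_settings.py`, the surrounding route plausibly looks like this (a reconstruction for context, not verbatim project code):

# reconstruction of the endpoint around the fixed line (for context only)
from flask import Flask, Response, request
import a_settings as set

app = Flask(__name__)

@app.route("/", methods = ["POST"])
def bot():
  # (D1) origin allowed - answer + CORS headers
  if request.environ.get("HTTP_ORIGIN") in set.http_allow:
    ans = "..."  # answer produced by the RetrievalQA chain
    response = Response(ans, status = 200)
    response.headers.add("Access-Control-Allow-Origin", request.environ["HTTP_ORIGIN"])
    response.headers.add("Access-Control-Allow-Credentials", "true")
  # (D2) origin not allowed
  else:
    response = Response("Origin not allowed", status = 403)
  return response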
