From 161f562171ef32b2e0c47069822e49202a22fa83 Mon Sep 17 00:00:00 2001 From: shanshi Date: Sat, 15 Jun 2024 22:14:58 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E5=8D=87=E7=BA=A7langchain=E7=AD=89?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E5=90=8C=E6=97=B6=E8=B0=83=E6=95=B4llm=5Fcon?= =?UTF-8?q?fig=E7=9B=B8=E5=85=B3=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- muagent/chat/search_chat.py | 2 +- muagent/codechat/code_search/code_search.py | 2 +- .../codechat/code_search/cypher_generator.py | 5 +- .../codebase_handler/code_importer.py | 2 +- muagent/connector/memory_manager.py | 6 +- .../{ => llm_models}/embeddings/__init__.py | 0 .../get_embedding.py | 6 +- .../huggingface_embedding.py | 2 +- muagent/llm_models/llm_config.py | 8 +- .../openai_embedding.py | 26 +++- muagent/llm_models/openai_model.py | 129 ++++++++++++------ .../commands/__init__.py | 0 .../commands/default_vs_cds.py | 0 .../retrieval/document_loaders/json_loader.py | 2 +- .../document_loaders/jsonl_loader.py | 2 +- muagent/{embeddings => retrieval}/faiss_m.py | 12 +- .../{embeddings => retrieval}/in_memory.py | 4 +- muagent/{embeddings => retrieval}/utils.py | 2 +- muagent/service/base_service.py | 4 +- muagent/service/cb_api.py | 2 +- muagent/service/faiss_db_service.py | 10 +- muagent/service/kb_api.py | 2 +- muagent/utils/path_utils.py | 2 +- requirements.txt | 13 +- tests/codechat/codebasehander_test.py | 10 +- tests/connector/agent_test.py | 11 +- tests/connector/chain_test.py | 1 - tests/connector/memory_manager_test.py | 2 +- tests/connector/phase_test.py | 1 - tests/llm_models/openai_test.py | 43 ++---- 31 files changed, 187 insertions(+), 127 deletions(-) rename muagent/{ => llm_models}/embeddings/__init__.py (100%) rename muagent/{embeddings => llm_models}/get_embedding.py (96%) rename muagent/{embeddings => llm_models}/huggingface_embedding.py (98%) rename muagent/{embeddings => llm_models}/openai_embedding.py (82%) rename muagent/{embeddings => retrieval}/commands/__init__.py (100%) rename muagent/{embeddings => retrieval}/commands/default_vs_cds.py (100%) rename muagent/{embeddings => retrieval}/faiss_m.py (96%) rename muagent/{embeddings => retrieval}/in_memory.py (89%) rename muagent/{embeddings => retrieval}/utils.py (92%) diff --git a/.gitignore b/.gitignore index ed2377e..73e0fc3 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ setup_test.py build *egg-info dist -.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints +zdatafront* \ No newline at end of file diff --git a/muagent/chat/search_chat.py b/muagent/chat/search_chat.py index 3d21798..cfc2b23 100644 --- a/muagent/chat/search_chat.py +++ b/muagent/chat/search_chat.py @@ -5,7 +5,7 @@ from langchain.callbacks import AsyncIteratorCallbackHandler from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper from langchain.prompts.chat import ChatPromptTemplate -from langchain.docstore.document import Document +from langchain_community.docstore.document import Document # from configs.model_config import ( # PROMPT_TEMPLATE, SEARCH_ENGINE_TOP_K, BING_SUBSCRIPTION_KEY, BING_SEARCH_URL, diff --git a/muagent/codechat/code_search/code_search.py b/muagent/codechat/code_search/code_search.py index 554d228..d3f5c22 100644 --- a/muagent/codechat/code_search/code_search.py +++ b/muagent/codechat/code_search/code_search.py @@ -15,7 +15,7 @@ from muagent.codechat.code_search.cypher_generator import CypherGenerator from muagent.codechat.code_search.tagger 
import Tagger -from muagent.embeddings.get_embedding import get_embedding +from muagent.llm_models.get_embedding import get_embedding from muagent.llm_models.llm_config import LLMConfig, EmbedConfig diff --git a/muagent/codechat/code_search/cypher_generator.py b/muagent/codechat/code_search/cypher_generator.py index 19929d3..893dc73 100644 --- a/muagent/codechat/code_search/cypher_generator.py +++ b/muagent/codechat/code_search/cypher_generator.py @@ -5,7 +5,7 @@ @time: 2023/11/24 上午10:17 @desc: ''' -from langchain import PromptTemplate +from langchain.prompts import PromptTemplate from loguru import logger from muagent.llm_models.openai_model import getChatModelFromConfig @@ -14,7 +14,8 @@ from langchain.schema import ( HumanMessage, ) -from langchain.chains.graph_qa.prompts import NGQL_GENERATION_PROMPT, CYPHER_GENERATION_TEMPLATE +# from langchain.chains.graph_qa.prompts import NGQL_GENERATION_PROMPT, CYPHER_GENERATION_TEMPLATE +from langchain_community.chains.graph_qa.prompts import CYPHER_GENERATION_TEMPLATE schema = ''' Node properties: [{'tag': 'package', 'properties': []}, {'tag': 'class', 'properties': []}, {'tag': 'method', 'properties': []}] diff --git a/muagent/codechat/codebase_handler/code_importer.py b/muagent/codechat/codebase_handler/code_importer.py index aad74c5..588c7dc 100644 --- a/muagent/codechat/codebase_handler/code_importer.py +++ b/muagent/codechat/codebase_handler/code_importer.py @@ -12,7 +12,7 @@ from muagent.db_handler.graph_db_handler.nebula_handler import NebulaHandler from muagent.db_handler.vector_db_handler.chroma_handler import ChromaHandler -from muagent.embeddings.get_embedding import get_embedding +from muagent.llm_models.get_embedding import get_embedding from muagent.llm_models.llm_config import EmbedConfig diff --git a/muagent/connector/memory_manager.py b/muagent/connector/memory_manager.py index 5ce1486..6096464 100644 --- a/muagent/connector/memory_manager.py +++ b/muagent/connector/memory_manager.py @@ -6,14 +6,14 @@ from loguru import logger import numpy as np -from langchain.docstore.document import Document +from langchain_community.docstore.document import Document from .schema import Memory, Message from muagent.service.service_factory import KBServiceFactory from muagent.llm_models import getChatModelFromConfig from muagent.llm_models.llm_config import EmbedConfig, LLMConfig -from muagent.embeddings.utils import load_embeddings_from_path +from muagent.retrieval.utils import load_embeddings_from_path from muagent.utils.common_utils import * from muagent.connector.configs.prompts import CONV_SUMMARY_PROMPT_SPEC from muagent.orm import table_init @@ -489,7 +489,7 @@ def check_chat_index(self, chat_index: str): from muagent.utils.tbase_util import TbaseHandler -from muagent.embeddings.get_embedding import get_embedding +from muagent.llm_models.get_embedding import get_embedding from redis.commands.search.field import ( TextField, NumericField, diff --git a/muagent/embeddings/__init__.py b/muagent/llm_models/embeddings/__init__.py similarity index 100% rename from muagent/embeddings/__init__.py rename to muagent/llm_models/embeddings/__init__.py diff --git a/muagent/embeddings/get_embedding.py b/muagent/llm_models/get_embedding.py similarity index 96% rename from muagent/embeddings/get_embedding.py rename to muagent/llm_models/get_embedding.py index 8619e25..307b0f6 100644 --- a/muagent/embeddings/get_embedding.py +++ b/muagent/llm_models/get_embedding.py @@ -8,8 +8,8 @@ from loguru import logger # from configs.model_config import 
EMBEDDING_MODEL
-from muagent.embeddings.openai_embedding import OpenAIEmbedding
-from muagent.embeddings.huggingface_embedding import HFEmbedding
+from muagent.llm_models.openai_embedding import OpenAIEmbedding
+from muagent.llm_models.huggingface_embedding import HFEmbedding
 from muagent.llm_models.llm_config import EmbedConfig
 
 
 def get_embedding(
@@ -35,7 +35,7 @@ def get_embedding(
         oae = OpenAIEmbedding()
         emb_res = oae.get_emb(text_list)
     elif engine == 'model':
-        hfe = HFEmbedding(model_path, embedding_device)
+        hfe = HFEmbedding(model_path, embed_config.model_device)
         emb_res = hfe.get_emb(text_list)
 
     return emb_res
diff --git a/muagent/embeddings/huggingface_embedding.py b/muagent/llm_models/huggingface_embedding.py
similarity index 98%
rename from muagent/embeddings/huggingface_embedding.py
rename to muagent/llm_models/huggingface_embedding.py
index 1b6d5d0..5568241 100644
--- a/muagent/embeddings/huggingface_embedding.py
+++ b/muagent/llm_models/huggingface_embedding.py
@@ -8,7 +8,7 @@
 from loguru import logger
 # from configs.model_config import EMBEDDING_DEVICE
 # from configs.model_config import embedding_model_dict
-from muagent.embeddings.utils import load_embeddings, load_embeddings_from_path
+from muagent.retrieval.utils import load_embeddings, load_embeddings_from_path
 
 
 class HFEmbedding:
diff --git a/muagent/llm_models/llm_config.py b/muagent/llm_models/llm_config.py
index 9dac682..504639d 100644
--- a/muagent/llm_models/llm_config.py
+++ b/muagent/llm_models/llm_config.py
@@ -11,6 +11,7 @@ class LLMConfig:
     def __init__(
             self,
             model_name: str = "gpt-3.5-turbo",
+            model_engine: str = "openai",
             temperature: float = 0.25,
             stop: Union[List[str], str] = None,
             api_key: str = "",
@@ -19,12 +20,15 @@ def __init__(
             llm: LLM = None,
             **kwargs
     ):
-
+        # only LLM services reachable over HTTP are supported here
+        # llm_model init config
        self.model_name: str = model_name
+        self.model_engine: str = model_engine
        self.temperature: float = temperature
        self.stop: Union[List[str], str] = stop
        self.api_key: str = api_key
        self.api_base_url: str = api_base_url
+        # custom llm
        self.llm: LLM = llm
 
        # self.check_config()
@@ -55,7 +59,7 @@ def __init__(
        self.model_device: str = model_device
        self.api_key: str = api_key
        self.api_base_url: str = api_base_url
-        # 
+        # custom embeddings
        self.langchain_embeddings = langchain_embeddings
 
        # self.check_config()
diff --git a/muagent/embeddings/openai_embedding.py b/muagent/llm_models/openai_embedding.py
similarity index 82%
rename from muagent/embeddings/openai_embedding.py
rename to muagent/llm_models/openai_embedding.py
index 180f654..17c70da 100644
--- a/muagent/embeddings/openai_embedding.py
+++ b/muagent/llm_models/openai_embedding.py
@@ -5,7 +5,9 @@
 @time: 2023/11/22 上午10:45
 @desc:
 '''
+
 import openai
+from openai import OpenAI
+from loguru import logger
 import base64
 import json
 import os
@@ -14,18 +16,34 @@
 
 class OpenAIEmbedding:
     def __init__(self):
-        pass
+
+        try:
+            from zdatafront import ZDataFrontClient
+            from zdatafront.openai import SyncProxyHttpClient
+            # business identifiers assigned by zdatafront
+            VISIT_DOMAIN = os.environ.get("visit_domain")
+            VISIT_BIZ = os.environ.get("visit_biz")
+            VISIT_BIZ_LINE = os.environ.get("visit_biz_line")
+            # unified AES secret key provided by zdatafront
+            aes_secret_key = os.environ.get("aes_secret_key")
+
+            zdatafront_client = ZDataFrontClient(visit_domain=VISIT_DOMAIN, visit_biz=VISIT_BIZ, visit_biz_line=VISIT_BIZ_LINE, aes_secret_key=aes_secret_key)
+            self.http_client = SyncProxyHttpClient(zdatafront_client=zdatafront_client, prefer_async=True)
+        except Exception as e:
+            logger.warning("zdatafront is unavailable, falling back to the plain OpenAI client")
+            self.http_client = None
 
     def get_emb(self, text_list):
         openai.api_key = os.environ["OPENAI_API_KEY"]
-        openai.api_base = os.environ["API_BASE_URL"]
+        openai.base_url = os.environ["API_BASE_URL"]
 
-        # replace "," with the full-width "," to avoid an API bug
         modified_text_list = [i.replace(',', ',') for i in text_list]
+        # the openai v1 client does not read the module-level settings above,
+        # so the key and base URL are passed to the client explicitly
+        client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=os.environ["API_BASE_URL"], http_client=self.http_client)
 
-        emb_all_result = openai.Embedding.create(
+        emb_all_result = client.embeddings.create(
+            input=modified_text_list,
             model="text-embedding-ada-002",
-            input=modified_text_list
         )
 
         res = {}
diff --git a/muagent/llm_models/openai_model.py b/muagent/llm_models/openai_model.py
index fa19255..ead3b1c 100644
--- a/muagent/llm_models/openai_model.py
+++ b/muagent/llm_models/openai_model.py
@@ -3,7 +3,8 @@
 from loguru import logger
 
 from langchain.callbacks import AsyncIteratorCallbackHandler
-from langchain.chat_models import ChatOpenAI
+# from langchain.chat_models import ChatOpenAI
+from langchain_openai import ChatOpenAI
 from langchain.llms.base import LLM
 
 from .llm_config import LLMConfig
@@ -21,57 +22,107 @@ def __call__(self, prompt: str,
 
     def _call(self, prompt: str,
               stop: Optional[List[str]] = None):
-        return self.llm(prompt, stop)
+        return self(prompt, stop)
 
     def predict(self, prompt: str,
                 stop: Optional[List[str]] = None):
-        return self.llm(prompt, stop)
+        return self(prompt, stop)
 
     def batch(self, prompts: str,
               stop: Optional[List[str]] = None):
-        return [self.llm(prompt, stop) for prompt in prompts]
+        return [self(prompt, stop) for prompt in prompts]
+
+
+
+class OpenAILLMModel(CustomLLMModel):
+
+    def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None,):
+        # logger.debug(f"llm type is {type(llm_config.llm)}")
+        try:
+            from zdatafront import ZDataFrontClient
+            from zdatafront.openai import SyncProxyHttpClient
+            # business identifiers assigned by zdatafront
+            VISIT_DOMAIN = os.environ.get("visit_domain")
+            VISIT_BIZ = os.environ.get("visit_biz")
+            VISIT_BIZ_LINE = os.environ.get("visit_biz_line")
+            # unified AES secret key provided by zdatafront
+            aes_secret_key = os.environ.get("aes_secret_key")
+
+            zdatafront_client = ZDataFrontClient(visit_domain=VISIT_DOMAIN, visit_biz=VISIT_BIZ, visit_biz_line=VISIT_BIZ_LINE, aes_secret_key=aes_secret_key)
+            http_client = SyncProxyHttpClient(zdatafront_client=zdatafront_client, prefer_async=True)
+        except Exception as e:
+            logger.warning("zdatafront is unavailable, falling back to the plain OpenAI client")
+            http_client = None
+
+        if llm_config is None:
+            self.llm = ChatOpenAI(
+                streaming=True,
+                verbose=True,
+                api_key=os.environ.get("api_key"),
+                base_url=os.environ.get("api_base_url"),
+                model_name=os.environ.get("LLM_MODEL", "gpt-3.5-turbo"),
+                temperature=float(os.environ.get("temperature", 0.5)),
+                model_kwargs={"stop": os.environ.get("stop", "")},
+                http_client=http_client
+            )
+        else:
+            self.llm = ChatOpenAI(
+                streaming=True,
+                verbose=True,
+                # pass the credentials from llm_config so they are not silently ignored
+                api_key=llm_config.api_key,
+                base_url=llm_config.api_base_url,
+                model_name=llm_config.model_name,
+                temperature=llm_config.temperature,
+                model_kwargs={"stop": llm_config.stop},
+                http_client=http_client,
+                # callbacks=[callBack],
+            )
+        if callBack is not None:
+            self.llm.callbacks = [callBack]
+
+    def __call__(self, prompt: str,
+                 stop: Optional[List[str]] = None):
+        return self.llm.predict(prompt, stop=stop)
+
+
+class LYWWLLMModel(OpenAILLMModel):
+
+    def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None,):
+        if llm_config is None:
+            api_key=os.environ.get("api_key")
+            base_url=os.environ.get("api_base_url")
+            model_name=os.environ.get("LLM_MODEL", "yi-34b-chat-0205")
+            temperature=float(os.environ.get("temperature", 0.5))
+            
model_kwargs={"stop": os.environ.get("stop", "")} + else: + api_key=llm_config.api_key + base_url=llm_config.api_base_url + model_name=llm_config.model_name + temperature=llm_config.temperature + model_kwargs={"stop": llm_config.stop} + + self.llm = ChatOpenAI( + streaming=True, + verbose=True, + api_key=api_key, + base_url=base_url, + model_name=model_name, + temperature=temperature, + model_kwargs=model_kwargs, + ) + def getChatModelFromConfig(llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None, ) -> Union[ChatOpenAI, LLM]: - # logger.debug(f"llm type is {type(llm_config.llm)}") - if llm_config is None: - model = ChatOpenAI( - streaming=True, - verbose=True, - openai_api_key=os.environ.get("api_key"), - openai_api_base=os.environ.get("api_base_url"), - model_name=os.environ.get("LLM_MODEL", "gpt-3.5-turbo"), - temperature=os.environ.get("temperature", 0.5), - stop=os.environ.get("stop", ""), - ) - return model if llm_config and llm_config.llm and isinstance(llm_config.llm, LLM): return CustomLLMModel(llm=llm_config.llm) - - if callBack is None: - model = ChatOpenAI( - streaming=True, - verbose=True, - openai_api_key=llm_config.api_key, - openai_api_base=llm_config.api_base_url, - model_name=llm_config.model_name, - temperature=llm_config.temperature, - stop=llm_config.stop - ) + elif llm_config: + model_class_dict = {"openai": OpenAILLMModel, "lingyiwanwu": LYWWLLMModel} + model_class = model_class_dict[llm_config.model_engine] + model = model_class(llm_config, callBack) + logger.debug(f"{model}") + return model else: - model = ChatOpenAI( - streaming=True, - verbose=True, - callBack=[callBack], - openai_api_key=llm_config.api_key, - openai_api_base=llm_config.api_base_url, - model_name=llm_config.model_name, - temperature=llm_config.temperature, - stop=llm_config.stop - ) - - return model + return OpenAILLMModel(llm_config, callBack) import json, requests diff --git a/muagent/embeddings/commands/__init__.py b/muagent/retrieval/commands/__init__.py similarity index 100% rename from muagent/embeddings/commands/__init__.py rename to muagent/retrieval/commands/__init__.py diff --git a/muagent/embeddings/commands/default_vs_cds.py b/muagent/retrieval/commands/default_vs_cds.py similarity index 100% rename from muagent/embeddings/commands/default_vs_cds.py rename to muagent/retrieval/commands/default_vs_cds.py diff --git a/muagent/retrieval/document_loaders/json_loader.py b/muagent/retrieval/document_loaders/json_loader.py index 574e078..7f6ab95 100644 --- a/muagent/retrieval/document_loaders/json_loader.py +++ b/muagent/retrieval/document_loaders/json_loader.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import AnyStr, Callable, Dict, List, Optional, Union -from langchain.docstore.document import Document +from langchain_community.docstore.document import Document from langchain.document_loaders.base import BaseLoader from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter diff --git a/muagent/retrieval/document_loaders/jsonl_loader.py b/muagent/retrieval/document_loaders/jsonl_loader.py index bec8033..e9a9135 100644 --- a/muagent/retrieval/document_loaders/jsonl_loader.py +++ b/muagent/retrieval/document_loaders/jsonl_loader.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import AnyStr, Callable, Dict, List, Optional, Union -from langchain.docstore.document import Document +from langchain_community.docstore.document import Document from langchain.document_loaders.base import BaseLoader from langchain.text_splitter import 
RecursiveCharacterTextSplitter, TextSplitter diff --git a/muagent/embeddings/faiss_m.py b/muagent/retrieval/faiss_m.py similarity index 96% rename from muagent/embeddings/faiss_m.py rename to muagent/retrieval/faiss_m.py index d7b6721..7f3fc69 100644 --- a/muagent/embeddings/faiss_m.py +++ b/muagent/retrieval/faiss_m.py @@ -21,13 +21,13 @@ import numpy as np -from langchain.docstore.base import AddableMixin, Docstore -from langchain.docstore.document import Document -# from langchain.docstore.in_memory import InMemoryDocstore +from langchain_community.docstore.base import AddableMixin, Docstore +from langchain_community.docstore.document import Document +# from langchain_community.docstore.in_memory import InMemoryDocstore from .in_memory import InMemoryDocstore from langchain.embeddings.base import Embeddings -from langchain.vectorstores.base import VectorStore -from langchain.vectorstores.utils import maximal_marginal_relevance +from langchain_community.vectorstores import VectorStore +from langchain_community.vectorstores.utils import maximal_marginal_relevance class DistanceStrategy(str, Enum): @@ -86,7 +86,7 @@ class FAISS(VectorStore): .. code-block:: python from langchain.embeddings.openai import OpenAIEmbeddings - from langchain.vectorstores import FAISS + from langchain_community.vectorstores import FAISS embeddings = OpenAIEmbeddings() texts = ["FAISS is an important library", "LangChain supports FAISS"] diff --git a/muagent/embeddings/in_memory.py b/muagent/retrieval/in_memory.py similarity index 89% rename from muagent/embeddings/in_memory.py rename to muagent/retrieval/in_memory.py index f92484d..342bd5e 100644 --- a/muagent/embeddings/in_memory.py +++ b/muagent/retrieval/in_memory.py @@ -1,8 +1,8 @@ """Simple in memory docstore in the form of a dict.""" from typing import Dict, List, Optional, Union -from langchain.docstore.base import AddableMixin, Docstore -from langchain.docstore.document import Document +from langchain_community.docstore.base import AddableMixin, Docstore +from langchain_community.docstore.document import Document class InMemoryDocstore(Docstore, AddableMixin): diff --git a/muagent/embeddings/utils.py b/muagent/retrieval/utils.py similarity index 92% rename from muagent/embeddings/utils.py rename to muagent/retrieval/utils.py index 25088b1..9a42dbf 100644 --- a/muagent/embeddings/utils.py +++ b/muagent/retrieval/utils.py @@ -1,6 +1,6 @@ import os from functools import lru_cache -from langchain.embeddings.huggingface import HuggingFaceEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings from langchain.embeddings.base import Embeddings # from configs.model_config import embedding_model_dict diff --git a/muagent/service/base_service.py b/muagent/service/base_service.py index 76248fb..b180918 100644 --- a/muagent/service/base_service.py +++ b/muagent/service/base_service.py @@ -3,7 +3,7 @@ import os from langchain.embeddings.base import Embeddings -from langchain.docstore.document import Document +from langchain_community.docstore.document import Document # from configs.model_config import ( # kbs_config, VECTOR_SEARCH_TOP_K, SCORE_THRESHOLD, @@ -16,7 +16,7 @@ from muagent.orm.commands import * from muagent.utils.path_utils import * from muagent.orm.utils import DocumentFile -from muagent.embeddings.utils import load_embeddings, load_embeddings_from_path +from muagent.retrieval.utils import load_embeddings, load_embeddings_from_path from muagent.retrieval.text_splitter import LCTextSplitter from muagent.llm_models.llm_config import EmbedConfig 
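
Note: the renames above split the old muagent.embeddings package in two — the embedding-model wrappers (get_embedding, openai_embedding, huggingface_embedding) move under muagent.llm_models, while the vector-store helpers (faiss_m, in_memory, utils) move under muagent.retrieval. A minimal sketch of the import surface after this patch; the embed model name and path below are placeholders, not values taken from this repo:

    from muagent.llm_models.get_embedding import get_embedding        # was muagent.embeddings.get_embedding
    from muagent.retrieval.utils import load_embeddings_from_path     # was muagent.embeddings.utils
    from muagent.retrieval.faiss_m import FAISS                       # was muagent.embeddings.faiss_m
    from muagent.llm_models.llm_config import EmbedConfig

    # embed_engine="openai" routes to OpenAIEmbedding, "model" to HFEmbedding
    embed_config = EmbedConfig(
        embed_engine="model",
        embed_model="text2vec-base-chinese",         # placeholder model name
        embed_model_path="/models/text2vec-base",    # placeholder local path
    )
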
diff --git a/muagent/service/cb_api.py b/muagent/service/cb_api.py
index 392d7dd..0c60abb 100644
--- a/muagent/service/cb_api.py
+++ b/muagent/service/cb_api.py
@@ -12,7 +12,7 @@
 from fastapi.responses import StreamingResponse, FileResponse
 from fastapi import File, Form, Body, Query, UploadFile
 
-from langchain.docstore.document import Document
+from langchain_community.docstore.document import Document
 
 from .service_factory import KBServiceFactory
 from muagent.utils.server_utils import BaseResponse, ListResponse
diff --git a/muagent/service/faiss_db_service.py b/muagent/service/faiss_db_service.py
index 499b793..77720f8 100644
--- a/muagent/service/faiss_db_service.py
+++ b/muagent/service/faiss_db_service.py
@@ -4,10 +4,10 @@
 from functools import lru_cache
 from loguru import logger
 
-# from langchain.vectorstores import FAISS
+# from langchain_community.vectorstores import FAISS
 from langchain.embeddings.base import Embeddings
-from langchain.docstore.document import Document
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain_community.docstore.document import Document
+from langchain_huggingface import HuggingFaceEmbeddings
 
 from muagent.base_configs.env_config import (
     KB_ROOT_PATH,
@@ -18,8 +18,8 @@
 from muagent.utils.path_utils import *
 from muagent.orm.utils import DocumentFile
 from muagent.utils.server_utils import torch_gc
-from muagent.embeddings.utils import load_embeddings, load_embeddings_from_path
-from muagent.embeddings.faiss_m import FAISS
+from muagent.retrieval.utils import load_embeddings, load_embeddings_from_path
+from muagent.retrieval.faiss_m import FAISS
 from muagent.llm_models.llm_config import EmbedConfig
 
 
diff --git a/muagent/service/kb_api.py b/muagent/service/kb_api.py
index 002c16b..6462c07 100644
--- a/muagent/service/kb_api.py
+++ b/muagent/service/kb_api.py
@@ -8,7 +8,7 @@
 from fastapi.responses import StreamingResponse, FileResponse
 from fastapi import Body, File, Form, Query, UploadFile
 
-from langchain.docstore.document import Document
+from langchain_community.docstore.document import Document
 
 from .service_factory import KBServiceFactory
 from muagent.utils.server_utils import BaseResponse, ListResponse
diff --git a/muagent/utils/path_utils.py b/muagent/utils/path_utils.py
index 4df121a..8d48cfc 100644
--- a/muagent/utils/path_utils.py
+++ b/muagent/utils/path_utils.py
@@ -1,5 +1,5 @@
 import os
-from langchain.document_loaders import CSVLoader, PyPDFLoader, UnstructuredFileLoader, TextLoader, PythonLoader
+from langchain_community.document_loaders import CSVLoader, PyPDFLoader, UnstructuredFileLoader, TextLoader, PythonLoader
 from muagent.retrieval.document_loaders import JSONLLoader, JSONLoader
 
 # from configs.model_config import (
diff --git a/requirements.txt b/requirements.txt
index ef0698c..f129643 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,12 @@
-openai==0.28.1
-langchain<=0.0.266
+openai
+langchain==0.2.3
+langchain_community==0.2.4
+langchain_openai==0.1.8
+langchain_huggingface==0.0.3
 sentence_transformers
 loguru
-fastapi~=0.99.1
+# fastapi~=0.99.1
+fastapi
 pandas
 jieba
 psutil
@@ -17,4 +21,7 @@ SQLAlchemy==2.0.19
 docker
 redis==5.0.1
 pydantic<=1.10.14
+# pydantic
 # duckduckgo-search
+
+sseclient
\ No newline at end of file
diff --git a/tests/codechat/codebasehander_test.py b/tests/codechat/codebasehander_test.py
index b6f4d2c..b885247 100644
--- a/tests/codechat/codebasehander_test.py
+++ b/tests/codechat/codebasehander_test.py
@@ -23,8 +23,11 @@
     embed_model_path = ""
     logger.error(f"{e}")
-
-
+# local debug
+src_dir = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+)
+sys.path.append(src_dir)
 from muagent.llm_models.llm_config import EmbedConfig, LLMConfig
 from muagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
 from muagent.base_configs.env_config import CB_ROOT_PATH
@@ -63,4 +66,5 @@
 
 
 # search_type = [tag, cypher, description] if you have llm and nebula-api
-code_text, related_vertex = cbh.search_code(query="remove函数做什么", search_type="tag", limit = 3)
\ No newline at end of file
+code_text, related_vertex = cbh.search_code(query="remove函数做什么", search_type="tag", limit = 3)
+print(code_text)
\ No newline at end of file
diff --git a/tests/connector/agent_test.py b/tests/connector/agent_test.py
index 137758e..f4b4c36 100644
--- a/tests/connector/agent_test.py
+++ b/tests/connector/agent_test.py
@@ -24,11 +24,10 @@
     embed_model_path = ""
     logger.error(f"{e}")
 
-# src_dir = os.path.join(
-#     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# )
-
-# sys.path.append(src_dir)
+src_dir = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+)
+sys.path.append(src_dir)
 from muagent.connector.agents import BaseAgent, ReactAgent, ExecutorAgent, SelectorAgent
 from muagent.connector.schema import Role, Message
 from muagent.llm_models.llm_config import EmbedConfig, LLMConfig
@@ -191,4 +190,4 @@
 # base_agent.pre_print(query)
 output_message = base_agent.step(query)
 print(output_message.input_query)
-print(output_message.role_content)
+print(output_message.parsed_output_list)
diff --git a/tests/connector/chain_test.py b/tests/connector/chain_test.py
index 427a1f1..649b50f 100644
--- a/tests/connector/chain_test.py
+++ b/tests/connector/chain_test.py
@@ -27,7 +27,6 @@
 src_dir = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 )
-
 sys.path.append(src_dir)
 from muagent.base_configs.env_config import JUPYTER_WORK_PATH
diff --git a/tests/connector/memory_manager_test.py b/tests/connector/memory_manager_test.py
index 24a6656..5518ea7 100644
--- a/tests/connector/memory_manager_test.py
+++ b/tests/connector/memory_manager_test.py
@@ -33,7 +33,7 @@
 
 
 llm_config = LLMConfig(
-    model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3
+    model_name=model_name, model_engine="openai", api_key=api_key, api_base_url=api_base_url, temperature=0.3,
 )
 
 embed_config = EmbedConfig(
diff --git a/tests/connector/phase_test.py b/tests/connector/phase_test.py
index 4ea3cfe..158dd55 100644
--- a/tests/connector/phase_test.py
+++ b/tests/connector/phase_test.py
@@ -27,7 +27,6 @@
 src_dir = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 )
-
 sys.path.append(src_dir)
 from muagent.base_configs.env_config import JUPYTER_WORK_PATH
diff --git a/tests/llm_models/openai_test.py b/tests/llm_models/openai_test.py
index 795ff85..d8c9ea8 100644
--- a/tests/llm_models/openai_test.py
+++ b/tests/llm_models/openai_test.py
@@ -24,9 +24,8 @@
 
 
-
-from langchain.chat_models import ChatOpenAI
-from langchain import PromptTemplate, LLMChain
+# test 1
+from langchain_openai import ChatOpenAI
 from langchain.prompts.chat import ChatPromptTemplate
 model = ChatOpenAI(
     streaming=True,
@@ -39,34 +38,12 @@
 # test 1
 print(model.predict("please output 123!"))
 
-# # test 2
-# chat_prompt = ChatPromptTemplate.from_messages([("human", "{input}")])
-# chain = LLMChain(prompt=chat_prompt, llm=model)
-# content = 
chain({"input": "who are you!"}) -# print(content) -# test 3 -# import openai -# # openai.api_key = "EMPTY" # Not support yet -# openai.api_base = api_base_url -# # create a chat completion -# completion = openai.ChatCompletion.create( -# model=model_name, -# messages=[{"role": "user", "content": "Hello! What is your name? "}], -# max_tokens=100, -# ) -# # print the completion -# print(completion.choices[0].message.content) - -# import openai -# # openai.api_key = "EMPTY" # Not support yet -# openai.api_base = "http://127.0.0.1:8888/v1" -# model = "example" -# # create a chat completion -# completion = openai.ChatCompletion.create( -# model=model, -# messages=[{"role": "user", "content": "Hello! What is your name? "}], -# max_tokens=100, -# ) -# # print the completion -# print(completion.choices[0].message.content) \ No newline at end of file +# # test 2 +# from openai import OpenAI +# http_client = None +# client = OpenAI(api_key=os.environ.get("api_key"), http_client=http_client) +# model = 'gpt-3.5-turbo' +# messages=[{'role': 'user', 'content': 'Hello World'}] +# result = client.chat.completions.create(model=model, messages=messages) +# print(result) From e19e3bb8e08f6504de37dc70efd2f4d6bb435911 Mon Sep 17 00:00:00 2001 From: shanshi Date: Sun, 16 Jun 2024 14:35:21 +0800 Subject: [PATCH 2/5] update open interface params --- Dockerfile | 20 ++++ docker_build.sh | 3 + .../muagent_examples/baseGroup_example.py | 8 +- .../muagent_examples/baseTask_examples.py | 16 +++- .../muagent_examples/codeGenDoc_example.py | 22 +++-- ...example_copy.py => codeGenTest_example.py} | 18 +++- .../muagent_examples/codeReact_example.py | 20 +++- .../muagent_examples/codeRetrieval_example.py | 20 +++- .../muagent_examples/codeToolReact_example.py | 20 +++- examples/muagent_examples/codechat_example.py | 52 ++++++---- examples/muagent_examples/docchat_example.py | 20 +++- examples/muagent_examples/load_codebase.py | 21 ++-- examples/muagent_examples/metagpt_example.py | 19 +++- examples/muagent_examples/search_example.py | 21 +++- .../muagent_examples/toolReact_example.py | 12 ++- examples/start.py | 5 +- examples/test_config.py.example | 1 + muagent/chat/agent_chat.py | 44 +++++---- muagent/chat/base_chat.py | 44 +++++---- muagent/chat/code_chat.py | 50 ++++++---- muagent/chat/knowledge_chat.py | 12 ++- muagent/chat/search_chat.py | 2 +- muagent/llm_models/openai_model.py | 8 +- muagent/service/cb_api.py | 63 ++++++------ muagent/service/kb_api.py | 96 ++++++++++--------- muagent/tools/cb_query_tool.py | 12 ++- muagent/tools/codechat_tools.py | 15 ++- muagent/tools/docs_retrieval.py | 9 +- requirements.txt | 4 +- tests/connector/agent_test.py | 11 ++- tests/connector/chain_test.py | 5 +- tests/connector/flow_test.py | 10 +- tests/connector/memory_manager_test.py | 6 +- tests/connector/phase_test.py | 6 +- tests/test_config.py.example | 2 +- 35 files changed, 468 insertions(+), 229 deletions(-) create mode 100644 Dockerfile create mode 100644 docker_build.sh rename examples/muagent_examples/{codeGenTest_example_copy.py => codeGenTest_example.py} (93%) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2ac6041 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +From python:3.9.18-bookworm + +WORKDIR /home/user + +COPY ./requirements.txt /home/user/docker_requirements.txt + + +# RUN apt-get update +# RUN apt-get install -y iputils-ping telnetd net-tools vim tcpdump +# RUN echo telnet stream tcp nowait telnetd /usr/sbin/tcpd /usr/sbin/in.telnetd /etc/inetd.conf +# RUN service inetutils-inetd 
start +# service inetutils-inetd status + +RUN wget https://oss-cdn.nebula-graph.com.cn/package/3.6.0/nebula-graph-3.6.0.ubuntu1804.amd64.deb +RUN dpkg -i nebula-graph-3.6.0.ubuntu1804.amd64.deb + +RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple +RUN pip install -r /home/user/docker_requirements.txt + +CMD ["bash"] \ No newline at end of file diff --git a/docker_build.sh b/docker_build.sh new file mode 100644 index 0000000..ac6dfc1 --- /dev/null +++ b/docker_build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +docker build -t muagent:0.0.1 . \ No newline at end of file diff --git a/examples/muagent_examples/baseGroup_example.py b/examples/muagent_examples/baseGroup_example.py index 84973e4..315f525 100644 --- a/examples/muagent_examples/baseGroup_example.py +++ b/examples/muagent_examples/baseGroup_example.py @@ -13,6 +13,7 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] try: from test_config import BgeBaseChineseEmbeddings @@ -24,12 +25,17 @@ api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" embeddings = None logger.error(f"{e}") - +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.base_configs.env_config import JUPYTER_WORK_PATH from muagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS from muagent.llm_models.llm_config import EmbedConfig, LLMConfig diff --git a/examples/muagent_examples/baseTask_examples.py b/examples/muagent_examples/baseTask_examples.py index c88ffd9..1e367c7 100644 --- a/examples/muagent_examples/baseTask_examples.py +++ b/examples/muagent_examples/baseTask_examples.py @@ -13,16 +13,30 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.phase import BasePhase from muagent.connector.schema import Message @@ -32,7 +46,7 @@ os.environ["log_verbose"] = "0" llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/codeGenDoc_example.py b/examples/muagent_examples/codeGenDoc_example.py index 63113e8..28be9d4 100644 --- a/examples/muagent_examples/codeGenDoc_example.py +++ b/examples/muagent_examples/codeGenDoc_example.py @@ -1,5 +1,4 @@ import os -import json from loguru import logger try: @@ -14,22 +13,31 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + 
embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") import sys, os -src_dir = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -) -print(src_dir) -sys.path.append(src_dir) +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.base_configs.env_config import CB_ROOT_PATH from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.phase import BasePhase @@ -151,7 +159,7 @@ def start_action_step(self, message: Message) -> Message: llm_config = LLMConfig( - model_name="gpt-4", api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( embed_engine="model", embed_model=embed_model, embed_model_path=embed_model_path diff --git a/examples/muagent_examples/codeGenTest_example_copy.py b/examples/muagent_examples/codeGenTest_example.py similarity index 93% rename from examples/muagent_examples/codeGenTest_example_copy.py rename to examples/muagent_examples/codeGenTest_example.py index ecfd27f..4a245cc 100644 --- a/examples/muagent_examples/codeGenTest_example_copy.py +++ b/examples/muagent_examples/codeGenTest_example.py @@ -1,5 +1,4 @@ import os -import json from loguru import logger try: @@ -14,15 +13,30 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") + +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.base_configs.env_config import CB_ROOT_PATH from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.phase import BasePhase @@ -162,7 +176,7 @@ def start_action_step(self, message: Message) -> Message: llm_config = LLMConfig( - model_name="gpt-4", api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( embed_engine="model", embed_model=embed_model, embed_model_path=embed_model_path diff --git a/examples/muagent_examples/codeReact_example.py b/examples/muagent_examples/codeReact_example.py index 19868fd..5a0e155 100644 --- a/examples/muagent_examples/codeReact_example.py +++ b/examples/muagent_examples/codeReact_example.py @@ -1,7 +1,8 @@ -import os, sys, json +import os from loguru import logger try: + import os, sys src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) @@ -12,16 +13,29 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import 
BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") - +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.base_configs.env_config import JUPYTER_WORK_PATH from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.phase import BasePhase @@ -31,7 +45,7 @@ os.environ["log_verbose"] = "0" llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/codeRetrieval_example.py b/examples/muagent_examples/codeRetrieval_example.py index 1a7a232..6174f0e 100644 --- a/examples/muagent_examples/codeRetrieval_example.py +++ b/examples/muagent_examples/codeRetrieval_example.py @@ -1,7 +1,8 @@ -import os, sys, json +import os from loguru import logger try: + import os, sys src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) @@ -12,16 +13,29 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") - +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.agents import BaseAgent, ReactAgent, ExecutorAgent, SelectorAgent from muagent.connector.chains import BaseChain @@ -132,7 +146,7 @@ def end_action_step(self, message: Message) -> Message: # llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/codeToolReact_example.py b/examples/muagent_examples/codeToolReact_example.py index 20395cf..effa33b 100644 --- a/examples/muagent_examples/codeToolReact_example.py +++ b/examples/muagent_examples/codeToolReact_example.py @@ -1,7 +1,8 @@ -import os, sys, json +import os from loguru import logger try: + import os, sys src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) @@ -12,16 +13,29 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") - +# # test local code +# 
src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS from muagent.llm_models.llm_config import EmbedConfig, LLMConfig @@ -38,7 +52,7 @@ os.environ["log_verbose"] = "0" llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/codechat_example.py b/examples/muagent_examples/codechat_example.py index 241aae3..9fefc95 100644 --- a/examples/muagent_examples/codechat_example.py +++ b/examples/muagent_examples/codechat_example.py @@ -13,15 +13,25 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") +# # test local code # src_dir = os.path.join( # os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # ) @@ -38,7 +48,7 @@ llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( @@ -70,26 +80,26 @@ ) -# round-1 -query_content = "代码一共有多少类" -query = Message( - chat_index="codechat_test", role_name="human", role_type="user", input_query=query_content, - code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="cypher", - local_graph_path=CB_ROOT_PATH, use_nh=use_nh - ) - -output_message1, output_memory1 = phase.step(query) -print(output_memory1.to_str_messages(return_all=True, content_key="parsed_output_list")) - -# round-2 -query_content = "代码库里有哪些函数,返回5个就行" -query = Message( - chat_index="codechat_test", role_name="human", role_type="user", input_query=query_content, - code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="cypher", - local_graph_path=CB_ROOT_PATH, use_nh=use_nh - ) -output_message2, output_memory2 = phase.step(query) -print(output_memory2.to_str_messages(return_all=True, content_key="parsed_output_list")) +# # round-1 +# query_content = "代码一共有多少类" +# query = Message( +# chat_index="codechat_test", role_name="human", role_type="user", input_query=query_content, +# code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="cypher", +# local_graph_path=CB_ROOT_PATH, use_nh=use_nh +# ) + +# output_message1, output_memory1 = phase.step(query) +# print(output_memory1.to_str_messages(return_all=True, content_key="parsed_output_list")) + +# # round-2 +# query_content = "代码库里有哪些函数,返回5个就行" +# query = Message( +# chat_index="codechat_test", role_name="human", role_type="user", input_query=query_content, +# code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="cypher", +# local_graph_path=CB_ROOT_PATH, use_nh=use_nh +# ) +# output_message2, output_memory2 = phase.step(query) +# print(output_memory2.to_str_messages(return_all=True, content_key="parsed_output_list")) # # round-3 diff --git 
a/examples/muagent_examples/docchat_example.py b/examples/muagent_examples/docchat_example.py index 8c7f982..1ba7ed4 100644 --- a/examples/muagent_examples/docchat_example.py +++ b/examples/muagent_examples/docchat_example.py @@ -1,7 +1,8 @@ -import os, sys, json +import os from loguru import logger try: + import os, sys src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) @@ -12,17 +13,30 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") - +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.base_configs.env_config import KB_ROOT_PATH from muagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS from muagent.llm_models.llm_config import EmbedConfig, LLMConfig @@ -35,7 +49,7 @@ # llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/load_codebase.py b/examples/muagent_examples/load_codebase.py index e8074a5..1267c80 100644 --- a/examples/muagent_examples/load_codebase.py +++ b/examples/muagent_examples/load_codebase.py @@ -13,21 +13,30 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") -src_dir = os.path.join( - os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -) -print(src_dir) -sys.path.append(src_dir) +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.base_configs.env_config import CB_ROOT_PATH from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.phase import BasePhase @@ -40,7 +49,7 @@ llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/metagpt_example.py b/examples/muagent_examples/metagpt_example.py index f9336cd..455549b 100644 --- a/examples/muagent_examples/metagpt_example.py +++ b/examples/muagent_examples/metagpt_example.py @@ -1,7 +1,8 @@ -import os, sys, json +import os from loguru import logger try: + import os, sys src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) @@ -12,16 +13,30 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] 
embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.connector.phase import BasePhase @@ -32,7 +47,7 @@ # llm_config = LLMConfig( - model_name="gpt-4", api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/search_example.py b/examples/muagent_examples/search_example.py index 7a686b0..068bd0d 100644 --- a/examples/muagent_examples/search_example.py +++ b/examples/muagent_examples/search_example.py @@ -1,7 +1,8 @@ -import os, sys, json +import os from loguru import logger try: + import os, sys src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) @@ -12,18 +13,30 @@ model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] + + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" + embeddings = None logger.error(f"{e}") - - +# # test local code +# src_dir = os.path.join( +# os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +# ) +# sys.path.append(src_dir) from muagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS from muagent.llm_models.llm_config import EmbedConfig, LLMConfig @@ -37,7 +50,7 @@ # llm_config = LLMConfig( - model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3 + model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3 ) embed_config = EmbedConfig( diff --git a/examples/muagent_examples/toolReact_example.py b/examples/muagent_examples/toolReact_example.py index 3632fc6..872ceeb 100644 --- a/examples/muagent_examples/toolReact_example.py +++ b/examples/muagent_examples/toolReact_example.py @@ -8,24 +8,30 @@ ) sys.path.append(src_dir) import test_config - from test_config import BgeBaseChineseEmbeddings api_key = os.environ["OPENAI_API_KEY"] api_base_url= os.environ["API_BASE_URL"] model_name = os.environ["model_name"] embed_model = os.environ["embed_model"] embed_model_path = os.environ["embed_model_path"] + model_engine = os.environ["model_engine"] - embeddings = BgeBaseChineseEmbeddings() + try: + from test_config import BgeBaseChineseEmbeddings + embeddings = BgeBaseChineseEmbeddings() + except: + embeddings = None except Exception as e: # set your config api_key = "" api_base_url= "" model_name = "" + model_engine = "" embed_model = "" embed_model_path = "" embeddings = None logger.error(f"{e}") +# test local code src_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) @@ -38,7 +44,7 @@ 
# llm_config = LLMConfig(
-    model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3
+    model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3
 )
 
 if embeddings:
diff --git a/examples/start.py b/examples/start.py
index e3bb358..524ea16 100644
--- a/examples/start.py
+++ b/examples/start.py
@@ -14,7 +14,7 @@
 DEFAULT_BIND_HOST = "127.0.0.1"
 os.environ["DEFAULT_BIND_HOST"] = DEFAULT_BIND_HOST
 CONTRAINER_NAME = "muagent"
-IMAGE_NAME = "devopsgpt:py39"
+IMAGE_NAME = "muagent:latest"
 
 SANDBOX_CONTRAINER_NAME = "devopsgpt_sandbox"
 SANDBOX_IMAGE_NAME = "devopsgpt:py39"
 SANDBOX_HOST = os.environ.get("SANDBOX_HOST") or DEFAULT_BIND_HOST # "172.25.0.3"
@@ -213,8 +213,7 @@ def start_api_service(sandbox_host=DEFAULT_BIND_HOST):
         '''curl -X PUT -H "Content-Type: application/json" -d'{"heartbeat_interval_secs":"2"}' -s "http://127.0.0.1:19669/flags"''',
         '''curl -X PUT -H "Content-Type: application/json" -d'{"heartbeat_interval_secs":"2"}' -s "http://127.0.0.1:19779/flags"''',
 
-        "pip install zdatafront-sdk-python==0.1.2 -i https://artifacts.antgroup-inc.cn/simple",
-        "pip install jieba",
+        "pip install zdatafront-sdk-python -i https://artifacts.antgroup-inc.cn/simple",
         "pip install duckduckgo-search",
 
         f"export DUCKDUCKGO_PROXY=socks5://host.docker.internal:13659 && export SANDBOX_HOST={sandbox_host}",
diff --git a/examples/test_config.py.example b/examples/test_config.py.example
index 01e99ca..1b96b3f 100644
--- a/examples/test_config.py.example
+++ b/examples/test_config.py.example
@@ -7,6 +7,7 @@ os.environ["API_BASE_URL"] = OPENAI_API_BASE
 os.environ["OPENAI_API_KEY"] = "sk-xxx"
 openai.api_key = "sk-xxx"
 os.environ["model_name"] = "gpt-3.5-turbo"
+os.environ["model_engine"] = "openai"
 
 
 # os.environ["embed_model"] = "{{embed_model_name}}"
diff --git a/muagent/chat/agent_chat.py b/muagent/chat/agent_chat.py
index 79628ee..618b404 100644
--- a/muagent/chat/agent_chat.py
+++ b/muagent/chat/agent_chat.py
@@ -70,14 +70,16 @@ def chat(
             kb_root_path: str = Body("", description="知识库存储路径"),
             jupyter_work_path: str = Body("", description="sandbox执行环境"),
             sandbox_server: str = Body({}, description="代码历史相关节点"),
-            api_key: str = Body(os.environ.get("OPENAI_API_KEY"), description=""),
-            api_base_url: str = Body(os.environ.get("API_BASE_URL"),),
-            embed_model: str = Body("", description="向量模型"),
-            embed_model_path: str = Body("", description="向量模型路径"),
-            model_device: str = Body("", description="模型加载设备"),
-            embed_engine: str = Body("", description="向量模型类型"),
-            model_name: str = Body("", description="llm模型名称"),
-            temperature: float = Body(0.2, description=""),
+            # api_key: str = Body(os.environ.get("OPENAI_API_KEY"), description=""),
+            # api_base_url: str = Body(os.environ.get("API_BASE_URL"),),
+            # embed_model: str = Body("", description="向量模型"),
+            # embed_model_path: str = Body("", description="向量模型路径"),
+            # model_device: str = Body("", description="模型加载设备"),
+            # embed_engine: str = Body("", description="向量模型类型"),
+            # model_name: str = Body("", description="llm模型名称"),
+            # temperature: float = Body(0.2, description=""),
+            llm_config: LLMConfig = Body({}, description="llm_model config"),
+            embed_config: EmbedConfig = Body({}, description="embedding_model config"),
             chat_index: str = "",
             local_graph_path: str = "",
             **kargs
@@ -88,8 +90,8 @@ def chat(
             custom_phase_configs, custom_chain_configs, custom_role_configs)
         params = locals()
         params.pop("self")
-        embed_config: EmbedConfig = EmbedConfig(**params)
-        llm_config: LLMConfig = LLMConfig(**params)
+        # 
embed_config: EmbedConfig = EmbedConfig(**params) + # llm_config: LLMConfig = LLMConfig(**params) logger.info('phase_configs={}'.format(phase_configs)) logger.info('chain_configs={}'.format(chain_configs)) @@ -216,14 +218,16 @@ def achat( kb_root_path: str = Body("", description="知识库存储路径"), jupyter_work_path: str = Body("", description="sandbox执行环境"), sandbox_server: str = Body({}, description="代码历史相关节点"), - api_key: str = Body(os.environ["OPENAI_API_KEY"], description=""), - api_base_url: str = Body(os.environ.get("API_BASE_URL"),), - embed_model: str = Body("", description="向量模型"), - embed_model_path: str = Body("", description="向量模型路径"), - model_device: str = Body("", description="模型加载设备"), - embed_engine: str = Body("", description="向量模型类型"), - model_name: str = Body("", description="llm模型名称"), - temperature: float = Body(0.2, description=""), + # api_key: str = Body(os.environ["OPENAI_API_KEY"], description=""), + # api_base_url: str = Body(os.environ.get("API_BASE_URL"),), + # embed_model: str = Body("", description="向量模型"), + # embed_model_path: str = Body("", description="向量模型路径"), + # model_device: str = Body("", description="模型加载设备"), + # embed_engine: str = Body("", description="向量模型类型"), + # model_name: str = Body("", description="llm模型名称"), + # temperature: float = Body(0.2, description=""), + llm_config: LLMConfig = Body({}, description="llm_model config"), + embed_config: EmbedConfig = Body({}, description="llm_model config"), chat_index: str = "", local_graph_path: str = "", **kargs @@ -236,8 +240,8 @@ def achat( # params = locals() params.pop("self") - embed_config: EmbedConfig = EmbedConfig(**params) - llm_config: LLMConfig = LLMConfig(**params) + # embed_config: EmbedConfig = EmbedConfig(**params) + # llm_config: LLMConfig = LLMConfig(**params) # choose tools tools = toLangchainTools([TOOL_DICT[i] for i in choose_tools if i in TOOL_DICT]) diff --git a/muagent/chat/base_chat.py b/muagent/chat/base_chat.py index 5771b40..1df3938 100644 --- a/muagent/chat/base_chat.py +++ b/muagent/chat/base_chat.py @@ -43,20 +43,22 @@ def chat( stream: bool = Body(False, description="流式输出"), local_doc_url: bool = Body(False, description="知识文件返回本地路径(true)或URL(false)"), request: Request = None, - api_key: str = Body(os.environ.get("OPENAI_API_KEY")), - api_base_url: str = Body(os.environ.get("API_BASE_URL")), - embed_model: str = Body("", ), - embed_model_path: str = Body("", ), - embed_engine: str = Body("", ), - model_name: str = Body("", ), - temperature: float = Body(0.5, ), - model_device: str = Body("", ), + # api_key: str = Body(os.environ.get("OPENAI_API_KEY")), + # api_base_url: str = Body(os.environ.get("API_BASE_URL")), + # embed_model: str = Body("", ), + # embed_model_path: str = Body("", ), + # embed_engine: str = Body("", ), + # model_name: str = Body("", ), + # temperature: float = Body(0.5, ), + # model_device: str = Body("", ), + llm_config: LLMConfig = Body({}, description="llm_model config"), + embed_config: EmbedConfig = Body({}, description="embedding_model config"), **kargs ): params = locals() params.pop("self", None) - llm_config: LLMConfig = LLMConfig(**params) - embed_config: EmbedConfig = EmbedConfig(**params) + # llm_config: LLMConfig = LLMConfig(**params) + # embed_config: EmbedConfig = EmbedConfig(**params) self.engine_name = engine_name if isinstance(engine_name, str) else engine_name.default self.top_k = top_k if isinstance(top_k, int) else top_k.default self.score_threshold = score_threshold if isinstance(score_threshold, float) else score_threshold.default 
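
Note: with this change the chat endpoints stop accepting flat api_key/model_name fields and instead take nested llm_config and embed_config request-body objects. A hedged sketch of what a client request could look like under the new signature — the host, port, and route below are assumptions, not values taken from this diff:

    import requests

    payload = {
        "query": "hello",
        "engine_name": "default_kb",
        "top_k": 5,
        "llm_config": {
            "model_name": "gpt-3.5-turbo",
            "model_engine": "openai",    # dispatch field introduced by this patch series
            "api_key": "sk-xxx",
            "api_base_url": "https://api.openai.com/v1",
            "temperature": 0.2,
        },
        "embed_config": {
            "embed_engine": "model",
            "embed_model": "text2vec-base-chinese",         # placeholder
            "embed_model_path": "/models/text2vec-base",    # placeholder
        },
    }
    resp = requests.post("http://127.0.0.1:7861/chat/knowledge_base_chat", json=payload)  # assumed route
    print(resp.json())
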
@@ -106,20 +108,22 @@ def achat(
         stream: bool = Body(False, description="流式输出"),
         local_doc_url: bool = Body(False, description="知识文件返回本地路径(true)或URL(false)"),
         request: Request = None,
-        api_key: str = Body(os.environ.get("OPENAI_API_KEY")),
-        api_base_url: str = Body(os.environ.get("API_BASE_URL")),
-        embed_model: str = Body("", ),
-        embed_model_path: str = Body("", ),
-        embed_engine: str = Body("", ),
-        model_name: str = Body("", ),
-        temperature: float = Body(0.5, ),
-        model_device: str = Body("", ),
+        # api_key: str = Body(os.environ.get("OPENAI_API_KEY")),
+        # api_base_url: str = Body(os.environ.get("API_BASE_URL")),
+        # embed_model: str = Body("", ),
+        # embed_model_path: str = Body("", ),
+        # embed_engine: str = Body("", ),
+        # model_name: str = Body("", ),
+        # temperature: float = Body(0.5, ),
+        # model_device: str = Body("", ),
+        llm_config: LLMConfig = Body({}, description="llm_model config"),
+        embed_config: EmbedConfig = Body({}, description="embedding_model config"),
         ):
         # params = locals()
         # params.pop("self", None)
-        llm_config: LLMConfig = LLMConfig(**params)
-        embed_config: EmbedConfig = EmbedConfig(**params)
+        # llm_config: LLMConfig = LLMConfig(**params)
+        # embed_config: EmbedConfig = EmbedConfig(**params)
         self.engine_name = engine_name if isinstance(engine_name, str) else engine_name.default
         self.top_k = top_k if isinstance(top_k, int) else top_k.default
         self.score_threshold = score_threshold if isinstance(score_threshold, float) else score_threshold.default
diff --git a/muagent/chat/code_chat.py b/muagent/chat/code_chat.py
index 32fa8d1..12ffa6b 100644
--- a/muagent/chat/code_chat.py
+++ b/muagent/chat/code_chat.py
@@ -53,24 +53,31 @@ def check_service_status(self) -> BaseResponse:
             return BaseResponse(code=404, msg=f"未找到代码库 {self.engine_name}")
         return BaseResponse(code=200, msg=f"找到代码库 {self.engine_name}")
 
-    def _process(self, query: str, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, local_graph_path=""):
+    def _process(self, query: str, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, local_graph_path="", use_nh=True):
         '''process'''
+        # codes_res = search_code(query=query, cb_name=self.engine_name, code_limit=self.code_limit,
+        #                         search_type=self.cb_search_type,
+        #                         history_node_list=self.history_node_list,
+        #                         api_key=llm_config.api_key,
+        #                         api_base_url=llm_config.api_base_url,
+        #                         model_name=llm_config.model_name,
+        #                         temperature=llm_config.temperature,
+        #                         embed_model=embed_config.embed_model,
+        #                         embed_model_path=embed_config.embed_model_path,
+        #                         embed_engine=embed_config.embed_engine,
+        #                         model_device=embed_config.model_device,
+        #                         embed_config=embed_config,
+        #                         local_graph_path=local_graph_path
+        #                         )
         codes_res = search_code(query=query, cb_name=self.engine_name, code_limit=self.code_limit,
                                 search_type=self.cb_search_type,
                                 history_node_list=self.history_node_list,
-                                api_key=llm_config.api_key,
-                                api_base_url=llm_config.api_base_url,
-                                model_name=llm_config.model_name,
-                                temperature=llm_config.temperature,
-                                embed_model=embed_config.embed_model,
-                                embed_model_path=embed_config.embed_model_path,
-                                embed_engine=embed_config.embed_engine,
-                                model_device=embed_config.model_device,
+                                llm_config=llm_config,
                                 embed_config=embed_config,
+                                use_nh=use_nh,
                                 local_graph_path=local_graph_path
                                 )
-
         context = codes_res['context']
         related_vertices = codes_res['related_vertices']
@@ -108,21 +115,24 @@ def chat(
         local_doc_url: bool = Body(False, description="知识文件返回本地路径(true)或URL(false)"),
         request: Request = None,
-        api_key: str = Body(os.environ.get("OPENAI_API_KEY")),
-        api_base_url: str = Body(os.environ.get("API_BASE_URL")),
-        embed_model: str = Body("", ),
-        embed_model_path: str = Body("", ),
-        embed_engine: str = Body("", ),
-        model_name: str = Body("", ),
-        temperature: float = Body(0.5, ),
-        model_device: str = Body("", ),
+        # api_key: str = Body(os.environ.get("OPENAI_API_KEY")),
+        # api_base_url: str = Body(os.environ.get("API_BASE_URL")),
+        # embed_model: str = Body("", ),
+        # embed_model_path: str = Body("", ),
+        # embed_engine: str = Body("", ),
+        # model_name: str = Body("", ),
+        # temperature: float = Body(0.5, ),
+        # model_device: str = Body("", ),
+        llm_config: LLMConfig = Body({}, description="llm_model config"),
+        embed_config: EmbedConfig = Body({}, description="embedding_model config"),
         local_graph_path: str = Body("", ),
+        use_nh: bool = Body(True, description=""),
         **kargs
         ):
         params = locals()
         params.pop("self")
-        llm_config: LLMConfig = LLMConfig(**params)
-        embed_config: EmbedConfig = EmbedConfig(**params)
+        # llm_config: LLMConfig = LLMConfig(**params)
+        # embed_config: EmbedConfig = EmbedConfig(**params)
         self.engine_name = engine_name if isinstance(engine_name, str) else engine_name.default
         self.code_limit = code_limit
         self.stream = stream if isinstance(stream, bool) else stream.default
diff --git a/muagent/chat/knowledge_chat.py b/muagent/chat/knowledge_chat.py
index 1558f05..4d198a3 100644
--- a/muagent/chat/knowledge_chat.py
+++ b/muagent/chat/knowledge_chat.py
@@ -47,11 +47,15 @@ def check_service_status(self) -> BaseResponse:
     def _process(self, query: str, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, ):
         '''process'''
+        # docs = search_docs(
+        #     query, self.engine_name, self.top_k, self.score_threshold, self.kb_root_path,
+        #     api_key=embed_config.api_key, api_base_url=embed_config.api_base_url, embed_model=embed_config.embed_model,
+        #     embed_model_path=embed_config.embed_model_path, embed_engine=embed_config.embed_engine,
+        #     model_device=embed_config.model_device,
+        # )
         docs = search_docs(
-            query, self.engine_name, self.top_k, self.score_threshold, self.kb_root_path,
-            api_key=embed_config.api_key, api_base_url=embed_config.api_base_url, embed_model=embed_config.embed_model,
-            embed_model_path=embed_config.embed_model_path, embed_engine=embed_config.embed_engine,
-            model_device=embed_config.model_device,
+            query, self.engine_name, self.top_k, self.score_threshold, self.kb_root_path,
+            llm_config=llm_config, embed_config=embed_config
         )
         context = "\n".join([doc.page_content for doc in docs])
         source_documents = []
diff --git a/muagent/chat/search_chat.py b/muagent/chat/search_chat.py
index cfc2b23..3854b88 100644
--- a/muagent/chat/search_chat.py
+++ b/muagent/chat/search_chat.py
@@ -3,7 +3,7 @@
 from langchain import LLMChain
 from langchain.callbacks import AsyncIteratorCallbackHandler
-from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper
+from langchain_community.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper
 from langchain.prompts.chat import ChatPromptTemplate
 from langchain_community.docstore.document import Document
diff --git a/muagent/llm_models/openai_model.py b/muagent/llm_models/openai_model.py
index ead3b1c..2f32877 100644
--- a/muagent/llm_models/openai_model.py
+++ b/muagent/llm_models/openai_model.py
@@ -91,13 +91,15 @@ def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler
             base_url=os.environ.get("api_base_url")
             model_name=os.environ.get("LLM_MODEL",
"yi-34b-chat-0205") temperature=os.environ.get("temperature", 0.5) - model_kwargs={"stop": os.environ.get("stop", "")} + stop = [os.environ.get("stop", "")] if os.environ.get("stop", "") else None + model_kwargs={"stop": stop} else: api_key=llm_config.api_key base_url=llm_config.api_base_url model_name=llm_config.model_name temperature=llm_config.temperature - model_kwargs={"stop": llm_config.stop} + stop = [llm_config.stop] if llm_config.stop else None + model_kwargs={"stop": stop} self.llm = ChatOpenAI( streaming=True, @@ -110,7 +112,6 @@ def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler ) - def getChatModelFromConfig(llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None, ) -> Union[ChatOpenAI, LLM]: if llm_config and llm_config.llm and isinstance(llm_config.llm, LLM): @@ -119,7 +120,6 @@ def getChatModelFromConfig(llm_config: LLMConfig, callBack: AsyncIteratorCallbac model_class_dict = {"openai": OpenAILLMModel, "lingyiwanwu": LYWWLLMModel} model_class = model_class_dict[llm_config.model_engine] model = model_class(llm_config, callBack) - logger.debug(f"{model}") return model else: return OpenAILLMModel(llm_config, callBack) diff --git a/muagent/service/cb_api.py b/muagent/service/cb_api.py index 0c60abb..974a32f 100644 --- a/muagent/service/cb_api.py +++ b/muagent/service/cb_api.py @@ -47,21 +47,22 @@ async def create_cb(zip_file, cb_name: str = Body(..., examples=["samples"]), code_path: str = Body(..., examples=["samples"]), do_interpret: bool = Body(..., examples=["samples"]), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), - model_name: bool = Body(..., examples=["samples"]), - temperature: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + # model_name: bool = Body(..., examples=["samples"]), + # temperature: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, embed_config: EmbedConfig = None, local_graph_path: str = '', ) -> BaseResponse: logger.info('cb_name={}, zip_path={}, do_interpret={}'.format(cb_name, code_path, do_interpret)) - embed_config: EmbedConfig = EmbedConfig(**locals()) if embed_config is None else embed_config - llm_config: LLMConfig = LLMConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) if embed_config is None else embed_config + # llm_config: LLMConfig = LLMConfig(**locals()) # Create selected knowledge base if not validate_kb_name(cb_name): @@ -92,20 +93,21 @@ async def create_cb(zip_file, async def delete_cb( cb_name: str = Body(..., examples=["samples"]), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), - model_name: bool = Body(..., examples=["samples"]), - temperature: bool = Body(..., examples=["samples"]), - model_device: bool = 
Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + # model_name: bool = Body(..., examples=["samples"]), + # temperature: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, embed_config: EmbedConfig = None, local_graph_path: str="", ) -> BaseResponse: logger.info('cb_name={}'.format(cb_name)) - embed_config: EmbedConfig = EmbedConfig(**locals()) if embed_config is None else embed_config - llm_config: LLMConfig = LLMConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) if embed_config is None else embed_config + # llm_config: LLMConfig = LLMConfig(**locals()) # Create selected knowledge base if not validate_kb_name(cb_name): return BaseResponse(code=403, msg="Don't attack me") @@ -136,16 +138,17 @@ def search_code(cb_name: str = Body(..., examples=["sofaboot"]), code_limit: int = Body(..., examples=['1']), search_type: str = Body(..., examples=['你好']), history_node_list: list = Body(...), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), - model_name: bool = Body(..., examples=["samples"]), - temperature: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + # model_name: bool = Body(..., examples=["samples"]), + # temperature: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), use_nh: bool = True, local_graph_path: str = CB_ROOT_PATH, + llm_config: LLMConfig = None, embed_config: EmbedConfig = None, ) -> dict: @@ -156,8 +159,8 @@ def search_code(cb_name: str = Body(..., examples=["sofaboot"]), logger.info('code_limit={}'.format(code_limit)) logger.info('search_type={}'.format(search_type)) logger.info('history_node_list={}'.format(history_node_list)) - embed_config: EmbedConfig = EmbedConfig(**locals()) if embed_config is None else embed_config - llm_config: LLMConfig = LLMConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) if embed_config is None else embed_config + # llm_config: LLMConfig = LLMConfig(**locals()) try: # load codebase cbh = CodeBaseHandler(codebase_name=cb_name, embed_config=embed_config, llm_config=llm_config, diff --git a/muagent/service/kb_api.py b/muagent/service/kb_api.py index 6462c07..68da570 100644 --- a/muagent/service/kb_api.py +++ b/muagent/service/kb_api.py @@ -16,7 +16,7 @@ from muagent.orm.commands import * from muagent.orm.utils import DocumentFile from muagent.base_configs.env_config import KB_ROOT_PATH -from muagent.llm_models.llm_config import EmbedConfig +from muagent.llm_models.llm_config import EmbedConfig, LLMConfig from muagent.utils.server_utils import run_async async def list_kbs(): @@ -27,16 +27,17 @@ async def list_kbs(): async def create_kb(knowledge_base_name: 
str = Body(..., examples=["samples"]), vector_store_type: str = Body("faiss"), kb_root_path: str =Body(""), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, embed_config: EmbedConfig = None, ) -> BaseResponse: - embed_config: EmbedConfig = embed_config if embed_config else EmbedConfig(**locals()) + # embed_config: EmbedConfig = embed_config if embed_config else EmbedConfig(**locals()) # Create selected knowledge base if not validate_kb_name(knowledge_base_name): return BaseResponse(code=403, msg="Don't attack me") @@ -93,15 +94,17 @@ def search_docs(query: str = Body(..., description="用户输入", examples=[" top_k: int = Body(5, description="匹配向量数"), score_threshold: float = Body(1.0, description="知识库匹配相关度阈值,取值范围在0-1之间,SCORE越小,相关度越高,取到1相当于不筛选,建议设置在0.5左右", ge=0, le=1), kb_root_path: str =Body(""), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, + embed_config: EmbedConfig = None, ) -> List[DocumentWithScore]: - embed_config: EmbedConfig = EmbedConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) kb = KBServiceFactory.get_service_by_name(knowledge_base_name, embed_config, kb_root_path) if kb is None: return [] @@ -132,18 +135,19 @@ async def upload_doc(file: UploadFile = File(..., description="上传文件"), override: bool = Form(False, description="覆盖已有文件"), not_refresh_vs_cache: bool = Form(False, description="暂不保存向量库(用于FAISS)"), kb_root_path: str =Body(""), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, embed_config: EmbedConfig = None, ) -> BaseResponse: if not validate_kb_name(knowledge_base_name): return 
BaseResponse(code=403, msg="Don't attack me") - embed_config: EmbedConfig = embed_config if embed_config else EmbedConfig(**locals()) + # embed_config: EmbedConfig = embed_config if embed_config else EmbedConfig(**locals()) kb = KBServiceFactory.get_service_by_name(knowledge_base_name, embed_config, kb_root_path) if kb is None: return BaseResponse(code=404, msg=f"未找到知识库 {knowledge_base_name}") @@ -184,17 +188,19 @@ async def delete_doc(knowledge_base_name: str = Body(..., examples=["samples"]), delete_content: bool = Body(False), not_refresh_vs_cache: bool = Body(False, description="暂不保存向量库(用于FAISS)"), kb_root_path: str =Body(""), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, + embed_config: EmbedConfig = None, ) -> BaseResponse: if not validate_kb_name(knowledge_base_name): return BaseResponse(code=403, msg="Don't attack me") - embed_config: EmbedConfig = EmbedConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) knowledge_base_name = urllib.parse.unquote(knowledge_base_name) kb = KBServiceFactory.get_service_by_name(knowledge_base_name, embed_config, kb_root_path) if kb is None: @@ -220,17 +226,19 @@ async def update_doc( file_name: str = Body(..., examples=["file_name"]), not_refresh_vs_cache: bool = Body(False, description="暂不保存向量库(用于FAISS)"), kb_root_path: str =Body(""), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, + embed_config: EmbedConfig = None, ) -> BaseResponse: ''' 更新知识库文档 ''' - embed_config: EmbedConfig = EmbedConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) if not validate_kb_name(knowledge_base_name): return BaseResponse(code=403, msg="Don't attack me") @@ -289,12 +297,14 @@ async def recreate_vector_store( allow_empty_kb: bool = Body(True), vs_type: str = Body("faiss"), kb_root_path: str = Body(""), - api_key: bool = Body(..., examples=["samples"]), - api_base_url: bool = Body(..., examples=["samples"]), - embed_model: bool = Body(..., examples=["samples"]), - embed_model_path: bool = Body(..., examples=["samples"]), - model_device: bool = Body(..., examples=["samples"]), - embed_engine: bool = Body(..., examples=["samples"]), + # api_key: bool = Body(..., examples=["samples"]), + # api_base_url: bool = Body(..., 
examples=["samples"]), + # embed_model: bool = Body(..., examples=["samples"]), + # embed_model_path: bool = Body(..., examples=["samples"]), + # model_device: bool = Body(..., examples=["samples"]), + # embed_engine: bool = Body(..., examples=["samples"]), + llm_config: LLMConfig = None, + embed_config: EmbedConfig = None, ): ''' recreate vector store from the content. @@ -302,7 +312,7 @@ async def recreate_vector_store( by default, get_service_by_name only return knowledge base in the info.db and having document files in it. set allow_empty_kb to True make it applied on empty knowledge base which it not in the info.db or having no documents. ''' - embed_config: EmbedConfig = EmbedConfig(**locals()) + # embed_config: EmbedConfig = EmbedConfig(**locals()) async def output(): kb = KBServiceFactory.get_service(knowledge_base_name, vs_type, embed_config, kb_root_path) if not kb.exists() and not allow_empty_kb: diff --git a/muagent/tools/cb_query_tool.py b/muagent/tools/cb_query_tool.py index 6790e5c..a4ebc1e 100644 --- a/muagent/tools/cb_query_tool.py +++ b/muagent/tools/cb_query_tool.py @@ -50,11 +50,15 @@ def run(cls, }.get(search_type, 'tag') # default + # codes = search_code(code_base_name, query, code_limit, search_type=search_type, history_node_list=history_node_list, + # embed_engine=embed_config.embed_engine, embed_model=embed_config.embed_model, embed_model_path=embed_config.embed_model_path, + # model_device=embed_config.model_device, model_name=llm_config.model_name, temperature=llm_config.temperature, + # api_base_url=llm_config.api_base_url, api_key=llm_config.api_key, use_nh=use_nh, + # local_graph_path=local_graph_path, embed_config=embed_config + # ) codes = search_code(code_base_name, query, code_limit, search_type=search_type, history_node_list=history_node_list, - embed_engine=embed_config.embed_engine, embed_model=embed_config.embed_model, embed_model_path=embed_config.embed_model_path, - model_device=embed_config.model_device, model_name=llm_config.model_name, temperature=llm_config.temperature, - api_base_url=llm_config.api_base_url, api_key=llm_config.api_key, use_nh=use_nh, - local_graph_path=local_graph_path, embed_config=embed_config + use_nh=use_nh, local_graph_path=local_graph_path, + llm_config=llm_config, embed_config=embed_config ) return_codes = [] context = codes['context'] diff --git a/muagent/tools/codechat_tools.py b/muagent/tools/codechat_tools.py index 0695c69..9ed77b5 100644 --- a/muagent/tools/codechat_tools.py +++ b/muagent/tools/codechat_tools.py @@ -47,12 +47,17 @@ def run(cls, code_base_name, query, embed_config: EmbedConfig, llm_config: LLMCo code_limit = 1 # default + # search_result = search_code(code_base_name, query, code_limit, search_type=search_type, + # history_node_list=[], + # embed_engine=embed_config.embed_engine, embed_model=embed_config.embed_model, embed_model_path=embed_config.embed_model_path, + # model_device=embed_config.model_device, model_name=llm_config.model_name, temperature=llm_config.temperature, + # api_base_url=llm_config.api_base_url, api_key=llm_config.api_key, embed_config=embed_config, use_nh=kargs.get("use_nh", True), + # local_graph_path=kargs.get("local_graph_path", "") + # ) search_result = search_code(code_base_name, query, code_limit, search_type=search_type, - history_node_list=[], - embed_engine=embed_config.embed_engine, embed_model=embed_config.embed_model, embed_model_path=embed_config.embed_model_path, - model_device=embed_config.model_device, model_name=llm_config.model_name, 
temperature=llm_config.temperature,
-                                    api_base_url=llm_config.api_base_url, api_key=llm_config.api_key, embed_config=embed_config, use_nh=kargs.get("use_nh", True),
-                                    local_graph_path=kargs.get("local_graph_path", "")
+                                    history_node_list=[], use_nh=kargs.get("use_nh", True),
+                                    local_graph_path=kargs.get("local_graph_path", ""),
+                                    llm_config=llm_config, embed_config=embed_config
                                     )
         if os.environ.get("log_verbose", "0") >= "3":
             logger.debug(search_result)
diff --git a/muagent/tools/docs_retrieval.py b/muagent/tools/docs_retrieval.py
index b06572d..db2bfcc 100644
--- a/muagent/tools/docs_retrieval.py
+++ b/muagent/tools/docs_retrieval.py
@@ -26,10 +26,13 @@ class ToolOutputArgs(BaseModel):
     def run(cls, query, knowledge_base_name, search_top=5, score_threshold=1.0, embed_config: EmbedConfig=EmbedConfig(), kb_root_path: str=""):
         """excute your tool!"""
         try:
+            # docs = search_docs(query, knowledge_base_name, search_top, score_threshold,
+            #                    kb_root_path=kb_root_path, embed_engine=embed_config.embed_engine,
+            #                    embed_model=embed_config.embed_model, embed_model_path=embed_config.embed_model_path,
+            #                    model_device=embed_config.model_device
+            #                    )
             docs = search_docs(query, knowledge_base_name, search_top, score_threshold,
-                               kb_root_path=kb_root_path, embed_engine=embed_config.embed_engine,
-                               embed_model=embed_config.embed_model, embed_model_path=embed_config.embed_model_path,
-                               model_device=embed_config.model_device
+                               kb_root_path=kb_root_path, llm_config=None, embed_config=embed_config
             )
         except Exception as e:
             logger.exception(e)
diff --git a/requirements.txt b/requirements.txt
index f129643..f8be989 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-openai
+openai==1.34.0
 langchain==0.2.3
 langchain_community==0.2.4
 langchain_openai==0.1.8
-langchain_huggingface==1.3.0
+langchain_huggingface==0.0.3
 sentence_transformers
 loguru
 # fastapi~=0.99.1
diff --git a/tests/connector/agent_test.py b/tests/connector/agent_test.py
index f4b4c36..8f2f96c 100644
--- a/tests/connector/agent_test.py
+++ b/tests/connector/agent_test.py
@@ -13,6 +13,7 @@
     api_key = os.environ["OPENAI_API_KEY"]
     api_base_url= os.environ["API_BASE_URL"]
     model_name = os.environ["model_name"]
+    model_engine = os.environ["model_engine"]
     embed_model = os.environ["embed_model"]
     embed_model_path = os.environ["embed_model_path"]
 except Exception as e:
@@ -20,10 +21,12 @@
     api_key = ""
     api_base_url= ""
     model_name = ""
+    model_engine = ""
     embed_model = ""
     embed_model_path = ""
     logger.error(f"{e}")
 
+# test local code
 src_dir = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 )
@@ -35,7 +38,7 @@
 
 llm_config = LLMConfig(
-    model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
+    model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
     stop="**Observation:**"
 )
@@ -188,6 +191,6 @@
     tools=tools,
 )
 # base_agent.pre_print(query)
-output_message = base_agent.step(query)
-print(output_message.input_query)
-print(output_message.parsed_output_list)
+# output_message = base_agent.step(query)
+# print(output_message.input_query)
+# print(output_message.parsed_output_list)
diff --git a/tests/connector/chain_test.py b/tests/connector/chain_test.py
index 649b50f..5624ccf 100644
--- a/tests/connector/chain_test.py
+++ b/tests/connector/chain_test.py
@@ -13,6 +13,7 @@
     api_key = os.environ["OPENAI_API_KEY"]
     api_base_url= os.environ["API_BASE_URL"]
     model_name = os.environ["model_name"]
+    model_engine = os.environ["model_engine"]
     embed_model = os.environ["embed_model"]
     embed_model_path = os.environ["embed_model_path"]
 except Exception as e:
@@ -20,10 +21,12 @@
     api_key = ""
     api_base_url= ""
     model_name = ""
+    model_engine = ""
     embed_model = ""
     embed_model_path = ""
     logger.error(f"{e}")
 
+# test local code
 src_dir = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 )
@@ -38,7 +41,7 @@
 
 llm_config = LLMConfig(
-    model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
+    model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
     stop="**Observation:**"
 )
diff --git a/tests/connector/flow_test.py b/tests/connector/flow_test.py
index b8070d9..d2b4b06 100644
--- a/tests/connector/flow_test.py
+++ b/tests/connector/flow_test.py
@@ -12,6 +12,7 @@
     api_key = os.environ["OPENAI_API_KEY"]
     api_base_url= os.environ["API_BASE_URL"]
     model_name = os.environ["model_name"]
+    model_engine = os.environ["model_engine"]
     embed_model = os.environ["embed_model"]
     embed_model_path = os.environ["embed_model_path"]
 except Exception as e:
@@ -19,17 +20,22 @@
     api_key = ""
     api_base_url= ""
     model_name = ""
+    model_engine = ""
     embed_model = ""
     embed_model_path = ""
     logger.error(f"{e}")
 
-
+# test local code
+src_dir = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+)
+sys.path.append(src_dir)
 from muagent.llm_models.llm_config import EmbedConfig, LLMConfig
 from muagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler
 from muagent.base_configs.env_config import CB_ROOT_PATH
 
 llm_config = LLMConfig(
-    model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3
+    model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3
 )
 # define your customized llm
 # llm_config = LLMConfig(llm=ReadingModel())
diff --git a/tests/connector/memory_manager_test.py b/tests/connector/memory_manager_test.py
index 5518ea7..5a759dd 100644
--- a/tests/connector/memory_manager_test.py
+++ b/tests/connector/memory_manager_test.py
@@ -12,6 +12,7 @@
     api_key = os.environ["OPENAI_API_KEY"]
     api_base_url= os.environ["API_BASE_URL"]
     model_name = os.environ["model_name"]
+    model_engine = os.environ["model_engine"]
     embed_model = os.environ["embed_model"]
     embed_model_path = os.environ["embed_model_path"]
 except Exception as e:
@@ -19,21 +20,22 @@
     api_key = ""
     api_base_url= ""
     model_name = ""
+    model_engine = ""
     embed_model = ""
     embed_model_path = ""
     logger.error(f"{e}")
 
+# test local code
 src_dir = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 )
-print(src_dir)
 sys.path.append(src_dir)
 from muagent.connector.memory_manager import LocalMemoryManager, Message
 from muagent.llm_models.llm_config import EmbedConfig, LLMConfig
 
 llm_config = LLMConfig(
-    model_name=model_name, model_type="openai", api_key=api_key, api_base_url=api_base_url, temperature=0.3,
+    model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
 )
 
 embed_config = EmbedConfig(
diff --git a/tests/connector/phase_test.py b/tests/connector/phase_test.py
index 158dd55..62d8d64 100644
--- a/tests/connector/phase_test.py
+++ b/tests/connector/phase_test.py
@@ -13,6 +13,7 @@
     api_key = os.environ["OPENAI_API_KEY"]
     api_base_url= os.environ["API_BASE_URL"]
     model_name = os.environ["model_name"]
+    model_engine = os.environ["model_engine"]
     embed_model = os.environ["embed_model"]
     embed_model_path = os.environ["embed_model_path"]
 except Exception as e:
@@ -20,10 +21,12 @@
     api_key = ""
     api_base_url= ""
     model_name = ""
+    model_engine = ""
     embed_model = ""
     embed_model_path = ""
     logger.error(f"{e}")
 
+# test local code
 src_dir = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 )
@@ -39,10 +42,11 @@
 
 llm_config = LLMConfig(
-    model_name=model_name, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
+    model_name=model_name, model_engine=model_engine, api_key=api_key, api_base_url=api_base_url, temperature=0.3,
     stop="**Observation:**"
 )
+
 embed_config = EmbedConfig(
     embed_engine="model", embed_model=embed_model, embed_model_path=embed_model_path
 )
diff --git a/tests/test_config.py.example b/tests/test_config.py.example
index 01e99ca..6fe823f 100644
--- a/tests/test_config.py.example
+++ b/tests/test_config.py.example
@@ -7,7 +7,7 @@ os.environ["API_BASE_URL"] = OPENAI_API_BASE
 os.environ["OPENAI_API_KEY"] = "sk-xxx"
 openai.api_key = "sk-xxx"
 os.environ["model_name"] = "gpt-3.5-turbo"
-
+os.environ["model_engine"] = "openai"
 # os.environ["embed_model"] = "{{embed_model_name}}"
 os.environ["embed_model_path"] = "{{embed_model_path}}"

From 32bd3e6a852887f2630822866bbf0d512caed359 Mon Sep 17 00:00:00 2001
From: shanshi
Date: Sun, 16 Jun 2024 16:55:44 +0800
Subject: [PATCH 3/5] debug chatbot's service

---
 muagent/chat/agent_chat.py                         | 10 +++++-----
 muagent/chat/base_chat.py                          | 10 ++++++----
 muagent/chat/code_chat.py                          |  7 ++++---
 muagent/chat/knowledge_chat.py                     |  4 ++--
 muagent/chat/llm_chat.py                           |  4 ++--
 muagent/chat/search_chat.py                        |  4 ++--
 .../configs/prompts/qa_template_prompt.py          | 14 ++++++--------
 muagent/llm_models/openai_model.py                 |  5 ++---
 muagent/service/service_factory.py                 |  2 +-
 9 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/muagent/chat/agent_chat.py b/muagent/chat/agent_chat.py
index 618b404..fe0dc0c 100644
--- a/muagent/chat/agent_chat.py
+++ b/muagent/chat/agent_chat.py
@@ -174,7 +174,7 @@ def chat_iterator(message: Message, local_memory: Memory, isDetailed=False):
                 result["related_nodes"] = related_nodes
             # logger.debug(f"{result['figures'].keys()}, isDetailed: {isDetailed}")
-            message_str = step_content
+            message_str = final_content
             if self.stream:
                 for token in message_str:
                     result["answer"] = token
@@ -238,8 +238,8 @@ def achat(
             custom_phase_configs, custom_chain_configs, custom_role_configs)
         # 
-        params = locals()
-        params.pop("self")
+        # params = locals()
+        # params.pop("self")
         # embed_config: EmbedConfig = EmbedConfig(**params)
         # llm_config: LLMConfig = LLMConfig(**params)
@@ -302,7 +302,7 @@ def chat_iterator(message: Message, local_memory: Memory, isDetailed=False):
             step_content = local_memory.to_str_messages(content_key='step_content', filter_roles=["human"])
             step_content = "\n\n".join([f"{v}" for parsed_output in local_memory.get_parserd_output_list() for k, v in parsed_output.items() if k not in ["Action Status", "human", "user"]])
             # logger.debug(f"{local_memory.get_parserd_output_list()}")
-            final_content = message.role_content
+            final_content = step_content or message.role_content
             result = {
                 "answer": "",
                 "db_docs": [str(doc) for doc in message.db_docs],
@@ -322,7 +322,7 @@ def chat_iterator(message: Message, local_memory: Memory, isDetailed=False):
                 result["related_nodes"] = related_nodes
             # logger.debug(f"{result['figures'].keys()}, isDetailed: {isDetailed}")
-            message_str = step_content
+
message_str = final_content if self.stream: for token in message_str: result["answer"] = token diff --git a/muagent/chat/base_chat.py b/muagent/chat/base_chat.py index 1df3938..6f89a25 100644 --- a/muagent/chat/base_chat.py +++ b/muagent/chat/base_chat.py @@ -3,7 +3,7 @@ import asyncio, json, os from typing import List, AsyncIterable -from langchain import LLMChain +from langchain.chains.llm import LLMChain from langchain.callbacks import AsyncIteratorCallbackHandler from langchain.prompts.chat import ChatPromptTemplate @@ -55,8 +55,8 @@ def chat( embed_config: EmbedConfig = Body({}, description="embedding_model config"), **kargs ): - params = locals() - params.pop("self", None) + # params = locals() + # params.pop("self", None) # llm_config: LLMConfig = LLMConfig(**params) # embed_config: EmbedConfig = EmbedConfig(**params) self.engine_name = engine_name if isinstance(engine_name, str) else engine_name.default @@ -78,6 +78,7 @@ def _chat(self, query: str, history: List[History], llm_config: LLMConfig, embed def chat_iterator(query: str, history: List[History]): # model = getChatModel() model = getChatModelFromConfig(llm_config) + model = model.llm result, content = self.create_task(query, history, model, llm_config, embed_config, **kargs) logger.info('result={}'.format(result)) @@ -142,6 +143,7 @@ async def chat_iterator(query, history): callback = AsyncIteratorCallbackHandler() # model = getChatModel() model = getChatModelFromConfig(llm_config) + model = model.llm task, result = self.create_atask(query, history, model, llm_config, embed_config, callback) if self.stream: @@ -166,7 +168,7 @@ def create_task(self, query: str, history: List[History], model, llm_config: LLM content = chain({"input": query}) return {"answer": "", "docs": ""}, content - def create_atask(self, query, history, model, llm_config: LLMConfig, embed_config: EmbedConfig, callback: AsyncIteratorCallbackHandler): + def create_atask(self, query, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, callback: AsyncIteratorCallbackHandler): chat_prompt = ChatPromptTemplate.from_messages( [i.to_msg_tuple() for i in history] + [("human", "{input}")] ) diff --git a/muagent/chat/code_chat.py b/muagent/chat/code_chat.py index 12ffa6b..2a7dba1 100644 --- a/muagent/chat/code_chat.py +++ b/muagent/chat/code_chat.py @@ -11,7 +11,7 @@ from typing import List from fastapi.responses import StreamingResponse -from langchain import LLMChain +from langchain.chains.llm import LLMChain from langchain.callbacks import AsyncIteratorCallbackHandler from langchain.prompts.chat import ChatPromptTemplate @@ -129,8 +129,8 @@ def chat( use_nh: bool =Body(True, description=""), **kargs ): - params = locals() - params.pop("self") + # params = locals() + # params.pop("self") # llm_config: LLMConfig = LLMConfig(**params) # embed_config: EmbedConfig = EmbedConfig(**params) self.engine_name = engine_name if isinstance(engine_name, str) else engine_name.default @@ -151,6 +151,7 @@ def _chat(self, query: str, history: List[History], llm_config: LLMConfig, embed def chat_iterator(query: str, history: List[History]): # model = getChatModel() model = getChatModelFromConfig(llm_config) + model = model.llm result, content = self.create_task(query, history, model, llm_config, embed_config, local_graph_path, **kargs) # logger.info('result={}'.format(result)) diff --git a/muagent/chat/knowledge_chat.py b/muagent/chat/knowledge_chat.py index 4d198a3..981dd04 100644 --- a/muagent/chat/knowledge_chat.py +++ b/muagent/chat/knowledge_chat.py 
@@ -3,7 +3,7 @@ from urllib.parse import urlencode from typing import List -from langchain import LLMChain +from langchain.chains.llm import LLMChain from langchain.callbacks import AsyncIteratorCallbackHandler from langchain.prompts.chat import ChatPromptTemplate @@ -75,7 +75,7 @@ def _process(self, query: str, history: List[History], model, llm_config: LLMCon result = {"answer": "", "docs": source_documents} return chain, context, result - def create_task(self, query: str, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, ): + def create_task(self, query: str, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, **kargs): '''构建 llm 生成任务''' logger.debug(f"query: {query}, history: {history}") chain, context, result = self._process(query, history, model, llm_config, embed_config) diff --git a/muagent/chat/llm_chat.py b/muagent/chat/llm_chat.py index 7d8887d..bf265b1 100644 --- a/muagent/chat/llm_chat.py +++ b/muagent/chat/llm_chat.py @@ -1,7 +1,7 @@ import asyncio from typing import List -from langchain import LLMChain +from langchain.chains.llm import LLMChain from langchain.callbacks import AsyncIteratorCallbackHandler from langchain.prompts.chat import ChatPromptTemplate @@ -31,7 +31,7 @@ def create_task(self, query: str, history: List[History], model, llm_config: LLM content = chain({"input": query}) return {"answer": "", "docs": ""}, content - def create_atask(self, query, history, model, llm_config: LLMConfig, embed_config: EmbedConfig, callback: AsyncIteratorCallbackHandler): + def create_atask(self, query, history: List[History], model, llm_config: LLMConfig, embed_config: EmbedConfig, callback: AsyncIteratorCallbackHandler): chat_prompt = ChatPromptTemplate.from_messages( [i.to_msg_tuple() for i in history] + [("human", "{input}")] ) diff --git a/muagent/chat/search_chat.py b/muagent/chat/search_chat.py index 3854b88..9e1351e 100644 --- a/muagent/chat/search_chat.py +++ b/muagent/chat/search_chat.py @@ -1,9 +1,9 @@ import os, asyncio from typing import List, Optional, Dict -from langchain import LLMChain +from langchain.chains.llm import LLMChain from langchain.callbacks import AsyncIteratorCallbackHandler -from langchain_community.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper +from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper from langchain.prompts.chat import ChatPromptTemplate from langchain_community.docstore.document import Document diff --git a/muagent/connector/configs/prompts/qa_template_prompt.py b/muagent/connector/configs/prompts/qa_template_prompt.py index 0eeb487..63f35fe 100644 --- a/muagent/connector/configs/prompts/qa_template_prompt.py +++ b/muagent/connector/configs/prompts/qa_template_prompt.py @@ -5,20 +5,18 @@ Based on the information provided, please answer the origin query concisely and professionally. Attention: Follow the input format and response output format -#### Input Format - -**Origin Query:** the initial question or objective that the user wanted to achieve - -**Context:** the current status and history of the tasks to determine if Origin Query has been achieved. - -**DocInfos:**: the relevant doc information or code information, if this is empty, don't refer to this. - #### Response Output Format **Action Status:** Set to 'Continued' or 'Stopped'. **Answer:** Response to the user's origin query based on Context and DocInfos. If DocInfos is empty, you can ignore it. 
If the answer cannot be derived from the given Context and DocInfos, please say 'The question cannot be answered based on the information provided' and do not add any fabricated elements to the answer. """ +# **Origin Query:** the initial question or objective that the user wanted to achieve + +# **Context:** the current status and history of the tasks to determine if Origin Query has been achieved. + +# **DocInfos:**: the relevant doc information or code information, if this is empty, don't refer to this. + CODE_QA_PROMPT = """#### Agent Profile diff --git a/muagent/llm_models/openai_model.py b/muagent/llm_models/openai_model.py index 2f32877..4d7c1a7 100644 --- a/muagent/llm_models/openai_model.py +++ b/muagent/llm_models/openai_model.py @@ -47,7 +47,7 @@ def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler VISIT_BIZ_LINE = os.environ.get("visit_biz_line") # zdatafront 提供的统一加密密钥 aes_secret_key = os.environ.get("aes_secret_key") - + # logger.debug(f"{VISIT_DOMAIN}, {VISIT_BIZ}, {VISIT_BIZ_LINE}, {aes_secret_key}") zdatafront_client = ZDataFrontClient(visit_domain=VISIT_DOMAIN, visit_biz=VISIT_BIZ, visit_biz_line=VISIT_BIZ_LINE, aes_secret_key=aes_secret_key) http_client = SyncProxyHttpClient(zdatafront_client=zdatafront_client, prefer_async=True) except Exception as e: @@ -112,8 +112,7 @@ def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler ) -def getChatModelFromConfig(llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None, ) -> Union[ChatOpenAI, LLM]: - +def getChatModelFromConfig(llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None, ) -> Union[ChatOpenAI, LLM, CustomLLMModel]: if llm_config and llm_config.llm and isinstance(llm_config.llm, LLM): return CustomLLMModel(llm=llm_config.llm) elif llm_config: diff --git a/muagent/service/service_factory.py b/muagent/service/service_factory.py index f59c7b9..14a3253 100644 --- a/muagent/service/service_factory.py +++ b/muagent/service/service_factory.py @@ -145,5 +145,5 @@ def get_kb_doc_details(kb_name: str, kb_root_path) -> List[Dict]: for i, v in enumerate(result.values()): v['No'] = i + 1 data.append(v) - + return data From b317fd74efb38538f3d5394dfb80e7315ed8cb35 Mon Sep 17 00:00:00 2001 From: shanshi Date: Sun, 16 Jun 2024 20:24:43 +0800 Subject: [PATCH 4/5] update langchain search wrapper use --- muagent/chat/search_chat.py | 2 +- muagent/llm_models/openai_model.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/muagent/chat/search_chat.py b/muagent/chat/search_chat.py index 9e1351e..6f3e5ec 100644 --- a/muagent/chat/search_chat.py +++ b/muagent/chat/search_chat.py @@ -3,7 +3,7 @@ from langchain.chains.llm import LLMChain from langchain.callbacks import AsyncIteratorCallbackHandler -from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper +from langchain_community.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper from langchain.prompts.chat import ChatPromptTemplate from langchain_community.docstore.document import Document diff --git a/muagent/llm_models/openai_model.py b/muagent/llm_models/openai_model.py index 4d7c1a7..ce492ed 100644 --- a/muagent/llm_models/openai_model.py +++ b/muagent/llm_models/openai_model.py @@ -113,12 +113,14 @@ def __init__(self, llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler def getChatModelFromConfig(llm_config: LLMConfig, callBack: AsyncIteratorCallbackHandler = None, ) -> Union[ChatOpenAI, LLM, CustomLLMModel]: + # logger.debug(f"{llm_config}") 
if llm_config and llm_config.llm and isinstance(llm_config.llm, LLM): return CustomLLMModel(llm=llm_config.llm) elif llm_config: model_class_dict = {"openai": OpenAILLMModel, "lingyiwanwu": LYWWLLMModel} model_class = model_class_dict[llm_config.model_engine] model = model_class(llm_config, callBack) + # logger.debug(f"{model.llm}") return model else: return OpenAILLMModel(llm_config, callBack) From 9f219a0ee59ed9a7f67b736f844095da739c819d Mon Sep 17 00:00:00 2001 From: shanshi Date: Tue, 18 Jun 2024 11:44:13 +0800 Subject: [PATCH 5/5] update muagent to 0.0.5 --- README.md | 6 +++--- README_zh.md | 6 +++--- setup.py | 14 ++++++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 1f1c601..929f475 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,8 @@ Developed by the Ant CodeFuse Team, CodeFuse-muAgent is a Multi-Agent framework ![](docs/resources/agent_runtime.png) ## 🚀 快速使用 -For complete documentation, see: [CodeFuse-muAgent](docs/overview/o1.muagent.md) -For more [demos](docs/overview/o3.quick-start.md) +For complete documentation, see: [CodeFuse-muAgent](https://codefuse-ai.github.io/docs/api-docs/MuAgent/overview/multi-agent) +For more [demos](https://codefuse-ai.github.io/docs/api-docs/MuAgent/connector/customed_examples) 1. Installation ``` @@ -115,7 +115,7 @@ We are deeply grateful for your interest in the Codefuse project and warmly welc Feel free to raise your suggestions, opinions, and comments directly through GitHub Issues. There are numerous ways to participate in and contribute to the Codefuse project: code implementation, writing tests, process tool improvements, documentation enhancements, etc. -We welcome any contribution and will add you to the list of contributors. See [Contribution Guide...](docs/contribution/contribute_guide.md) +We welcome any contribution and will add you to the list of contributors. See [Contribution Guide...](https://codefuse-ai.github.io/contribution/contribution) ## 🗂 Miscellaneous diff --git a/README_zh.md b/README_zh.md index 9981b58..498902e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -34,8 +34,8 @@ CodeFuse-muAgent 是蚂蚁CodeFuse团队开发的Mulit Agent框架,其核心 ## 🚀 快速使用 -完整文档见:[CodeFuse-muAgent](docs/overview/o1.muagent.md) -更多[demo](docs/overview/o3.quick-start.md) +完整文档见:[CodeFuse-muAgent](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) +更多[demo](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/connector/customed_examples) 1. 
安装 ``` @@ -120,7 +120,7 @@ print(output_memory3.to_str_messages(return_all=True, content_key="parsed_output 您对 Codefuse 的各种建议、意见、评论可以直接通过 GitHub 的 Issues 提出。 -参与 Codefuse 项目并为其作出贡献的方法有很多:代码实现、测试编写、流程工具改进、文档完善等等。任何贡献我们都会非常欢迎,并将您加入贡献者列表。详见[Contribution Guide...](docs/contribution/contribute_guide.md) +参与 Codefuse 项目并为其作出贡献的方法有很多:代码实现、测试编写、流程工具改进、文档完善等等。任何贡献我们都会非常欢迎,并将您加入贡献者列表。详见[Contribution Guide...](https://codefuse-ai.github.io/zh-CN/contribution/issue) ## 🗂 其他 diff --git a/setup.py b/setup.py index 1db8fe2..e6c8321 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="codefuse-muagent", - version="0.0.4", + version="0.0.5", author="shanshi", author_email="wyp311395@antgroup.com", description="A multi-agent framework that facilitates the rapid construction of collaborative teams of agents.", @@ -19,21 +19,27 @@ "Operating System :: OS Independent", ], install_requires=[ - "openai==0.28.1", - "langchain<=0.0.266", + "openai==1.34.0", + "langchain==0.2.3", + "langchain_community==0.2.4", + "langchain_openai==0.1.8", + "langchain_huggingface==0.0.3", "sentence_transformers", "loguru", - "fastapi~=0.99.1", + "fastapi", "pandas", "Pyarrow", "jieba", "psutil", "faiss-cpu", "notebook", + "docker", + "sseclient", # "chromadb==0.4.17", "javalang==0.13.0", "nebula3-python==3.1.0", + "SQLAlchemy==2.0.19", "redis==5.0.1", "pydantic<=1.10.14" ],
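
Taken together, this series replaces the flat `api_key`/`embed_model`/`model_device` parameter lists with two explicit config objects, and endpoints such as `search_code` and `search_docs` now accept `llm_config=` and `embed_config=` directly. Below is a minimal usage sketch of the reworked API; the model names, paths, and environment-variable fallbacks are illustrative placeholders, not defaults shipped with the package:

```python
import os

# Config objects used throughout this series.
from muagent.llm_models.llm_config import EmbedConfig, LLMConfig
from muagent.llm_models.openai_model import getChatModelFromConfig

# model_engine selects the backing client ("openai" or "lingyiwanwu").
llm_config = LLMConfig(
    model_name=os.environ.get("model_name", "gpt-3.5-turbo"),
    model_engine=os.environ.get("model_engine", "openai"),
    api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"],
    temperature=0.3,
    stop="**Observation:**",
)

# embed_engine="model" points at a locally hosted embedding model;
# the name and path below are placeholders.
embed_config = EmbedConfig(
    embed_engine="model",
    embed_model=os.environ.get("embed_model", "text2vec-base"),
    embed_model_path=os.environ.get("embed_model_path", "/models/text2vec-base"),
)

# Returns an OpenAILLMModel/LYWWLLMModel wrapper, or CustomLLMModel
# when llm_config.llm carries a user-supplied LangChain LLM.
model = getChatModelFromConfig(llm_config)
```

Callers that previously forwarded the individual fields (as in the commented-out blocks above) can pass these two objects unchanged through the chat, tool, and service layers.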