Skip to content

Commit a00e54c

Browse files
authored
feat: support web search (#609)
* feat: add web search switch button
  Signed-off-by: Bob Du <i@bobdu.cc>
* refactor: extracting create content helper function
  Signed-off-by: Bob Du <i@bobdu.cc>
* feat: add search api provider and api key config page
  Signed-off-by: Bob Du <i@bobdu.cc>
* feat: add web search logic in backend
  Signed-off-by: Bob Du <i@bobdu.cc>
* chore: improve chat system message
  Signed-off-by: Bob Du <i@bobdu.cc>
---------
Signed-off-by: Bob Du <i@bobdu.cc>
1 parent 6a978e0 commit a00e54c

File tree

18 files changed

+517
-57
lines changed

18 files changed

+517
-57
lines changed

service/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@
2828
"common:cleanup": "rimraf node_modules && rimraf pnpm-lock.yaml"
2929
},
3030
"dependencies": {
31+
"@tavily/core": "^0.5.3",
3132
"axios": "^1.8.4",
32-
"dayjs": "^1.11.7",
33+
"dayjs": "^1.11.13",
3334
"dotenv": "^16.0.3",
3435
"express": "^5.1.0",
3536
"express-rate-limit": "^6.7.0",

service/pnpm-lock.yaml

Lines changed: 28 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

service/src/chatgpt/index.ts

Lines changed: 151 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import * as dotenv from 'dotenv'
22
import OpenAI from 'openai'
33
import { HttpsProxyAgent } from 'https-proxy-agent'
4+
import { tavily } from '@tavily/core'
5+
import dayjs from 'dayjs'
46
import type { AuditConfig, KeyConfig, UserInfo } from '../storage/model'
57
import { Status, UsageResponse } from '../storage/model'
68
import { convertImageUrl } from '../utils/image'
@@ -10,11 +12,81 @@ import { getCacheApiKeys, getCacheConfig, getOriginConfig } from '../storage/con
1012
import { sendResponse } from '../utils'
1113
import { hasAnyRole, isNotEmptyString } from '../utils/is'
1214
import type { ModelConfig } from '../types'
13-
import { getChatByMessageId, updateRoomChatModel } from '../storage/mongo'
15+
import { getChatByMessageId, updateChatSearchQuery, updateChatSearchResult } from '../storage/mongo'
1416
import type { ChatMessage, RequestOptions } from './types'
1517

1618
dotenv.config()
1719

20+
/**
 * Build the system prompt that instructs the model to answer the user's
 * question from web search results supplied in the conversation.
 *
 * The prompt is a fixed multi-line markdown text; the only dynamic part is
 * the "Current time" line.
 *
 * @param currentTime - Timestamp text inserted verbatim after "Current time: ".
 * @returns The complete prompt, with lines separated by `\n`.
 */
function systemMessageWithSearchResult(currentTime: string): string {
  // The template literal starts at column 0 on purpose: any indentation here
  // would become part of the prompt text sent to the model.
  return `You are an intelligent assistant that needs to answer user questions based on search results.

**Search Results Format Description:**
- Search results may contain irrelevant information, please filter and use accordingly

**Context Information:**
- Current time: ${currentTime}

**Response Requirements:**

1. **Content Processing**
- Screen and filter search results, selecting content most relevant to the question
- Synthesize information from multiple web pages, avoiding repetitive citations from a single source
- Do not mention specific sources or rankings of search results

2. **Response Strategy**
- **Listing questions**: Limit to within 10 key points, prioritize providing the most relevant and complete information
- **Creative questions**: Make full use of search results to generate in-depth professional long-form answers
- **Objective Q&A**: Brief answers may appropriately supplement 1-2 sentences of related information

3. **Format Requirements**
- Respond using markdown (latex start with $).
- Use structured, paragraph-based answer format
- When answering in points, limit to within 5 points, merging related content
- Ensure answers are aesthetically pleasing and highly readable

4. **Language Standards**
- Keep answer language consistent with user's question language
- Do not change language unless specifically requested by the user

**Notes:**
- Not all search results are relevant, need to judge based on the question
- For listing questions, inform users they can check search sources for complete information
- Creative answers need to be multi-perspective, information-rich, and thoroughly discussed`
}
56+
57+
/**
 * Build the system prompt that asks the model to decide whether a web search
 * is needed and, if so, to reply with the query wrapped in
 * `<search_query>…</search_query>` (an empty tag pair means "no search").
 *
 * @param currentTime - Timestamp text inserted verbatim after "Current time: ".
 * @returns The complete prompt, with lines separated by `\n`.
 */
function systemMessageGetSearchQuery(currentTime: string): string {
  // The template literal starts at column 0 on purpose: any indentation here
  // would become part of the prompt text sent to the model.
  return `You are an intelligent search assistant.
Current time: ${currentTime}

Before formally answering user questions, you need to analyze the user's questions and conversation context to determine whether you need to obtain more information through internet search to provide accurate answers.

**Task Flow:**
1. Carefully analyze the user's question content and previous conversation history
2. Combined with the current time, determine whether the question involves time-sensitive information
3. Evaluate whether existing knowledge is sufficient to answer the question
4. If search is needed, generate a precise search query
5. If search is not needed, return empty result

**Output Format Requirements:**
- If search is needed: return <search_query>example search query keywords</search_query>
- If search is not needed: return <search_query></search_query>
- Do not include any other explanations or answer content
- Search query should be concise and clear, able to obtain the most relevant information

**Judgment Criteria:**
- Time-sensitive information (such as latest news, stock prices, weather, real-time data, etc.): search needed
- Latest policies, regulations, technological developments: may need search
- Common sense questions, historical facts, basic knowledge: usually no search needed
- Latest research or developments in professional fields: search recommended

**Notes:**
- Search query should target the core needs of user questions
- Consider the timeliness and accuracy requirements of information
- Prioritize obtaining the latest and most authoritative information sources

Please strictly return results according to the above format.`
}
89+
1890
const ErrorCodeMessage: Record<string, string> = {
1991
401: '[OpenAI] 提供错误的API密钥 | Incorrect API key provided',
2092
403: '[OpenAI] 服务器拒绝访问,请稍后再试 | Server refused to access, please try again later',
@@ -49,17 +121,16 @@ export async function initApi(key: KeyConfig) {
49121
const processThreads: { userId: string; abort: AbortController; messageId: string }[] = []
50122

51123
async function chatReplyProcess(options: RequestOptions) {
124+
const globalConfig = await getCacheConfig()
52125
const model = options.room.chatModel
126+
const searchEnabled = options.room.searchEnabled
53127
const key = await getRandomApiKey(options.user, model)
54128
const userId = options.user._id.toString()
55129
const maxContextCount = options.user.advanced.maxContextCount ?? 20
56130
const messageId = options.messageId
57131
if (key == null || key === undefined)
58132
throw new Error('没有对应的apikeys配置。请再试一次 | No available apikeys configuration. Please try again.')
59133

60-
// Add Chat Record
61-
updateRoomChatModel(userId, options.room.roomId, model)
62-
63134
const { message, uploadFileKeys, parentMessageId, process, systemMessage, temperature, top_p } = options
64135

65136
try {
@@ -85,32 +156,59 @@ async function chatReplyProcess(options: RequestOptions) {
85156
}
86157

87158
// Prepare the user message content (text and images)
88-
let content: string | OpenAI.Chat.ChatCompletionContentPart[] = message
89-
90-
// Handle image uploads if present
91-
if (uploadFileKeys && uploadFileKeys.length > 0) {
92-
content = [
93-
{
94-
type: 'text',
95-
text: message,
96-
},
97-
]
98-
for (const uploadFileKey of uploadFileKeys) {
99-
content.push({
100-
type: 'image_url',
101-
image_url: {
102-
url: await convertImageUrl(uploadFileKey),
103-
},
104-
})
105-
}
106-
}
159+
const content: string | OpenAI.Chat.ChatCompletionContentPart[] = await createContent(message, uploadFileKeys)
107160

108161
// Add the user message
109162
messages.push({
110163
role: 'user',
111164
content,
112165
})
113166

167+
let hasSearchResult = false
168+
const searchConfig = globalConfig.searchConfig
169+
if (searchConfig.enabled && searchConfig?.options?.apiKey && searchEnabled) {
170+
messages[0].content = systemMessageGetSearchQuery(dayjs().format('YYYY-MM-DD HH:mm:ss'))
171+
const completion = await openai.chat.completions.create({
172+
model,
173+
messages,
174+
})
175+
let searchQuery: string = completion.choices[0].message.content
176+
const match = searchQuery.match(/<search_query>([\s\S]*)<\/search_query>/i)
177+
if (match)
178+
searchQuery = match[1].trim()
179+
else
180+
searchQuery = ''
181+
182+
if (searchQuery) {
183+
await updateChatSearchQuery(messageId, searchQuery)
184+
185+
const tvly = tavily({ apiKey: searchConfig.options?.apiKey })
186+
const response = await tvly.search(
187+
searchQuery,
188+
{
189+
includeRawContent: true,
190+
timeout: 300,
191+
},
192+
)
193+
194+
const searchResult = JSON.stringify(response)
195+
await updateChatSearchResult(messageId, searchResult)
196+
197+
messages.push({
198+
role: 'user',
199+
content: `Additional information from web searche engine.
200+
search query: <search_query>${searchQuery}</search_query>
201+
search result: <search_result>${searchResult}</search_result>`,
202+
})
203+
204+
messages[0].content = systemMessageWithSearchResult(dayjs().format('YYYY-MM-DD HH:mm:ss'))
205+
hasSearchResult = true
206+
}
207+
}
208+
209+
if (!hasSearchResult)
210+
messages[0].content = systemMessage
211+
114212
// Create the chat completion with streaming
115213
const stream = await openai.chat.completions.create({
116214
model,
@@ -244,26 +342,7 @@ async function getMessageById(id: string): Promise<ChatMessage | undefined> {
244342
}
245343
else {
246344
if (isPrompt) { // prompt
247-
let content: string | OpenAI.Chat.ChatCompletionContentPart[] = chatInfo.prompt
248-
if (chatInfo.images && chatInfo.images.length > 0) {
249-
content = [
250-
{
251-
type: 'text',
252-
text: chatInfo.prompt,
253-
},
254-
]
255-
for (const image of chatInfo.images) {
256-
const imageUrlBase64 = await convertImageUrl(image)
257-
if (imageUrlBase64) {
258-
content.push({
259-
type: 'image_url',
260-
image_url: {
261-
url: await convertImageUrl(image),
262-
},
263-
})
264-
}
265-
}
266-
}
345+
const content: string | OpenAI.Chat.ChatCompletionContentPart[] = await createContent(chatInfo.prompt, chatInfo.images)
267346
return {
268347
id,
269348
parentMessageId,
@@ -311,6 +390,35 @@ async function getRandomApiKey(user: UserInfo, chatModel: string): Promise<KeyCo
311390
return randomKeyConfig(keys)
312391
}
313392

393+
/**
 * Build the `content` value of a chat message from text and optional images.
 *
 * @param text - The message text.
 * @param images - Optional image keys; each one is passed to `convertImageUrl`.
 * @returns `text` unchanged when `images` is missing or empty. Otherwise an
 *   array whose first part is the text, followed by one `image_url` part per
 *   image for which `convertImageUrl` produced a truthy URL.
 */
async function createContent(text: string, images?: string[]): Promise<string | OpenAI.Chat.ChatCompletionContentPart[]> {
  // No images: keep the plain-string form of the message content.
  if (!images || images.length === 0)
    return text

  const content: OpenAI.Chat.ChatCompletionContentPart[] = [
    {
      type: 'text',
      text,
    },
  ]

  for (const image of images) {
    // Convert once and reuse the result; images that yield no URL are skipped.
    const imageUrl = await convertImageUrl(image)
    if (imageUrl) {
      content.push({
        type: 'image_url',
        image_url: {
          url: imageUrl,
        },
      })
    }
  }

  return content
}
421+
314422
// Helper function to add previous messages to the conversation context
315423
async function addPreviousMessages(parentMessageId: string, maxContextCount: number, messages: OpenAI.Chat.ChatCompletionMessageParam[]): Promise<void> {
316424
// Recursively get previous messages

service/src/index.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,33 @@ router.post('/audit-test', rootAuth, async (req, res) => {
781781
}
782782
})
783783

784+
// POST /setting-search (guarded by rootAuth): replace the stored search
// configuration with the request body and return the saved value.
router.post('/setting-search', rootAuth, async (req, res) => {
  try {
    const config = req.body as import('./storage/model').SearchConfig

    const thisConfig = await getOriginConfig()
    thisConfig.searchConfig = config
    const result = await updateConfig(thisConfig)
    // NOTE(review): presumably invalidates the cached config so later reads
    // pick up the new search settings — confirm against clearConfigCache.
    clearConfigCache()
    res.send({ status: 'Success', message: '操作成功 | Successfully', data: result.searchConfig })
  }
  catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error)
    res.send({ status: 'Fail', message, data: null })
  }
})
798+
799+
// POST /search-test (guarded by rootAuth): placeholder for testing a search
// configuration. It does not call the search provider yet; it only rejects
// requests that could never succeed (no API key or no query text).
router.post('/search-test', rootAuth, async (req, res) => {
  try {
    const { search, text } = req.body as { search: import('./storage/model').SearchConfig; text: string }

    // Without these two values a real search cannot run, so do not report success.
    if (!search?.options?.apiKey)
      throw new Error('未配置搜索 API Key | Search API key is not configured')
    if (typeof text !== 'string' || text.trim() === '')
      throw new Error('搜索内容不能为空 | Search text is required')

    // TODO: Implement actual search test logic with Tavily API
    // For now, just return a success response
    res.send({ status: 'Success', message: '搜索测试成功 | Search test successful', data: { query: text, results: [] } })
  }
  catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error)
    res.send({ status: 'Fail', message, data: null })
  }
})
810+
784811
router.post('/setting-advanced', auth, async (req, res) => {
785812
try {
786813
const config = req.body as {

0 commit comments

Comments
 (0)