feat(server): Add tool call support to WebUI (Llama Server) #13501

Open

samolego wants to merge 26 commits into master from feat/tool-calling

Changes from 7 commits

26 commits:
acd4767
feat(server): add basic js tool call support
samolego May 7, 2025
6236918
code abstraction for tool calling
samolego May 8, 2025
e84e819
minor changes, renames
samolego May 8, 2025
f6b1386
add tool call fields
samolego May 12, 2025
f2175cb
fix: Use structured `tool_calls` for tool handling
samolego May 13, 2025
4698b66
fix: forward tool call info back to api
samolego May 13, 2025
69e7119
provide tool call response in a dropdown
samolego May 13, 2025
75fd25e
Fix UI updates after tool call chains
samolego May 13, 2025
ae32a9a
move js evaluation to sandboxed iframe, remove debug logs
samolego May 13, 2025
00d911d
merge assistant messages on tool use
samolego May 18, 2025
d99808f
feat: populate settings tool calling section
samolego May 18, 2025
0b34d53
feat: add stream response setting
samolego May 18, 2025
0480054
fix: revert base url
samolego May 18, 2025
7fa0043
Merge remote-tracking branch 'upstream/master' into feat/tool-calling
samolego May 18, 2025
b128ca5
fix: readd missing comments
samolego May 19, 2025
c203815
fix: more cleanup
samolego May 19, 2025
cf110f9
minor changes
samolego May 19, 2025
4e7da1b
Delete deno.lock
samolego May 19, 2025
031e673
Update tools/server/webui/package.json
samolego May 20, 2025
c9ec6fa
fix: remove unused type, do not add tool calls in user messages
samolego May 20, 2025
3f76cac
feat: support streaming tool calls
samolego May 26, 2025
c98baef
bugfixes for streaming calls
samolego May 27, 2025
22a951b
fix demo conversation import
samolego May 27, 2025
798946e
fix: make chained message regeneratable
samolego May 29, 2025
92f8bb0
updates to config and available tools map
samolego May 29, 2025
5c898ec
better handling of logged variables in js repl
samolego May 29, 2025
1 change: 1 addition & 0 deletions tools/server/webui/package.json
@@ -33,6 +33,7 @@
"remark-math": "^6.0.0",
"tailwindcss": "^4.1.1",
"textlinestream": "^1.1.1",
"unist-util-visit": "^5.0.0",
"vite-plugin-singlefile": "^2.0.3"
},
"devDependencies": {
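The new `unist-util-visit` dependency is the standard helper for walking remark/rehype syntax trees during markdown rendering. As a rough illustration of the kind of plugin it enables (a hypothetical sketch, not code from this PR):

```ts
import { visit } from 'unist-util-visit';
import type { Root } from 'mdast';

// Hypothetical remark plugin: visit every inline-code node in the tree.
// Only the `visit(tree, test, visitor)` call reflects the real library API;
// the plugin itself is illustrative.
export default function remarkInspectInlineCode() {
  return (tree: Root) => {
    visit(tree, 'inlineCode', (node) => {
      console.log('inline code node:', node.value);
    });
  };
}
```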
5 changes: 2 additions & 3 deletions tools/server/webui/src/Config.ts
@@ -4,9 +4,7 @@ import { isNumeric } from './utils/misc';
export const isDev = import.meta.env.MODE === 'development';

// constants
export const BASE_URL = new URL('.', document.baseURI).href
.toString()
.replace(/\/$/, '');
export const BASE_URL = 'http://127.0.0.1:8080';

export const CONFIG_DEFAULT = {
// Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
@@ -39,6 +37,7 @@ export const CONFIG_DEFAULT = {
custom: '', // custom json-stringified object
// experimental features
pyIntepreterEnabled: false,
jsInterpreterToolUse: false,
};
export const CONFIG_INFO: Record<string, string> = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
71 changes: 61 additions & 10 deletions tools/server/webui/src/components/ChatMessage.tsx
@@ -48,17 +48,22 @@ export default function ChatMessage({
const nextSibling = siblingLeafNodeIds[siblingCurrIdx + 1];
const prevSibling = siblingLeafNodeIds[siblingCurrIdx - 1];

// for reasoning model, we split the message into content and thought
// TODO: implement this as remark/rehype plugin in the future
// for reasoning model, we split the message into content, thought, and tool output
const { content, thought, isThinking }: SplitMessage = useMemo(() => {
if (msg.content === null || msg.role !== 'assistant') {
if (
msg.content === null ||
(msg.role !== 'assistant' && msg.role !== 'tool')
) {
return { content: msg.content };
}

let actualContent = '';
let thought = '';
let isThinking = false;
let thinkSplit = msg.content.split('<think>', 2);

actualContent += thinkSplit[0];

while (thinkSplit[1] !== undefined) {
// <think> tag found
thinkSplit = thinkSplit[1].split('</think>', 2);
@@ -71,11 +76,13 @@
actualContent += thinkSplit[0];
}
}

return { content: actualContent, thought, isThinking };
}, [msg]);

if (!viewingChat) return null;

const toolCalls = msg.tool_calls ?? null;

return (
<div className="group" id={id}>
<div
@@ -125,8 +132,12 @@
<>
{content === null ? (
<>
{/* show loading dots for pending message */}
<span className="loading loading-dots loading-md"></span>
{toolCalls ? null : (
<>
{/* show loading dots for pending message */}
<span className="loading loading-dots loading-md"></span>
</>
)}
</>
) : (
<>
@@ -188,13 +199,53 @@
</details>
)}

<MarkdownDisplay
content={content}
isGenerating={isPending}
/>
{msg.role === 'tool' ? (
<details
className="collapse bg-base-200 collapse-arrow mb-4"
open={true}
>
<summary className="collapse-title">
<b>Tool call result</b>
</summary>
<div className="collapse-content">
<MarkdownDisplay
content={content}
isGenerating={false} // Tool results are not "generating"
/>
</div>
</details>
) : (
<MarkdownDisplay
content={content}
isGenerating={isPending}
/>
)}
</div>
</>
)}
{toolCalls &&
toolCalls.map((toolCall, i) => (
<details
key={i}
className="collapse bg-base-200 collapse-arrow mb-4"
open={false} // todo: make this configurable like showThoughtInProgress
>
<summary className="collapse-title">
<b>Tool call:</b> {toolCall.function.name}
</summary>

<div className="collapse-content">
<div className="font-bold mb-1">Arguments:</div>
<pre className="whitespace-pre-wrap bg-base-300 p-2 rounded">
{JSON.stringify(
JSON.parse(toolCall.function.arguments),
null,
2
)}
</pre>
</div>
</details>
))}
{/* render timings if enabled */}
{timings && config.showTokensPerSecond && (
<div className="dropdown dropdown-hover dropdown-top mt-2">
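The tool-call rendering above assumes an OpenAI-compatible shape for each entry of `msg.tool_calls`. A sketch of the `ToolCall` type the component appears to rely on, reconstructed from usage in this diff rather than copied from the PR's `types.ts`:

```ts
// Inferred from usage: `toolCall.function.name`, `toolCall.function.arguments`
// (a JSON-encoded string, hence the JSON.parse/JSON.stringify round-trip for
// pretty-printing), and the client-side `tc.call_id ??= \`call_${i}\`` fallback.
export interface ToolCall {
  call_id?: string;
  function: {
    name: string;
    arguments: string; // JSON-encoded argument object
  };
}
```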
15 changes: 15 additions & 0 deletions tools/server/webui/src/components/SettingDialog.tsx
@@ -254,6 +254,21 @@ const SETTING_SECTIONS: SettingSection[] = [
),
key: 'pyIntepreterEnabled',
},
{
type: SettingInputType.CHECKBOX,
label: (
<>
<b>Enable JavaScript tool use</b>
<br />
<small className="text-xs">
This allows the LLM to use your browser console as a tool. If the
model supports function calling, it can use the console to perform
e.g. data analysis by itself.
</small>
</>
),
key: 'jsInterpreterToolUse',
},
],
},
];
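Per the `move js evaluation to sandboxed iframe, remove debug logs` commit above, the console tool presumably evaluates model-supplied JavaScript inside a sandboxed iframe rather than in the WebUI page itself. A rough sketch of that pattern; every name here is hypothetical, not taken from the PR:

```ts
// Hypothetical sketch: evaluate model-supplied JS in a sandboxed iframe.
// `allow-scripts` without `allow-same-origin` keeps the evaluated code away
// from the WebUI's own DOM, cookies, and storage. Synchronous code only.
export function evalInSandbox(code: string): Promise<string> {
  return new Promise((resolve) => {
    const iframe = document.createElement('iframe');
    iframe.sandbox.add('allow-scripts');
    iframe.style.display = 'none';
    iframe.srcdoc = `<script>
      try {
        const result = eval(${JSON.stringify(code)});
        parent.postMessage({ result: String(result) }, '*');
      } catch (e) {
        parent.postMessage({ result: 'Error: ' + e }, '*');
      }
    <\/script>`;
    const onMessage = (ev: MessageEvent) => {
      if (ev.source !== iframe.contentWindow) return;
      window.removeEventListener('message', onMessage);
      iframe.remove();
      resolve(ev.data.result);
    };
    window.addEventListener('message', onMessage);
    document.body.appendChild(iframe);
  });
}
```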
165 changes: 142 additions & 23 deletions tools/server/webui/src/utils/app.context.tsx
@@ -5,6 +5,7 @@ import {
Conversation,
Message,
PendingMessage,
ToolCall,
ViewingChat,
} from './types';
import StorageUtils from './storage';
@@ -15,6 +16,7 @@ import {
} from './misc';
import { BASE_URL, CONFIG_DEFAULT, isDev } from '../Config';
import { matchPath, useLocation, useNavigate } from 'react-router';
import { AVAILABLE_TOOLS } from './tool_calling/register_tools';

interface AppContextValue {
// conversations and messages
@@ -181,10 +183,21 @@
}
if (isDev) console.log({ messages });

// tool calling from clientside
const enabledTools = Array.from(
AVAILABLE_TOOLS,
([_name, tool], _index) => tool
)
.filter((tool) => tool.enabled())
.map((tool) => tool.specs());

// stream does not support tool-use (yet?)
const streamResponse = enabledTools.length === 0;

// prepare params
const params = {
messages,
stream: true,
stream: streamResponse,
cache_prompt: true,
samplers: config.samplers,
temperature: config.temperature,
@@ -206,6 +219,7 @@
dry_penalty_last_n: config.dry_penalty_last_n,
max_tokens: config.max_tokens,
timings_per_token: !!config.showTokensPerSecond,
tools: enabledTools.length > 0 ? enabledTools : undefined,
...(config.custom.length ? JSON.parse(config.custom) : {}),
};

@@ -221,36 +235,144 @@
body: JSON.stringify(params),
signal: abortController.signal,
});

if (fetchResponse.status !== 200) {
const body = await fetchResponse.json();
throw new Error(body?.error?.message || 'Unknown error');
}
const chunks = getSSEStreamAsync(fetchResponse);
for await (const chunk of chunks) {
// const stop = chunk.stop;
if (chunk.error) {
throw new Error(chunk.error?.message || 'Unknown error');

// Tool calls results we will process later
const pendingMessages: PendingMessage[] = [];
let lastMsgId = pendingMsg.id;
let shouldContinueChain = false;

if (streamResponse) {
const chunks = getSSEStreamAsync(fetchResponse);
for await (const chunk of chunks) {
// const stop = chunk.stop;
if (chunk.error) {
throw new Error(chunk.error?.message || 'Unknown error');
}
const addedContent = chunk.choices[0].delta.content;
const lastContent = pendingMsg.content || '';
if (addedContent) {
pendingMsg = {
...pendingMsg,
content: lastContent + addedContent,
};
}
const timings = chunk.timings;
if (timings && config.showTokensPerSecond) {
// only extract what's really needed, to save some space
pendingMsg.timings = {
prompt_n: timings.prompt_n,
prompt_ms: timings.prompt_ms,
predicted_n: timings.predicted_n,
predicted_ms: timings.predicted_ms,
};
}
setPending(convId, pendingMsg);
onChunk(); // don't need to switch node for pending message
}
const addedContent = chunk.choices[0].delta.content;
const lastContent = pendingMsg.content || '';
if (addedContent) {
} else {
const responseData = await fetchResponse.json();
if (isDev) console.log({ responseData });
if (responseData.error) {
throw new Error(responseData.error?.message || 'Unknown error');
}

const choice = responseData.choices[0];
const messageFromAPI = choice.message;
console.log({ messageFromAPI });
let newContent = '';

if (messageFromAPI.content) {
newContent = messageFromAPI.content;
console.log(newContent);
}

// Process tool calls
if (messageFromAPI.tool_calls && messageFromAPI.tool_calls.length > 0) {
// Store the raw tool calls in the pendingMsg
pendingMsg = {
...pendingMsg,
content: lastContent + addedContent,
tool_calls: messageFromAPI.tool_calls as ToolCall[],
};

for (let i = 0; i < messageFromAPI.tool_calls.length; i++) {
const tc = messageFromAPI.tool_calls[i] as ToolCall;
if (tc) {
// Set up call id
tc.call_id ??= `call_${i}`;

if (isDev) console.log({ tc });

// Process tool call
const toolResult = AVAILABLE_TOOLS.get(
tc.function.name
)?.processCall(tc);

const toolMsg: PendingMessage = {
id: lastMsgId + 1,
type: 'text',
convId: convId,
content: toolResult?.output ?? 'Error: invalid tool call!',
timestamp: Date.now(),
role: 'tool',
parent: lastMsgId,
children: [],
};
pendingMessages.push(toolMsg);
lastMsgId += 1;
}
}
}
const timings = chunk.timings;
if (timings && config.showTokensPerSecond) {
// only extract what's really needed, to save some space

if (newContent !== '') {
pendingMsg = {
...pendingMsg,
content: newContent,
};
}

// Handle timings from the non-streaming response
// The exact location of 'timings' in responseData might vary by API.
// Assuming responseData.timings similar to streaming chunk for now.
const apiTimings = responseData.timings;
if (apiTimings && config.showTokensPerSecond) {
pendingMsg.timings = {
prompt_n: timings.prompt_n,
prompt_ms: timings.prompt_ms,
predicted_n: timings.predicted_n,
predicted_ms: timings.predicted_ms,
prompt_n: apiTimings.prompt_n,
prompt_ms: apiTimings.prompt_ms,
predicted_n: apiTimings.predicted_n,
predicted_ms: apiTimings.predicted_ms,
};
}
setPending(convId, pendingMsg);
onChunk(); // don't need to switch node for pending message

for (const pendMsg of pendingMessages) {
console.log('Setting pending message', pendMsg.id);
setPending(convId, pendMsg);
}

onChunk(); // Update UI to show the processed message

shouldContinueChain = choice.finish_reason === 'tool_calls';
}

pendingMessages.unshift(pendingMsg);
if (
pendingMsg.content !== null ||
(pendingMsg.tool_calls?.length ?? 0) > 0
) {
await StorageUtils.appendMsgChain(
pendingMessages as Message[],
leafNodeId
);

// if message ended due to "finish_reason": "tool_calls"
// resend it to assistant to process the result.
if (shouldContinueChain) {
await generateMessage(convId, lastMsgId, onChunk);
}
}
} catch (err) {
setPending(convId, null);
@@ -265,9 +387,6 @@
}
}

if (pendingMsg.content !== null) {
await StorageUtils.appendMsg(pendingMsg as Message, leafNodeId);
}
setPending(convId, null);
onChunk(pendingId); // trigger scroll to bottom and switch to the last node
};
@@ -293,7 +412,7 @@

const now = Date.now();
const currMsgId = now;
StorageUtils.appendMsg(
await StorageUtils.appendMsg(
{
id: currMsgId,
timestamp: now,
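The loop above calls `tool.enabled()`, `tool.specs()`, and `AVAILABLE_TOOLS.get(tc.function.name)?.processCall(tc)`. A minimal sketch of the interface those calls imply for `./tool_calling/register_tools` — a reconstruction from usage, not the PR's actual source:

```ts
import { ToolCall } from '../types';

// Inferred shape of a client-side tool as used by app.context.tsx.
export interface ClientTool {
  // Gated by a config flag, e.g. the new `jsInterpreterToolUse` setting.
  enabled(): boolean;
  // OpenAI-style function spec, sent to the server in the `tools` field,
  // e.g. { type: 'function', function: { name, description, parameters } }.
  specs(): object;
  // Executes the call; `output` becomes the content of the `tool` message.
  processCall(tc: ToolCall): { output: string } | undefined;
}

// Name -> tool registry; iterated to build `enabledTools` before each request.
export const AVAILABLE_TOOLS = new Map<string, ClientTool>();
```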