Add support for local shell, image generator, code interpreter tools

rm-openai · rm-openai · commit a1647a630574 · 2025-05-21T15:16:00.000-04:00
diff --git a/examples/tools/code_interpreter.py b/examples/tools/code_interpreter.py
@@ -0,0 +1,34 @@
+import asyncio
+
+from agents import Agent, CodeInterpreterTool, Runner, trace
+
+
+async def main():
+    """Stream a code-interpreter run, printing the generated code and the final answer."""
+    agent = Agent(
+        name="Code interpreter",
+        instructions="You love doing math.",
+        tools=[
+            CodeInterpreterTool(
+                tool_config={"type": "code_interpreter", "container": {"type": "auto"}},
+            )
+        ],
+    )
+
+    with trace("Code interpreter example"):
+        print("Solving math problem...")
+        result = Runner.run_streamed(agent, "What is the square root of 273 * 312821 plus 1782?")
+        async for event in result.stream_events():
+            if event.type != "run_item_stream_event":
+                continue  # only run-item events are reported
+            item = event.item
+            if item.type == "tool_call_item" and item.raw_item.type == "code_interpreter_call":
+                print(f"Code interpreter code:\n```\n{item.raw_item.code}\n```\n")
+            else:
+                print(f"Other event: {item.type}")
+
+        print(f"Final output: {result.final_output}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tools/image_generator.py b/examples/tools/image_generator.py
@@ -0,0 +1,54 @@
+import asyncio
+import base64
+import os
+import subprocess
+import sys
+import tempfile
+
+from agents import Agent, ImageGenerationTool, Runner, trace
+
+
+def open_file(path: str) -> None:  # Best-effort: hand `path` to the OS default opener.
+    if sys.platform.startswith("darwin"):
+        subprocess.run(["open", path], check=False)  # macOS
+    elif os.name == "nt":
+        os.startfile(path)  # type: ignore[attr-defined]  # Windows only
+    elif os.name == "posix":
+        subprocess.run(["xdg-open", path], check=False)  # Linux/Unix
+    else:
+        print(f"Don't know how to open files on this platform: {sys.platform}")
+
+
+async def main():
+    """Ask the agent for an image, then save each generated image and open it."""
+    image_tool = ImageGenerationTool(
+        tool_config={"type": "image_generation", "quality": "low"},
+    )
+    agent = Agent(
+        name="Image generator",
+        instructions="You are a helpful agent.",
+        tools=[image_tool],
+    )
+
+    with trace("Image generation example"):
+        print("Generating image, this may take a while...")
+        prompt = "Create an image of a frog eating a pizza, comic book style."
+        result = await Runner.run(agent, prompt)
+        print(result.final_output)
+        for item in result.new_items:
+            if item.type != "tool_call_item":
+                continue
+            if item.raw_item.type != "image_generation_call":
+                continue
+            encoded = item.raw_item.result
+            if not encoded:
+                continue  # the call carried no image payload
+            # delete=False keeps the file on disk so the external viewer can read it
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as png:
+                png.write(base64.b64decode(encoded))
+                saved_path = png.name
+            open_file(saved_path)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
@@ -54,11 +54,16 @@
     StreamEvent,
 )
 from .tool import (
+    CodeInterpreterTool,
     ComputerTool,
     FileSearchTool,
     FunctionTool,
     FunctionToolResult,
     HostedMCPTool,
+    ImageGenerationTool,
+    LocalShellCommandRequest,
+    LocalShellExecutor,
+    LocalShellTool,
     MCPToolApprovalFunction,
     MCPToolApprovalFunctionResult,
     MCPToolApprovalRequest,
@@ -210,6 +215,11 @@ def enable_verbose_stdout_logging():
     "FunctionToolResult",
     "ComputerTool",
     "FileSearchTool",
+    "CodeInterpreterTool",
+    "ImageGenerationTool",
+    "LocalShellCommandRequest",
+    "LocalShellExecutor",
+    "LocalShellTool",
     "Tool",
     "WebSearchTool",
     "HostedMCPTool",
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
@@ -14,6 +14,9 @@
     ResponseFunctionWebSearch,
     ResponseOutputMessage,
 )
+from openai.types.responses.response_code_interpreter_tool_call import (
+    ResponseCodeInterpreterToolCall,
+)
 from openai.types.responses.response_computer_tool_call import (
     ActionClick,
     ActionDoubleClick,
@@ -26,7 +29,12 @@
     ActionWait,
 )
 from openai.types.responses.response_input_param import ComputerCallOutput, McpApprovalResponse
-from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
+from openai.types.responses.response_output_item import (
+    ImageGenerationCall,
+    LocalShellCall,
+    McpApprovalRequest,
+    McpListTools,
+)
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 
 from .agent import Agent, ToolsToFinalOutputResult
@@ -61,6 +69,8 @@
     FunctionTool,
     FunctionToolResult,
     HostedMCPTool,
+    LocalShellCommandRequest,
+    LocalShellTool,
     MCPToolApprovalRequest,
     Tool,
 )
@@ -129,12 +139,19 @@ class ToolRunMCPApprovalRequest:
     mcp_tool: HostedMCPTool
 
 
+@dataclass
+class ToolRunLocalShellCall:
+    tool_call: LocalShellCall  # the local-shell call item emitted by the model
+    local_shell_tool: LocalShellTool  # the agent's tool whose executor will run it
+
+
 @dataclass
 class ProcessedResponse:
     new_items: list[RunItem]
     handoffs: list[ToolRunHandoff]
     functions: list[ToolRunFunction]
     computer_actions: list[ToolRunComputerAction]
+    local_shell_calls: list[ToolRunLocalShellCall]
     tools_used: list[str]  # Names of all tools used, including hosted tools
     mcp_approval_requests: list[ToolRunMCPApprovalRequest]  # Only requests with callbacks
 
@@ -146,6 +163,7 @@ def has_tools_or_approvals_to_run(self) -> bool:
                 self.handoffs,
                 self.functions,
                 self.computer_actions,
+                self.local_shell_calls,
                 self.mcp_approval_requests,
             ]
         )
@@ -371,11 +389,15 @@ def process_model_response(
         run_handoffs = []
         functions = []
         computer_actions = []
+        local_shell_calls = []
         mcp_approval_requests = []
         tools_used: list[str] = []
         handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
         function_map = {tool.name: tool for tool in all_tools if isinstance(tool, FunctionTool)}
         computer_tool = next((tool for tool in all_tools if isinstance(tool, ComputerTool)), None)
+        local_shell_tool = next(
+            (tool for tool in all_tools if isinstance(tool, LocalShellTool)), None
+        )
         hosted_mcp_server_map = {
             tool.tool_config["server_label"]: tool
             for tool in all_tools
@@ -434,9 +456,29 @@ def process_model_response(
                         )
             elif isinstance(output, McpListTools):
                 items.append(MCPListToolsItem(raw_item=output, agent=agent))
-            elif isinstance(output, McpCall):
+            elif isinstance(output, ImageGenerationCall):
+                items.append(ToolCallItem(raw_item=output, agent=agent))
+                tools_used.append("image_generation")
+            elif isinstance(output, ResponseCodeInterpreterToolCall):
                 items.append(ToolCallItem(raw_item=output, agent=agent))
-                tools_used.append(output.name)
+                tools_used.append("code_interpreter")
+            elif isinstance(output, LocalShellCall):
+                items.append(ToolCallItem(raw_item=output, agent=agent))
+                tools_used.append("local_shell")
+                if not local_shell_tool:
+                    _error_tracing.attach_error_to_current_span(
+                        SpanError(
+                            message="Local shell tool not found",
+                            data={},
+                        )
+                    )
+                    raise ModelBehaviorError(
+                        "Model produced local shell call without a local shell tool."
+                    )
+                local_shell_calls.append(
+                    ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
+                )
+
             elif not isinstance(output, ResponseFunctionToolCall):
                 logger.warning(f"Unexpected output type, ignoring: {type(output)}")
                 continue
@@ -478,6 +520,7 @@ def process_model_response(
             handoffs=run_handoffs,
             functions=functions,
             computer_actions=computer_actions,
+            local_shell_calls=local_shell_calls,
             tools_used=tools_used,
             mcp_approval_requests=mcp_approval_requests,
         )
@@ -552,6 +595,30 @@ async def run_single_tool(
             for tool_run, result in zip(tool_runs, results)
         ]
 
+    @classmethod
+    async def execute_local_shell_calls(
+        cls,
+        *,
+        agent: Agent[TContext],
+        calls: list[ToolRunLocalShellCall],
+        context_wrapper: RunContextWrapper[TContext],
+        hooks: RunHooks[TContext],
+        config: RunConfig,
+    ) -> list[RunItem]:
+        """Execute each local shell call via `LocalShellAction` and return the output items."""
+        output_items: list[RunItem] = []
+        # Awaited one at a time (never gathered): each call can change the local shell state.
+        for shell_call in calls:
+            item = await LocalShellAction.execute(
+                agent=agent,
+                call=shell_call,
+                hooks=hooks,
+                context_wrapper=context_wrapper,
+                config=config,
+            )
+            output_items.append(item)
+        return output_items
+
     @classmethod
     async def execute_computer_actions(
         cls,
@@ -1021,3 +1088,54 @@ async def _get_screenshot_async(
             await computer.wait()
 
         return await computer.screenshot()
+
+
+class LocalShellAction:
+    """Runs one local shell call requested by the model through the tool's executor."""
+
+    @classmethod
+    async def execute(
+        cls,
+        *,
+        agent: Agent[TContext],
+        call: ToolRunLocalShellCall,
+        hooks: RunHooks[TContext],
+        context_wrapper: RunContextWrapper[TContext],
+        config: RunConfig,
+    ) -> RunItem:
+        """Run the call's executor between the tool start/end hooks and wrap its result."""
+        tool = call.local_shell_tool
+        await asyncio.gather(
+            hooks.on_tool_start(context_wrapper, agent, tool),
+            (
+                agent.hooks.on_tool_start(context_wrapper, agent, tool)
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+        )
+
+        request = LocalShellCommandRequest(ctx_wrapper=context_wrapper, data=call.tool_call)
+        output = tool.executor(request)
+        # The executor may be sync or async; only await when it handed back an awaitable.
+        result = await output if inspect.isawaitable(output) else output
+
+        await asyncio.gather(
+            hooks.on_tool_end(context_wrapper, agent, tool, result),
+            (
+                agent.hooks.on_tool_end(context_wrapper, agent, tool, result)
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+        )
+
+        return ToolCallOutputItem(
+            agent=agent,
+            # Resolved value, not `output`: for async executors that is a spent awaitable.
+            output=result,
+            raw_item={
+                "type": "local_shell_call_output",
+                "id": call.tool_call.call_id,
+                "output": result,
+                # "id": "out" + call.tool_call.id,  # TODO remove this, it should be optional
+            },
+        )
diff --git a/src/agents/items.py b/src/agents/items.py
@@ -18,12 +18,22 @@
     ResponseOutputText,
     ResponseStreamEvent,
 )
+from openai.types.responses.response_code_interpreter_tool_call import (
+    ResponseCodeInterpreterToolCall,
+)
 from openai.types.responses.response_input_item_param import (
     ComputerCallOutput,
     FunctionCallOutput,
+    LocalShellCallOutput,
     McpApprovalResponse,
 )
-from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
+from openai.types.responses.response_output_item import (
+    ImageGenerationCall,
+    LocalShellCall,
+    McpApprovalRequest,
+    McpCall,
+    McpListTools,
+)
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from pydantic import BaseModel
 from typing_extensions import TypeAlias
@@ -114,6 +124,9 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
     ResponseFileSearchToolCall,
     ResponseFunctionWebSearch,
     McpCall,
+    ResponseCodeInterpreterToolCall,
+    ImageGenerationCall,
+    LocalShellCall,
 ]
 """A type that represents a tool call item."""
 
@@ -129,10 +142,12 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]):
 
 
 @dataclass
-class ToolCallOutputItem(RunItemBase[Union[FunctionCallOutput, ComputerCallOutput]]):
+class ToolCallOutputItem(
+    RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]]
+):
     """Represents the output of a tool call."""
 
-    raw_item: FunctionCallOutput | ComputerCallOutput
+    raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput
     """The raw item from the model."""
 
     output: Any
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
diff --git a/src/agents/tool.py b/src/agents/tool.py