Skip to content

Add support for local shell, image generator, code interpreter tools #732

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions examples/tools/code_interpreter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import asyncio

from agents import Agent, CodeInterpreterTool, Runner, trace


async def main():
    """Stream a math-solving run that uses the hosted code interpreter tool.

    Prints the generated Python code for each `code_interpreter_call` event,
    a short note for every other run item, and finally the agent's answer.
    """
    agent = Agent(
        name="Code interpreter",
        instructions="You love doing math.",
        tools=[
            CodeInterpreterTool(
                # "auto" lets the API provision a fresh sandbox container on demand.
                tool_config={"type": "code_interpreter", "container": {"type": "auto"}},
            )
        ],
    )

    with trace("Code interpreter example"):
        print("Solving math problem...")
        # BUG FIX: the prompt previously read "square root of273" (missing space).
        result = Runner.run_streamed(agent, "What is the square root of 273 * 312821 plus 1782?")
        async for event in result.stream_events():
            if (
                event.type == "run_item_stream_event"
                and event.item.type == "tool_call_item"
                and event.item.raw_item.type == "code_interpreter_call"
            ):
                print(f"Code interpreter code:\n```\n{event.item.raw_item.code}\n```\n")
            elif event.type == "run_item_stream_event":
                print(f"Other event: {event.item.type}")

        print(f"Final output: {result.final_output}")


# Entry point: run the streaming example when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
54 changes: 54 additions & 0 deletions examples/tools/image_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import asyncio
import base64
import os
import subprocess
import sys
import tempfile

from agents import Agent, ImageGenerationTool, Runner, trace


def open_file(path: str) -> None:
    """Open *path* with the platform's default viewer (best-effort).

    Falls back to printing a message on platforms with no known opener.
    Subprocess-based openers use check=False so a missing helper binary
    does not raise.
    """
    if sys.platform.startswith("darwin"):
        subprocess.run(["open", path], check=False)  # macOS
    elif os.name == "nt":  # Windows
        # BUG FIX: was `os.astartfile`, which does not exist; the Windows
        # API is os.startfile (the type: ignore was hiding the typo).
        os.startfile(path)  # type: ignore
    elif os.name == "posix":
        subprocess.run(["xdg-open", path], check=False)  # Linux/Unix
    else:
        print(f"Don't know how to open files on this platform: {sys.platform}")


async def main():
    """Ask the agent to generate an image, then open each returned PNG.

    Every `image_generation_call` result (base64-encoded PNG) is decoded
    into a temporary file which is opened with the platform viewer.
    """
    agent = Agent(
        name="Image generator",
        instructions="You are a helpful agent.",
        tools=[
            ImageGenerationTool(
                tool_config={"type": "image_generation", "quality": "low"},
            )
        ],
    )

    with trace("Image generation example"):
        print("Generating image, this may take a while...")
        result = await Runner.run(
            agent, "Create an image of a frog eating a pizza, comic book style."
        )
        print(result.final_output)
        for item in result.new_items:
            # Guard clauses: only tool calls of type image_generation_call
            # that actually carry a result payload are of interest.
            if item.type != "tool_call_item":
                continue
            raw = item.raw_item
            if raw.type != "image_generation_call":
                continue
            encoded = raw.result
            if not encoded:
                continue

            # Decode the base64 payload into a temp .png, then open it.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                tmp.write(base64.b64decode(encoded))
                temp_path = tmp.name

            open_file(temp_path)


# Entry point: run the image-generation example when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
10 changes: 10 additions & 0 deletions src/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,16 @@
StreamEvent,
)
from .tool import (
CodeInterpreterTool,
ComputerTool,
FileSearchTool,
FunctionTool,
FunctionToolResult,
HostedMCPTool,
ImageGenerationTool,
LocalShellCommandRequest,
LocalShellExecutor,
LocalShellTool,
MCPToolApprovalFunction,
MCPToolApprovalFunctionResult,
MCPToolApprovalRequest,
Expand Down Expand Up @@ -210,6 +215,11 @@ def enable_verbose_stdout_logging():
"FunctionToolResult",
"ComputerTool",
"FileSearchTool",
"CodeInterpreterTool",
"ImageGenerationTool",
"LocalShellCommandRequest",
"LocalShellExecutor",
"LocalShellTool",
"Tool",
"WebSearchTool",
"HostedMCPTool",
Expand Down
124 changes: 121 additions & 3 deletions src/agents/_run_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
ResponseFunctionWebSearch,
ResponseOutputMessage,
)
from openai.types.responses.response_code_interpreter_tool_call import (
ResponseCodeInterpreterToolCall,
)
from openai.types.responses.response_computer_tool_call import (
ActionClick,
ActionDoubleClick,
Expand All @@ -26,7 +29,12 @@
ActionWait,
)
from openai.types.responses.response_input_param import ComputerCallOutput, McpApprovalResponse
from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
from openai.types.responses.response_output_item import (
ImageGenerationCall,
LocalShellCall,
McpApprovalRequest,
McpListTools,
)
from openai.types.responses.response_reasoning_item import ResponseReasoningItem

from .agent import Agent, ToolsToFinalOutputResult
Expand Down Expand Up @@ -61,6 +69,8 @@
FunctionTool,
FunctionToolResult,
HostedMCPTool,
LocalShellCommandRequest,
LocalShellTool,
MCPToolApprovalRequest,
Tool,
)
Expand Down Expand Up @@ -129,12 +139,19 @@ class ToolRunMCPApprovalRequest:
mcp_tool: HostedMCPTool


@dataclass
class ToolRunLocalShellCall:
    """Pairs a model-issued local shell call with the tool that will run it."""

    # The local_shell tool call emitted by the model (contains the command).
    tool_call: LocalShellCall
    # The configured LocalShellTool whose executor will run the command.
    local_shell_tool: LocalShellTool


@dataclass
class ProcessedResponse:
new_items: list[RunItem]
handoffs: list[ToolRunHandoff]
functions: list[ToolRunFunction]
computer_actions: list[ToolRunComputerAction]
local_shell_calls: list[ToolRunLocalShellCall]
tools_used: list[str] # Names of all tools used, including hosted tools
mcp_approval_requests: list[ToolRunMCPApprovalRequest] # Only requests with callbacks

Expand All @@ -146,6 +163,7 @@ def has_tools_or_approvals_to_run(self) -> bool:
self.handoffs,
self.functions,
self.computer_actions,
self.local_shell_calls,
self.mcp_approval_requests,
]
)
Expand Down Expand Up @@ -371,11 +389,15 @@ def process_model_response(
run_handoffs = []
functions = []
computer_actions = []
local_shell_calls = []
mcp_approval_requests = []
tools_used: list[str] = []
handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
function_map = {tool.name: tool for tool in all_tools if isinstance(tool, FunctionTool)}
computer_tool = next((tool for tool in all_tools if isinstance(tool, ComputerTool)), None)
local_shell_tool = next(
(tool for tool in all_tools if isinstance(tool, LocalShellTool)), None
)
hosted_mcp_server_map = {
tool.tool_config["server_label"]: tool
for tool in all_tools
Expand Down Expand Up @@ -434,9 +456,29 @@ def process_model_response(
)
elif isinstance(output, McpListTools):
items.append(MCPListToolsItem(raw_item=output, agent=agent))
elif isinstance(output, McpCall):
elif isinstance(output, ImageGenerationCall):
items.append(ToolCallItem(raw_item=output, agent=agent))
tools_used.append("image_generation")
elif isinstance(output, ResponseCodeInterpreterToolCall):
items.append(ToolCallItem(raw_item=output, agent=agent))
tools_used.append(output.name)
tools_used.append("code_interpreter")
elif isinstance(output, LocalShellCall):
items.append(ToolCallItem(raw_item=output, agent=agent))
tools_used.append("local_shell")
if not local_shell_tool:
_error_tracing.attach_error_to_current_span(
SpanError(
message="Local shell tool not found",
data={},
)
)
raise ModelBehaviorError(
"Model produced local shell call without a local shell tool."
)
local_shell_calls.append(
ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
)

elif not isinstance(output, ResponseFunctionToolCall):
logger.warning(f"Unexpected output type, ignoring: {type(output)}")
continue
Expand Down Expand Up @@ -478,6 +520,7 @@ def process_model_response(
handoffs=run_handoffs,
functions=functions,
computer_actions=computer_actions,
local_shell_calls=local_shell_calls,
tools_used=tools_used,
mcp_approval_requests=mcp_approval_requests,
)
Expand Down Expand Up @@ -552,6 +595,30 @@ async def run_single_tool(
for tool_run, result in zip(tool_runs, results)
]

@classmethod
async def execute_local_shell_calls(
    cls,
    *,
    agent: Agent[TContext],
    calls: list[ToolRunLocalShellCall],
    context_wrapper: RunContextWrapper[TContext],
    hooks: RunHooks[TContext],
    config: RunConfig,
) -> list[RunItem]:
    """Run each pending local shell call and collect the output items.

    Calls are executed sequentially, not concurrently: a shell command
    can change local state (cwd, files, environment) that the next
    command depends on.
    """
    items: list[RunItem] = []
    for shell_call in calls:
        item = await LocalShellAction.execute(
            agent=agent,
            call=shell_call,
            hooks=hooks,
            context_wrapper=context_wrapper,
            config=config,
        )
        items.append(item)
    return items

@classmethod
async def execute_computer_actions(
cls,
Expand Down Expand Up @@ -1021,3 +1088,54 @@ async def _get_screenshot_async(
await computer.wait()

return await computer.screenshot()


class LocalShellAction:
    """Executes a single model-issued local shell call via the tool's executor."""

    @classmethod
    async def execute(
        cls,
        *,
        agent: Agent[TContext],
        call: ToolRunLocalShellCall,
        hooks: RunHooks[TContext],
        context_wrapper: RunContextWrapper[TContext],
        config: RunConfig,
    ) -> RunItem:
        """Run the shell call and return a ToolCallOutputItem with its output.

        Fires the run-level and (if present) agent-level on_tool_start /
        on_tool_end hooks around the executor invocation. The executor may
        be synchronous or asynchronous; an awaitable return is awaited.
        """
        await asyncio.gather(
            hooks.on_tool_start(context_wrapper, agent, call.local_shell_tool),
            (
                agent.hooks.on_tool_start(context_wrapper, agent, call.local_shell_tool)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        request = LocalShellCommandRequest(
            ctx_wrapper=context_wrapper,
            data=call.tool_call,
        )
        output = call.local_shell_tool.executor(request)
        if inspect.isawaitable(output):
            result = await output
        else:
            result = output

        await asyncio.gather(
            hooks.on_tool_end(context_wrapper, agent, call.local_shell_tool, result),
            (
                agent.hooks.on_tool_end(context_wrapper, agent, call.local_shell_tool, result)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        return ToolCallOutputItem(
            agent=agent,
            # BUG FIX: was `output=output`, which stored the raw executor
            # return — an un-awaited coroutine when the executor is async.
            # The awaited value `result` is what the hooks and payload use.
            output=result,
            raw_item={
                "type": "local_shell_call_output",
                # NOTE(review): key name "id" populated from call_id was
                # questioned in review — confirm against the Responses API
                # local_shell_call_output schema.
                "id": call.tool_call.call_id,
                "output": result,
            },
        )
21 changes: 18 additions & 3 deletions src/agents/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,22 @@
ResponseOutputText,
ResponseStreamEvent,
)
from openai.types.responses.response_code_interpreter_tool_call import (
ResponseCodeInterpreterToolCall,
)
from openai.types.responses.response_input_item_param import (
ComputerCallOutput,
FunctionCallOutput,
LocalShellCallOutput,
McpApprovalResponse,
)
from openai.types.responses.response_output_item import McpApprovalRequest, McpCall, McpListTools
from openai.types.responses.response_output_item import (
ImageGenerationCall,
LocalShellCall,
McpApprovalRequest,
McpCall,
McpListTools,
)
from openai.types.responses.response_reasoning_item import ResponseReasoningItem
from pydantic import BaseModel
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -114,6 +124,9 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
ResponseFileSearchToolCall,
ResponseFunctionWebSearch,
McpCall,
ResponseCodeInterpreterToolCall,
ImageGenerationCall,
LocalShellCall,
]
"""A type that represents a tool call item."""

Expand All @@ -129,10 +142,12 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]):


@dataclass
class ToolCallOutputItem(RunItemBase[Union[FunctionCallOutput, ComputerCallOutput]]):
class ToolCallOutputItem(
RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]]
):
"""Represents the output of a tool call."""

raw_item: FunctionCallOutput | ComputerCallOutput
raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput
"""The raw item from the model."""

output: Any
Expand Down
Loading