Add ExecutiveAssistant agent and Deepgram voice support

TGreen87 · TGreen87 · commit 7b67ddf2aa2b · 2025-05-20T04:40:04.000+10:00
diff --git a/agents/executive_assistant/__init__.py b/agents/executive_assistant/__init__.py
@@ -0,0 +1,50 @@
+"""Executive Assistant agent.
+
+Defines ``executive_assistant_agent``, an agent configured with calendar and
+email tools, and ``ExecutiveAssistantState``, a container bundling the
+short-term memory, long-term memory and retriever used alongside it.
+"""
+
+from __future__ import annotations
+
+# mypy: ignore-errors
+
+from agents import Agent
+
+from .memory import LongTermMemory, ShortTermMemory
+from .rag import Retriever
+from .tools import get_calendar_events, send_email
+
+
+class ExecutiveAssistantState:
+    """Holds resources used by the Executive Assistant.
+
+    Constructing the state builds all three resources eagerly, which includes
+    loading the long-term memory file when it already exists.
+    """
+
+    def __init__(self, memory_path: str = "memory.json") -> None:
+        """Create the memory stores and the retriever.
+
+        Args:
+            memory_path: File backing the long-term memory. The default is a
+                relative path, so it resolves against the current working
+                directory.
+        """
+        self.short_memory = ShortTermMemory()
+        self.long_memory = LongTermMemory(memory_path)
+        self.retriever = Retriever()
+
+
+# Shared module-level agent; the two tools are the only ones registered here.
+executive_assistant_agent = Agent(
+    name="ExecutiveAssistant",
+    instructions=(
+        "You are an executive assistant. Use the available tools to help the user. "
+        "Remember important facts during the conversation for later retrieval."
+    ),
+    model="gpt-4o-mini",
+    tools=[get_calendar_events, send_email],
+)
+
+__all__ = ["ExecutiveAssistantState", "executive_assistant_agent"]
diff --git a/agents/executive_assistant/memory.py b/agents/executive_assistant/memory.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+
+class ShortTermMemory:
+    """In-memory store for conversation turns."""
+
+    def __init__(self) -> None:
+        self._messages: list[dict[str, str]] = []
+
+    def add(self, role: str, content: str) -> None:
+        """Add a message to memory."""
+        self._messages.append({"role": role, "content": content})
+
+    def to_list(self) -> list[dict[str, str]]:
+        """Return a new list with at most the 20 most recent messages."""
+        return self._messages[-20:]
+
+
+class LongTermMemory:
+    """Simple file backed memory store.
+
+    Items live in a list that is written to the backing file as a single JSON
+    array after every ``add``.
+    """
+
+    def __init__(self, path: str | Path) -> None:
+        """Load existing items from ``path``, or start empty if it is missing.
+
+        Raises:
+            ValueError: If the file holds something other than a JSON array
+                (``json.JSONDecodeError`` for malformed JSON is a subclass).
+        """
+        self._path = Path(path)
+        self._data: list[Any] = []
+        try:
+            raw = self._path.read_text(encoding="utf-8")
+        except FileNotFoundError:
+            return
+        # An empty file carries no items; treat it like a missing one.
+        loaded = json.loads(raw) if raw.strip() else []
+        if not isinstance(loaded, list):
+            raise ValueError(
+                f"{self._path} must contain a JSON array, got {type(loaded).__name__}"
+            )
+        self._data = loaded
+
+    def add(self, item: Any) -> None:
+        """Persist an item to disk.
+
+        The new list is serialised before any state changes, so an item that
+        is not JSON serialisable raises ``TypeError`` and leaves both the
+        in-memory list and the file as they were. The file is then swapped in
+        via a temporary sibling so a failed write cannot truncate it.
+        """
+        updated = [*self._data, item]
+        payload = json.dumps(updated)
+        tmp_path = self._path.with_name(f"{self._path.name}.tmp")
+        tmp_path.write_text(payload, encoding="utf-8")
+        tmp_path.replace(self._path)
+        self._data = updated
+
+    def all(self) -> list[Any]:
+        """Return a shallow copy of all persisted items."""
+        return list(self._data)
diff --git a/agents/executive_assistant/rag.py b/agents/executive_assistant/rag.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+
+class Retriever:
+    """Very small RAG retriever stub.
+
+    Documents are plain strings held in memory, and a search is a
+    case-insensitive substring match with no ranking.
+    """
+
+    def __init__(self, corpus: Iterable[str] | None = None) -> None:
+        """Copy ``corpus`` into an internal list; ``None`` means empty."""
+        # Compare against None explicitly: truth-testing an arbitrary iterable
+        # is not guaranteed to be meaningful (or even allowed).
+        self._corpus: list[str] = [] if corpus is None else list(corpus)
+
+    def add(self, document: str) -> None:
+        """Add a document to the corpus."""
+        self._corpus.append(document)
+
+    def search(self, query: str) -> list[str]:
+        """Return documents containing the query string.
+
+        Matching ignores case and results keep insertion order. An empty
+        query is a substring of every document, so it returns all of them.
+        """
+        # Fold the query once instead of once per document. casefold() is the
+        # Unicode-aware counterpart of lower() (for example "ß" matches "SS").
+        needle = query.casefold()
+        return [doc for doc in self._corpus if needle in doc.casefold()]
diff --git a/agents/executive_assistant/tools.py b/agents/executive_assistant/tools.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from agents import function_tool
+
+
+@function_tool
+def get_calendar_events(date: str) -> str:
+    """Retrieve calendar events for a given date.
+
+    Args:
+        date: The date to look up; it is echoed back verbatim in the reply.
+    """
+    # TODO: Integrate with calendar API.
+    # Until then, report that nothing was checked: "No events found" would
+    # present an unverified claim about the user's schedule as fact.
+    return f"Calendar integration is not configured; cannot look up events for {date}."
+
+
+@function_tool
+def send_email(recipient: str, subject: str, body: str) -> str:
+    """Send a simple email.
+
+    Args:
+        recipient: Address the email is intended for.
+        subject: Subject line.
+        body: Message text.
+    """
+    # TODO: Integrate with email service.
+    # Until then, never claim success: a caller told "Email sent." would
+    # believe a message was delivered when nothing left this process.
+    return "Email was not sent: email integration is not configured."
diff --git a/agents/pyproject.toml b/agents/pyproject.toml
@@ -0,0 +1,13 @@
+# Declare the backend explicitly: without a [build-system] table, build
+# frontends fall back to setuptools and ignore the [tool.hatch] section.
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "custom-agents"
+version = "0.0.0"
+requires-python = ">=3.9"
+
+[tool.hatch.build.targets.wheel]
+packages = ["executive_assistant"]
diff --git a/src/agents/voice/__init__.py b/src/agents/voice/__init__.py
@@ -10,6 +10,9 @@
     TTSVoice,
     VoiceModelProvider,
 )
+from .models.deepgram_model_provider import DeepgramVoiceModelProvider
+from .models.deepgram_stt import DeepgramSTTModel
+from .models.deepgram_tts import DeepgramTTSModel
 from .models.openai_model_provider import OpenAIVoiceModelProvider
 from .models.openai_stt import OpenAISTTModel, OpenAISTTTranscriptionSession
 from .models.openai_tts import OpenAITTSModel
@@ -38,6 +41,9 @@
     "OpenAIVoiceModelProvider",
     "OpenAISTTModel",
     "OpenAITTSModel",
+    "DeepgramVoiceModelProvider",
+    "DeepgramSTTModel",
+    "DeepgramTTSModel",
     "VoiceStreamEventAudio",
     "VoiceStreamEventLifecycle",
     "VoiceStreamEvent",
diff --git a/src/agents/voice/models/deepgram_model_provider.py b/src/agents/voice/models/deepgram_model_provider.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+import httpx  # type: ignore
+
+from ..model import STTModel, TTSModel, VoiceModelProvider
+from .deepgram_stt import DeepgramSTTModel
+from .deepgram_tts import DeepgramTTSModel
+
+DEFAULT_STT_MODEL = "nova-3"
+DEFAULT_TTS_MODEL = "aura-2"
+
+
+class DeepgramVoiceModelProvider(VoiceModelProvider):
+    """Voice model provider for Deepgram APIs.
+
+    Every model handed out shares one ``httpx.AsyncClient``: the one passed
+    in, or one created lazily on first use.
+    """
+
+    def __init__(self, api_key: str, *, client: httpx.AsyncClient | None = None) -> None:
+        """Store the credentials and the optional caller-supplied client.
+
+        Args:
+            api_key: Deepgram API key forwarded to every model.
+            client: HTTP client to reuse. When omitted, the provider creates
+                its own on demand and closes it in ``aclose``.
+        """
+        self._api_key = api_key
+        self._client = client
+        # A caller-supplied client stays the caller's responsibility to close.
+        self._owns_client = client is None
+
+    def _get_client(self) -> httpx.AsyncClient:
+        """Return the shared client, creating it on first use."""
+        if self._client is None:
+            self._client = httpx.AsyncClient()
+        return self._client
+
+    async def aclose(self) -> None:
+        """Close the client this provider created, if any.
+
+        A client passed to ``__init__`` is left open. Models obtained earlier
+        share the closed client and must not be used afterwards.
+        """
+        if self._owns_client and self._client is not None:
+            await self._client.aclose()
+            self._client = None
+
+    def get_stt_model(self, model_name: str | None) -> STTModel:
+        """Return a Deepgram STT model, defaulting to ``DEFAULT_STT_MODEL``."""
+        return DeepgramSTTModel(
+            model_name or DEFAULT_STT_MODEL, self._api_key, client=self._get_client()
+        )
+
+    def get_tts_model(self, model_name: str | None) -> TTSModel:
+        """Return a Deepgram TTS model, defaulting to ``DEFAULT_TTS_MODEL``."""
+        return DeepgramTTSModel(
+            model_name or DEFAULT_TTS_MODEL, self._api_key, client=self._get_client()
+        )
diff --git a/src/agents/voice/models/deepgram_stt.py b/src/agents/voice/models/deepgram_stt.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from typing import Any
+
+import httpx  # type: ignore
+
+from ..input import AudioInput, StreamedAudioInput
+from ..model import StreamedTranscriptionSession, STTModel, STTModelSettings
+
+DEEPGRAM_LISTEN_URL = "https://api.deepgram.com/v1/listen"
+
+
+def _first_transcript(payload: Any) -> str:
+    """Return ``results.channels[0].alternatives[0].transcript`` or ``""``.
+
+    Any missing key, empty list or unexpected type along that path yields an
+    empty string rather than an ``IndexError``/``AttributeError``.
+    """
+    node: Any = payload.get("results") if isinstance(payload, dict) else None
+    for key in ("channels", "alternatives"):
+        items = node.get(key) if isinstance(node, dict) else None
+        if not isinstance(items, list) or not items:
+            return ""
+        node = items[0]
+    transcript = node.get("transcript") if isinstance(node, dict) else None
+    return transcript if isinstance(transcript, str) else ""
+
+
+class DeepgramSTTModel(STTModel):
+    """Speech-to-text model backed by Deepgram's ``/v1/listen`` endpoint."""
+
+    def __init__(
+        self, model: str, api_key: str, *, client: httpx.AsyncClient | None = None
+    ) -> None:
+        """Store the model name, API key and HTTP client.
+
+        A new ``httpx.AsyncClient`` is created when ``client`` is omitted.
+        """
+        self.model = model
+        self.api_key = api_key
+        self._client = client or httpx.AsyncClient()
+
+    @property
+    def model_name(self) -> str:
+        """Name of the Deepgram model requested for transcription."""
+        return self.model
+
+    async def transcribe(
+        self,
+        input: AudioInput,
+        settings: STTModelSettings,
+        trace_include_sensitive_data: bool,
+        trace_include_sensitive_audio_data: bool,
+    ) -> str:
+        """Transcribe ``input`` with a single request and return the text.
+
+        ``settings`` and the two trace flags are accepted for interface
+        compatibility but are not used.
+
+        Returns:
+            The first transcript in the response, or ``""`` if there is none.
+
+        Raises:
+            httpx.HTTPStatusError: If Deepgram answers with a 4xx/5xx status.
+        """
+        # presumably (filename, BytesIO-like buffer, MIME type) -- confirm
+        # against AudioInput.to_audio_file.
+        _filename, data, content_type = input.to_audio_file()
+        headers = {"Authorization": f"Token {self.api_key}"}
+        if content_type:
+            # Declare the audio format instead of leaving it unspecified.
+            headers["Content-Type"] = content_type
+        response = await self._client.post(
+            DEEPGRAM_LISTEN_URL,
+            # Let httpx encode the model name rather than splicing it into the URL.
+            params={"model": self.model},
+            headers=headers,
+            content=data.getvalue(),
+        )
+        # Surface auth/quota/validation failures instead of returning "".
+        response.raise_for_status()
+        return _first_transcript(response.json())
+
+    async def create_session(
+        self,
+        input: StreamedAudioInput,
+        settings: STTModelSettings,
+        trace_include_sensitive_data: bool,
+        trace_include_sensitive_audio_data: bool,
+    ) -> StreamedTranscriptionSession:
+        """Streaming transcription is not supported by this model yet."""
+        raise NotImplementedError("Streaming transcription is not implemented.")
diff --git a/src/agents/voice/models/deepgram_tts.py b/src/agents/voice/models/deepgram_tts.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+
+import httpx  # type: ignore
+
+from ..model import TTSModel, TTSModelSettings
+
+
+class DeepgramTTSModel(TTSModel):
+    """Text-to-speech model backed by Deepgram's ``/v1/speak`` endpoint."""
+
+    def __init__(
+        self, model: str, api_key: str, *, client: httpx.AsyncClient | None = None
+    ) -> None:
+        """Store the model name, API key and HTTP client.
+
+        A new ``httpx.AsyncClient`` is created when ``client`` is omitted.
+        """
+        self.model = model
+        self.api_key = api_key
+        self._client = client or httpx.AsyncClient()
+
+    @property
+    def model_name(self) -> str:
+        """Name of the Deepgram model requested for synthesis."""
+        return self.model
+
+    async def run(self, text: str, settings: TTSModelSettings) -> AsyncIterator[bytes]:
+        """Synthesise ``text`` and yield the audio as a single chunk.
+
+        The whole response body is downloaded before anything is yielded.
+
+        Raises:
+            httpx.HTTPStatusError: If Deepgram answers with a 4xx/5xx status.
+        """
+        url = "https://api.deepgram.com/v1/speak"
+        headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "application/json"}
+        # NOTE(review): Deepgram's reference appears to take the model as a
+        # ``model`` query parameter -- confirm this JSON body is accepted.
+        payload = {"text": text, "model": self.model, "voice": settings.voice or "aura-2"}
+        response = await self._client.post(url, headers=headers, json=payload)
+        # Without this check an error body would be yielded as if it were audio.
+        response.raise_for_status()
+        yield response.content
diff --git a/tests/executive_assistant/test_agent.py b/tests/executive_assistant/test_agent.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+import pytest
+
+from agents import Agent, Runner, RunResult
+from agents.executive_assistant import executive_assistant_agent
+from tests.fake_model import FakeModel
+
+
+@pytest.mark.asyncio
+async def test_agent_runs_with_fake_model() -> None:
+    """A copy of the agent driven by ``FakeModel`` returns the scripted reply."""
+    model = FakeModel()
+    # Rebuild the agent around the fake model instead of its configured one.
+    agent = Agent(
+        name=executive_assistant_agent.name,
+        instructions=executive_assistant_agent.instructions,
+        tools=executive_assistant_agent.tools,
+        model=model,
+    )
+    model.set_next_output(
+        [
+            {"role": "assistant", "content": "Hello"},
+        ]
+    )
+
+    # Runner.run returns a RunResult; annotating it as
+    # ToolsToFinalOutputResult mislabels the value being asserted on.
+    result: RunResult = await Runner.run(agent, "hi")
+    assert result.final_output == "Hello"
diff --git a/tests/voice/test_deepgram_models.py b/tests/voice/test_deepgram_models.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+import httpx  # type: ignore
+import pytest
+
+from agents.voice import DeepgramSTTModel, DeepgramTTSModel, DeepgramVoiceModelProvider
+
+
+@pytest.mark.asyncio
+async def test_provider_returns_models() -> None:
+    """The provider hands out Deepgram models with the default model names."""
+    # Supply the client explicitly and close it on exit; otherwise the provider
+    # creates an AsyncClient that this test never gets a chance to close.
+    async with httpx.AsyncClient() as client:
+        provider = DeepgramVoiceModelProvider(api_key="key", client=client)
+        stt = provider.get_stt_model(None)
+        tts = provider.get_tts_model(None)
+
+    assert isinstance(stt, DeepgramSTTModel)
+    assert isinstance(tts, DeepgramTTSModel)
+    assert stt.model_name == "nova-3"
+    assert tts.model_name == "aura-2"