Skip to content

Add Executive Assistant skeleton with Deepgram voice models #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions agents/executive_assistant/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# mypy: ignore-errors
"""Executive Assistant agent.

This agent orchestrates other agents and provides voice capabilities using
Deepgram STT and TTS models. It maintains short-term and long-term memory and
can retrieve information via a simple RAG component.
"""

# The docstring above must be the first statement in the module to be picked
# up as ``__doc__``; only comments and the docstring may precede this import.
from __future__ import annotations

from agents import Agent # noqa: E402

from .memory import LongTermMemory, ShortTermMemory # noqa: E402
from .rag import Retriever # noqa: E402
from .tools import get_calendar_events, send_email # noqa: E402


class ExecutiveAssistantState:
    """Bundle of the memory stores and retriever used by the Executive Assistant.

    All three resources are created eagerly by the constructor and exposed as
    the public attributes ``short_memory``, ``long_memory`` and ``retriever``.
    """

    def __init__(self, memory_path: str = "memory.json") -> None:
        """Create the assistant's resources.

        Args:
            memory_path: Path handed to ``LongTermMemory`` as its backing
                file. Defaults to ``"memory.json"``.
        """
        short_term = ShortTermMemory()
        long_term = LongTermMemory(memory_path)
        document_index = Retriever()

        self.short_memory = short_term
        self.long_memory = long_term
        self.retriever = document_index


# Module-level agent instance; it is constructed once, when this module is
# imported, and wired to the two tool functions imported from ``.tools``.
executive_assistant_agent = Agent(
    name="ExecutiveAssistant",
    model="gpt-4o-mini",
    tools=[get_calendar_events, send_email],
    instructions=(
        "You are an executive assistant. Use the available tools to help the user. "
        "Remember important facts during the conversation for later retrieval."
    ),
)

# Names re-exported as this package's public API.
__all__ = ["ExecutiveAssistantState", "executive_assistant_agent"]
40 changes: 40 additions & 0 deletions agents/executive_assistant/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any


class ShortTermMemory:
    """In-memory store for conversation turns.

    Every added message is retained for the lifetime of the instance;
    ``to_list`` exposes only the most recent ``max_messages`` of them.
    """

    def __init__(self, *, max_messages: int = 20) -> None:
        """Create an empty store.

        Args:
            max_messages: Number of most recent messages returned by
                ``to_list``. Defaults to 20.

        Raises:
            ValueError: If ``max_messages`` is smaller than 1.
        """
        if max_messages < 1:
            # A window of 0 would turn the ``[-0:]`` slice in ``to_list`` into
            # "return everything", which is the opposite of what was asked.
            raise ValueError("max_messages must be at least 1")
        self._max_messages = max_messages
        self._messages: list[dict[str, str]] = []

    def add(self, role: str, content: str) -> None:
        """Append a message to memory.

        Args:
            role: Stored under the ``"role"`` key of the message.
            content: Stored under the ``"content"`` key of the message.
        """
        self._messages.append({"role": role, "content": content})

    def to_list(self) -> list[dict[str, str]]:
        """Return the most recent messages, oldest first.

        Returns:
            A new list holding at most ``max_messages`` entries. Mutating the
            list itself does not affect the store.
        """
        return self._messages[-self._max_messages:]


class LongTermMemory:
    """Simple file backed memory store.

    Items live in an in-memory list that is mirrored to a JSON file. The whole
    list is rewritten to disk on every ``add``.
    """

    def __init__(self, path: str | Path) -> None:
        """Load previously persisted items, if any.

        A missing or blank file is treated as an empty store.

        Args:
            path: Location of the JSON file backing this store.

        Raises:
            json.JSONDecodeError: If the file has content that is not valid
                JSON.
            ValueError: If the file holds valid JSON that is not a list.
        """
        self._path = Path(path)
        try:
            raw = self._path.read_text(encoding="utf-8")
        except FileNotFoundError:
            raw = ""

        loaded = json.loads(raw) if raw.strip() else []
        if not isinstance(loaded, list):
            # Fail at load time rather than later on the first ``append``.
            raise ValueError(
                f"{self._path} must contain a JSON list, got {type(loaded).__name__}"
            )
        self._data: list[Any] = loaded

    def add(self, item: Any) -> None:
        """Persist an item to disk.

        Args:
            item: Any JSON-serializable value.

        Raises:
            TypeError: If ``item`` cannot be serialized to JSON. Neither the
                in-memory list nor the file is modified in that case.
        """
        # Serialize and write before touching in-memory state, so a failure
        # cannot leave an item in memory that makes every later write fail.
        serialized = json.dumps([*self._data, item])
        self._path.write_text(serialized, encoding="utf-8")
        self._data.append(item)

    def all(self) -> list[Any]:
        """Return all persisted items.

        Returns:
            A shallow copy of the stored list.
        """
        return list(self._data)
18 changes: 18 additions & 0 deletions agents/executive_assistant/rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from __future__ import annotations

from collections.abc import Iterable


class Retriever:
    """Very small RAG retriever stub.

    Holds documents in a list and matches them with a caseless substring
    test; there is no ranking or tokenisation.
    """

    def __init__(self, corpus: Iterable[str] | None = None) -> None:
        """Create a retriever.

        Args:
            corpus: Optional initial documents. They are copied into an
                internal list, so any iterable (including a generator) works.
        """
        self._corpus = list(corpus or [])

    def add(self, document: str) -> None:
        """Add a document to the corpus.

        Args:
            document: Text appended to the end of the corpus.
        """
        self._corpus.append(document)

    def search(self, query: str) -> list[str]:
        """Return documents containing the query string, ignoring case.

        Args:
            query: Substring to look for. An empty query matches every
                document, because the empty string is a substring of any
                string.

        Returns:
            Matching documents in insertion order.
        """
        # Fold the query once instead of once per document. ``casefold`` is
        # the caseless-comparison form, so e.g. "STRASSE" matches "Straße".
        needle = query.casefold()
        return [doc for doc in self._corpus if needle in doc.casefold()]
17 changes: 17 additions & 0 deletions agents/executive_assistant/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from agents import function_tool


@function_tool
def get_calendar_events(date: str) -> str:
    """Retrieve calendar events for a given date."""
    # NOTE(review): the docstring is left untouched on purpose -- the
    # ``function_tool`` decorator presumably reads it as the tool description;
    # confirm before rewording it.
    # TODO: Integrate with calendar API.
    # Placeholder: always reports that nothing is scheduled on ``date``.
    placeholder_reply = f"No events found for {date}."
    return placeholder_reply


@function_tool
def send_email(recipient: str, subject: str, body: str) -> str:
    """Send a simple email."""
    # NOTE(review): the docstring is left untouched on purpose -- the
    # ``function_tool`` decorator presumably reads it as the tool description;
    # confirm before rewording it.
    # TODO: Integrate with email service.
    # Placeholder: nothing is sent yet and all three arguments are ignored,
    # but the fixed confirmation string is still returned.
    confirmation = "Email sent."
    return confirmation
7 changes: 7 additions & 0 deletions agents/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Packaging metadata for the "custom-agents" distribution.
[project]
name = "custom-agents"
version = "0.0.0"
requires-python = ">=3.9"

# Hatch wheel target: only the ``executive_assistant`` package is included.
# NOTE(review): no [build-system] table is declared alongside this section --
# confirm the Hatch build backend is configured so this setting takes effect.
[tool.hatch.build.targets.wheel]
packages = ["executive_assistant"]
6 changes: 6 additions & 0 deletions src/agents/voice/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
TTSVoice,
VoiceModelProvider,
)
from .models.deepgram_model_provider import DeepgramVoiceModelProvider
from .models.deepgram_stt import DeepgramSTTModel
from .models.deepgram_tts import DeepgramTTSModel
from .models.openai_model_provider import OpenAIVoiceModelProvider
from .models.openai_stt import OpenAISTTModel, OpenAISTTTranscriptionSession
from .models.openai_tts import OpenAITTSModel
Expand Down Expand Up @@ -38,6 +41,9 @@
"OpenAIVoiceModelProvider",
"OpenAISTTModel",
"OpenAITTSModel",
"DeepgramVoiceModelProvider",
"DeepgramSTTModel",
"DeepgramTTSModel",
"VoiceStreamEventAudio",
"VoiceStreamEventLifecycle",
"VoiceStreamEvent",
Expand Down
33 changes: 33 additions & 0 deletions src/agents/voice/models/deepgram_model_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import annotations

import httpx # type: ignore

from ..model import STTModel, TTSModel, VoiceModelProvider
from .deepgram_stt import DeepgramSTTModel
from .deepgram_tts import DeepgramTTSModel

# Fallback speech-to-text model name.
DEFAULT_STT_MODEL = "nova-3"
# Fallback text-to-speech model name.
# NOTE(review): Deepgram's published Aura-2 model IDs appear to carry a voice
# and language suffix (e.g. "aura-2-thalia-en") -- confirm that the bare
# "aura-2" value is accepted by the API.
DEFAULT_TTS_MODEL = "aura-2"


class DeepgramVoiceModelProvider(VoiceModelProvider):
    """Voice model provider that hands out Deepgram STT and TTS models.

    Every model created by one provider shares the provider's API key and a
    single ``httpx.AsyncClient``.
    """

    def __init__(self, api_key: str, *, client: httpx.AsyncClient | None = None) -> None:
        """Store the credentials and optional HTTP client.

        Args:
            api_key: Deepgram API key passed on to every model.
            client: HTTP client to reuse. When omitted, one is created the
                first time a model is requested.
        """
        self._api_key = api_key
        self._client = client

    def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        existing = self._client
        if existing is not None:
            return existing
        created = httpx.AsyncClient()
        self._client = created
        return created

    def get_stt_model(self, model_name: str | None) -> STTModel:
        """Build a speech-to-text model.

        Args:
            model_name: Deepgram model name; ``None`` or an empty string
                selects ``DEFAULT_STT_MODEL``.
        """
        chosen = model_name if model_name else DEFAULT_STT_MODEL
        return DeepgramSTTModel(chosen, self._api_key, client=self._get_client())

    def get_tts_model(self, model_name: str | None) -> TTSModel:
        """Build a text-to-speech model.

        Args:
            model_name: Deepgram model name; ``None`` or an empty string
                selects ``DEFAULT_TTS_MODEL``.
        """
        chosen = model_name if model_name else DEFAULT_TTS_MODEL
        return DeepgramTTSModel(chosen, self._api_key, client=self._get_client())
51 changes: 51 additions & 0 deletions src/agents/voice/models/deepgram_stt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

from typing import Any

import httpx # type: ignore

from ..input import AudioInput, StreamedAudioInput
from ..model import StreamedTranscriptionSession, STTModel, STTModelSettings


class DeepgramSTTModel(STTModel):
    """Speech-to-text model backed by Deepgram's ``/v1/listen`` endpoint.

    Only one-shot transcription is supported; streaming sessions are not
    implemented.
    """

    def __init__(
        self, model: str, api_key: str, *, client: httpx.AsyncClient | None = None
    ) -> None:
        """Create the model.

        Args:
            model: Deepgram model name sent with every request.
            api_key: Deepgram API key used for the ``Authorization`` header.
            client: HTTP client to use; a new ``httpx.AsyncClient`` is created
                when omitted.
        """
        self.model = model
        self.api_key = api_key
        self._client = client or httpx.AsyncClient()

    @property
    def model_name(self) -> str:
        """Name of the Deepgram model this instance was created with."""
        return self.model

    async def transcribe(
        self,
        input: AudioInput,
        settings: STTModelSettings,
        trace_include_sensitive_data: bool,
        trace_include_sensitive_audio_data: bool,
    ) -> str:
        """Transcribe a complete audio input.

        Args:
            input: Audio to transcribe.
            settings: Currently unused.
            trace_include_sensitive_data: Currently unused.
            trace_include_sensitive_audio_data: Currently unused.

        Returns:
            The transcript of the first alternative of the first channel, or
            an empty string when the response contains none.

        Raises:
            httpx.HTTPStatusError: If Deepgram answers with a 4xx/5xx status.
        """
        _filename, audio, content_type = input.to_audio_file()
        response = await self._client.post(
            "https://api.deepgram.com/v1/listen",
            # Let httpx build the query string so the model name is URL-encoded.
            params={"model": self.model},
            headers={
                "Authorization": f"Token {self.api_key}",
                # NOTE(review): assumes ``to_audio_file`` yields a MIME type
                # string as its third element -- confirm against AudioInput.
                "Content-Type": content_type,
            },
            content=audio.getvalue(),
        )
        # Without this an error response (bad key, unsupported audio, ...) was
        # parsed like a success and silently produced an empty transcript.
        response.raise_for_status()
        payload: dict[str, Any] = response.json()
        return self._first_transcript(payload)

    @staticmethod
    def _first_transcript(payload: dict[str, Any]) -> str:
        """Pull the first channel's first alternative transcript from a response."""
        # ``or [{}]`` also covers keys that are present but hold an empty list,
        # which would otherwise raise IndexError on ``[0]``.
        channels = payload.get("results", {}).get("channels") or [{}]
        alternatives = channels[0].get("alternatives") or [{}]
        return alternatives[0].get("transcript", "")

    async def create_session(
        self,
        input: StreamedAudioInput,
        settings: STTModelSettings,
        trace_include_sensitive_data: bool,
        trace_include_sensitive_audio_data: bool,
    ) -> StreamedTranscriptionSession:
        """Streaming transcription entry point; not supported.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Streaming transcription is not implemented.")
29 changes: 29 additions & 0 deletions src/agents/voice/models/deepgram_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from __future__ import annotations

from collections.abc import AsyncIterator

import httpx # type: ignore

from ..model import TTSModel, TTSModelSettings


class DeepgramTTSModel(TTSModel):
    """Text-to-speech model backed by Deepgram's ``/v1/speak`` endpoint."""

    def __init__(
        self, model: str, api_key: str, *, client: httpx.AsyncClient | None = None
    ) -> None:
        """Create the model.

        Args:
            model: Deepgram model name sent with every request.
            api_key: Deepgram API key used for the ``Authorization`` header.
            client: HTTP client to use; a new ``httpx.AsyncClient`` is created
                when omitted.
        """
        self.model = model
        self.api_key = api_key
        self._client = client or httpx.AsyncClient()

    @property
    def model_name(self) -> str:
        """Name of the Deepgram model this instance was created with."""
        return self.model

    async def run(self, text: str, settings: TTSModelSettings) -> AsyncIterator[bytes]:
        """Synthesize ``text`` and yield the resulting audio.

        Args:
            text: Text to speak.
            settings: Currently unused. Deepgram selects the voice through the
                model ID, so ``settings.voice`` is not forwarded.

        Yields:
            The complete response body as a single chunk.

        Raises:
            httpx.HTTPStatusError: If Deepgram answers with a 4xx/5xx status.
        """
        # NOTE(review): no ``encoding``/``sample_rate`` is requested, so the
        # audio arrives in Deepgram's default format -- confirm that this is
        # what the consumer of these bytes expects.
        response = await self._client.post(
            "https://api.deepgram.com/v1/speak",
            # The speak endpoint takes the model as a query parameter; the
            # JSON body carries only the text.
            params={"model": self.model},
            headers={
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json",
            },
            json={"text": text},
        )
        # Without this the JSON body of an error response was yielded to the
        # caller as if it were audio.
        response.raise_for_status()
        yield response.content
27 changes: 27 additions & 0 deletions tests/executive_assistant/test_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import annotations

import pytest

from agents import Agent, Runner
from agents.agent import ToolsToFinalOutputResult
from agents.executive_assistant import executive_assistant_agent
from tests.fake_model import FakeModel


@pytest.mark.asyncio
async def test_agent_runs_with_fake_model() -> None:
    """The assistant's name, instructions and tools run end to end on a fake model."""
    fake_model = FakeModel()
    # NOTE(review): the canned output is passed as plain dicts -- confirm that
    # ``FakeModel.set_next_output`` accepts this shape.
    canned_reply = [
        {"role": "assistant", "content": "Hello"},
    ]
    fake_model.set_next_output(canned_reply)

    # Clone the real agent's configuration but swap in the fake model so that
    # no network call is made.
    agent_under_test = Agent(
        name=executive_assistant_agent.name,
        instructions=executive_assistant_agent.instructions,
        tools=executive_assistant_agent.tools,
        model=fake_model,
    )

    # NOTE(review): confirm ``ToolsToFinalOutputResult`` is the right
    # annotation for the value returned by ``Runner.run``.
    result: ToolsToFinalOutputResult = await Runner.run(agent_under_test, "hi")
    assert result.final_output == "Hello"
14 changes: 14 additions & 0 deletions tests/voice/test_deepgram_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

import pytest

from agents.voice import DeepgramVoiceModelProvider


@pytest.mark.asyncio
async def test_provider_returns_models() -> None:
    """Requesting models without a name yields the provider's defaults."""
    provider = DeepgramVoiceModelProvider(api_key="key")

    default_names = (
        provider.get_stt_model(None).model_name,
        provider.get_tts_model(None).model_name,
    )

    assert default_names == ("nova-3", "aura-2")