Skip to content

feat: support arbitrary attributes for speak provider #532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion deepgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@
Listen,
ListenProvider,
Speak,
SpeakProvider,
Header,
Item,
Properties,
Expand Down
1 change: 0 additions & 1 deletion deepgram/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,6 @@
Listen,
ListenProvider,
Speak,
SpeakProvider,
Header,
Item,
Properties,
Expand Down
1 change: 0 additions & 1 deletion deepgram/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,6 @@
Listen,
ListenProvider,
Speak,
SpeakProvider,
Header,
Item,
Properties,
Expand Down
1 change: 0 additions & 1 deletion deepgram/clients/agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
Listen,
ListenProvider,
Speak,
SpeakProvider,
Header,
Item,
Properties,
Expand Down
2 changes: 0 additions & 2 deletions deepgram/clients/agent/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
Listen as LatestListen,
ListenProvider as LatestListenProvider,
Speak as LatestSpeak,
SpeakProvider as LatestSpeakProvider,
Header as LatestHeader,
Item as LatestItem,
Properties as LatestProperties,
Expand Down Expand Up @@ -87,7 +86,6 @@
Listen = LatestListen
ListenProvider = LatestListenProvider
Speak = LatestSpeak
SpeakProvider = LatestSpeakProvider
Header = LatestHeader
Item = LatestItem
Properties = LatestProperties
Expand Down
1 change: 0 additions & 1 deletion deepgram/clients/agent/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
Listen,
ListenProvider,
Speak,
SpeakProvider,
Header,
Item,
Properties,
Expand Down
1 change: 0 additions & 1 deletion deepgram/clients/agent/v1/websocket/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
Listen,
ListenProvider,
Speak,
SpeakProvider,
Header,
Item,
Properties,
Expand Down
65 changes: 15 additions & 50 deletions deepgram/clients/agent/v1/websocket/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from typing import List, Optional, Union, Any, Tuple
from typing import List, Optional, Union, Any, Tuple, Dict
import logging

from dataclasses import dataclass, field
Expand Down Expand Up @@ -167,52 +167,6 @@ class ThinkProvider(BaseResponse):
)


@dataclass
class SpeakProvider(BaseResponse):
    """
    Provider settings for the Speak model.

    Every field except ``type`` is configured with an ``exclude`` rule that
    matches ``None``.
    """

    # Provider type.
    type: Optional[str] = field(default="deepgram")

    # Deepgram OR OpenAI model to use.
    model: Optional[str] = field(
        default="aura-2-thalia-en",
        metadata=dataclass_config(exclude=lambda value: value is None),
    )

    # ElevenLabs or Cartesia model to use.
    model_id: Optional[str] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda value: value is None),
    )

    # Cartesia voice configuration.
    voice: Optional[CartesiaVoice] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda value: value is None),
    )

    # Cartesia language.
    language: Optional[str] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda value: value is None),
    )

    # ElevenLabs language.
    language_code: Optional[str] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda value: value is None),
    )

    def __getitem__(self, key):
        """
        Look up ``key`` in the dictionary form of this provider.

        A ``voice`` entry that serialized to a plain dict is rebuilt as a
        ``CartesiaVoice`` before the lookup. Raises ``KeyError`` when ``key``
        is not present in the dictionary form.
        """
        data = self.to_dict()
        voice = data.get("voice")
        if isinstance(voice, dict):
            data["voice"] = CartesiaVoice.from_dict(voice)
        return data[key]


Comment on lines -170 to -215
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking of something along the lines of

from dataclasses import dataclass, field, asdict
from typing import Any, Dict, Optional

@dataclass
class SpeakProvider:
    """
    This class defines the provider for the Speak model.

    ``type`` is the only declared setting. Any other provider setting is
    arbitrary: it is kept in an internal mapping, can be read or written with
    item syntax (``provider["model"]``) or attribute syntax
    (``provider.model``), and is emitted by ``to_dict()``.

    Names starting with an underscore are treated as ordinary instance
    attributes when set with attribute syntax; they are not provider settings.
    """

    # The provider type. The only truly mandatory property.
    # Marked optional because it has a default value.
    type: Optional[str] = field(default="deepgram")

    # Internal property to store arbitrary properties.
    __extra: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data):
        """Build a provider from a mapping of settings (``type`` included)."""
        provider = cls()
        for key, value in data.items():
            provider[key] = value
        return provider

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails. Private and dunder
        # names never live in the extra mapping; refusing them here also
        # avoids recursion while the mapping itself is not set yet.
        if name.startswith("_"):
            raise AttributeError(name)
        try:
            return self.__extra[name]
        except KeyError:
            raise AttributeError(name) from None

    def __setattr__(self, name, value):
        # Declared fields and private names stay real attributes; anything
        # else is an arbitrary provider setting and must reach to_dict().
        if name in self.__dataclass_fields__ or name.startswith("_"):
            object.__setattr__(self, name, value)
        else:
            self.__extra[name] = value

    def __getitem__(self, key):
        """Return ``type`` or an arbitrary setting; ``KeyError`` if unknown."""
        if key == "type":
            return self.type
        return self.__extra[key]

    def __setitem__(self, key, value):
        """Set ``type`` or store an arbitrary setting under ``key``."""
        if key == "type":
            self.type = value
        else:
            self.__extra[key] = value

    def to_dict(self):
        """Return ``type`` together with every arbitrary setting."""
        return {"type": self.type, **self.__extra}

@dataclass
class Think(BaseResponse):
"""
Expand Down Expand Up @@ -264,15 +218,26 @@ class Speak(BaseResponse):
This class defines any configuration settings for the Speak model.
"""

provider: SpeakProvider = field(default_factory=SpeakProvider)
provider: dict = field(default_factory=dict)
endpoint: Optional[Endpoint] = field(
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
)

def __post_init__(self):
    """
    Wrap ``provider`` so its keys can also be read and written as attributes.

    After this runs ``self.provider`` is a ``dict`` subclass instance, so
    ``self.provider.model`` and ``self.provider["model"]`` address the same
    entry. Reading a missing key as an attribute raises ``AttributeError``.
    """

    class AttrDict(dict):
        """Dictionary that supports attribute-style access to keys."""

        def __getattr__(self, name):
            try:
                return self[name]
            except KeyError:
                # A missing key is simply a missing attribute; suppress the
                # KeyError so the traceback shows only the AttributeError.
                raise AttributeError(name) from None

        def __setattr__(self, name, value):
            self[name] = value

    if not isinstance(self.provider, AttrDict):
        self.provider = AttrDict(self.provider)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix linting issues in AttrDict implementation.

The AttrDict implementation is clever and achieves the desired functionality, but there are linting issues that need to be addressed:

     def __post_init__(self):
-        # Allow attribute-style access to provider dict
         class AttrDict(dict):
+            """Dictionary that supports attribute-style access to keys."""
+            
             def __getattr__(self, name):
                 try:
                     return self[name]
                 except KeyError:
-                    raise AttributeError(name)
+                    raise AttributeError(name) from None
             def __setattr__(self, name, value):
                 self[name] = value
         if not isinstance(self.provider, AttrDict):
             self.provider = AttrDict(self.provider)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def __post_init__(self):
# Allow attribute-style access to provider dict
class AttrDict(dict):
def __getattr__(self, name):
try:
return self[name]
except KeyError:
raise AttributeError(name)
def __setattr__(self, name, value):
self[name] = value
if not isinstance(self.provider, AttrDict):
self.provider = AttrDict(self.provider)
def __post_init__(self):
class AttrDict(dict):
"""Dictionary that supports attribute-style access to keys."""
def __getattr__(self, name):
try:
return self[name]
except KeyError:
raise AttributeError(name) from None
def __setattr__(self, name, value):
self[name] = value
if not isinstance(self.provider, AttrDict):
self.provider = AttrDict(self.provider)
🧰 Tools
🪛 GitHub Actions: Check - lint

[warning] 228-228: pylint: Missing class docstring (missing-class-docstring)


[warning] 233-233: pylint: Consider explicitly re-raising using 'except KeyError as exc' and 'raise AttributeError(name) from exc' (raise-missing-from)

🤖 Prompt for AI Agents
In deepgram/clients/agent/v1/websocket/options.py around lines 226 to 237, the
AttrDict class defined inside Speak.__post_init__ triggers two pylint warnings:
missing-class-docstring (AttrDict has no docstring) and raise-missing-from
(AttributeError is raised inside `except KeyError` without an explicit cause).
To fix this, add a one-line docstring to AttrDict and re-raise with
`raise AttributeError(name) from None` (or catch `KeyError as exc` and use
`raise AttributeError(name) from exc`). No other changes are required.


def __getitem__(self, key):
    """
    Return the setting stored under ``key``.

    ``provider`` is returned as the object held on this instance. It is not
    rebuilt through ``SpeakProvider.from_dict``, because this change removes
    the ``SpeakProvider`` class and that call would raise ``NameError``.
    An ``endpoint`` entry that serialized to a plain dict is rebuilt as an
    ``Endpoint``. Raises ``KeyError`` when ``key`` is not in the dictionary
    form of this object.
    """
    if key == "provider":
        return self.provider
    _dict = self.to_dict()
    if "endpoint" in _dict and isinstance(_dict["endpoint"], dict):
        _dict["endpoint"] = Endpoint.from_dict(_dict["endpoint"])
    return _dict[key]
Expand Down
100 changes: 100 additions & 0 deletions examples/agent/arbitrary_keys/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2025 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

# Import dependencies and set up the main function
import requests
import wave
import io
import time
import os
import json
import threading
from datetime import datetime

from deepgram import (
DeepgramClient,
DeepgramClientOptions,
AgentWebSocketEvents,
AgentKeepAlive,
)
from deepgram.clients.agent.v1.websocket.options import SettingsOptions

def _append_chatlog(line):
    """Append ``line`` plus a newline to ``chatlog.txt`` in the working directory."""
    # Explicit encoding so the log does not depend on the platform default.
    with open("chatlog.txt", "a", encoding="utf-8") as chatlog:
        chatlog.write(f"{line}\n")


def main():
    """
    Start a Voice Agent websocket whose speak provider carries an arbitrary key.

    Reads the API key from the ``DEEPGRAM_API_KEY`` environment variable,
    builds the agent settings, registers handlers that echo the Welcome,
    SettingsApplied and Error events to stdout and ``chatlog.txt``, then
    starts the connection and finishes it straight away. Any exception is
    printed to stdout instead of being propagated.
    """
    try:
        # Initialize the Voice Agent
        api_key = os.getenv("DEEPGRAM_API_KEY")
        if not api_key:
            raise ValueError("DEEPGRAM_API_KEY environment variable is not set")
        print("API Key found:")

        # Initialize Deepgram client
        config = DeepgramClientOptions(
            options={
                "keepalive": "true",
                # "speaker_playback": "true",
            },
        )
        deepgram = DeepgramClient(api_key, config)
        connection = deepgram.agent.websocket.v("1")
        print("Created WebSocket connection...")

        # Configure the Agent
        options = SettingsOptions()
        # Audio input configuration
        options.audio.input.encoding = "linear16"
        options.audio.input.sample_rate = 24000
        # Audio output configuration
        options.audio.output.encoding = "linear16"
        options.audio.output.sample_rate = 24000
        options.audio.output.container = "wav"
        # Agent configuration
        options.agent.language = "en"
        options.agent.listen.provider.type = "deepgram"
        options.agent.listen.provider.model = "nova-3"
        options.agent.think.provider.type = "open_ai"
        options.agent.think.provider.model = "gpt-4o-mini"
        options.agent.think.prompt = "You are a friendly AI assistant."
        options.agent.speak.provider.type = "deepgram"
        options.agent.speak.provider.model = "aura-2-thalia-en"
        options.agent.greeting = "Hello! How can I help you today?"
        # The key under test: not a declared speak-provider setting.
        options.agent.speak.provider.arbitrary_key = "test"

        def on_welcome(self, welcome, **kwargs):
            print(f"Welcome message received: {welcome}")
            _append_chatlog(f"Welcome message: {welcome}")

        def on_settings_applied(self, settings_applied, **kwargs):
            print(f"Settings applied: {settings_applied}")
            _append_chatlog(f"Settings applied: {settings_applied}")

        def on_error(self, error, **kwargs):
            print(f"Error received: {error}")
            _append_chatlog(f"Error: {error}")

        # Register handlers
        connection.on(AgentWebSocketEvents.Welcome, on_welcome)
        connection.on(AgentWebSocketEvents.SettingsApplied, on_settings_applied)
        connection.on(AgentWebSocketEvents.Error, on_error)
        print("Event handlers registered")

        # Start the connection
        print("Starting WebSocket connection...")
        print(options)
        if not connection.start(options):
            print("Failed to start connection")
            return
        print("WebSocket connection started successfully")

        # Cleanup
        connection.finish()
        print("Finished")

    except Exception as e:
        # Top-level boundary of an example script: report and exit quietly.
        print(f"Error: {e}")


if __name__ == "__main__":
    main()
Loading