diff --git a/deepgram/__init__.py b/deepgram/__init__.py index 95f056d3..42e6d558 100644 --- a/deepgram/__init__.py +++ b/deepgram/__init__.py @@ -340,16 +340,14 @@ AgentKeepAlive, # sub level Listen, - ListenProvider, Speak, - SpeakProvider, Header, Item, Properties, Parameters, Function, Think, - ThinkProvider, + Provider, Agent, Input, Output, diff --git a/deepgram/client.py b/deepgram/client.py index c9d20fd1..80c5d9c3 100644 --- a/deepgram/client.py +++ b/deepgram/client.py @@ -354,16 +354,14 @@ AgentKeepAlive, # sub level Listen, - ListenProvider, Speak, - SpeakProvider, Header, Item, Properties, Parameters, Function, Think, - ThinkProvider, + Provider, Agent, Input, Output, diff --git a/deepgram/clients/__init__.py b/deepgram/clients/__init__.py index 0f573d2e..c1bb5c5c 100644 --- a/deepgram/clients/__init__.py +++ b/deepgram/clients/__init__.py @@ -363,16 +363,14 @@ AgentKeepAlive, # sub level Listen, - ListenProvider, Speak, - SpeakProvider, Header, Item, Properties, Parameters, Function, Think, - ThinkProvider, + Provider, Agent, Input, Output, diff --git a/deepgram/clients/agent/__init__.py b/deepgram/clients/agent/__init__.py index 63c598ef..befd2ef8 100644 --- a/deepgram/clients/agent/__init__.py +++ b/deepgram/clients/agent/__init__.py @@ -38,16 +38,14 @@ AgentKeepAlive, # sub level Listen, - ListenProvider, Speak, - SpeakProvider, Header, Item, Properties, Parameters, Function, Think, - ThinkProvider, + Provider, Agent, Input, Output, diff --git a/deepgram/clients/agent/client.py b/deepgram/clients/agent/client.py index 6dbfd08f..e7c8eba9 100644 --- a/deepgram/clients/agent/client.py +++ b/deepgram/clients/agent/client.py @@ -37,16 +37,14 @@ AgentKeepAlive as LatestAgentKeepAlive, # sub level Listen as LatestListen, - ListenProvider as LatestListenProvider, Speak as LatestSpeak, - SpeakProvider as LatestSpeakProvider, Header as LatestHeader, Item as LatestItem, Properties as LatestProperties, Parameters as LatestParameters, Function as LatestFunction, Think as LatestThink, - ThinkProvider as LatestThinkProvider, + Provider as LatestProvider, Agent as LatestAgent, Input as LatestInput, Output as LatestOutput, @@ -85,16 +83,14 @@ AgentKeepAlive = LatestAgentKeepAlive Listen = LatestListen -ListenProvider = LatestListenProvider Speak = LatestSpeak -SpeakProvider = LatestSpeakProvider Header = LatestHeader Item = LatestItem Properties = LatestProperties Parameters = LatestParameters Function = LatestFunction Think = LatestThink -ThinkProvider = LatestThinkProvider +Provider = LatestProvider Agent = LatestAgent Input = LatestInput Output = LatestOutput diff --git a/deepgram/clients/agent/v1/__init__.py b/deepgram/clients/agent/v1/__init__.py index 8d48b80b..cd115c9f 100644 --- a/deepgram/clients/agent/v1/__init__.py +++ b/deepgram/clients/agent/v1/__init__.py @@ -42,16 +42,14 @@ AgentKeepAlive, # sub level Listen, - ListenProvider, Speak, - SpeakProvider, Header, Item, Properties, Parameters, Function, Think, - ThinkProvider, + Provider, Agent, Input, Output, diff --git a/deepgram/clients/agent/v1/websocket/__init__.py b/deepgram/clients/agent/v1/websocket/__init__.py index b1cec3f2..f32c1bb7 100644 --- a/deepgram/clients/agent/v1/websocket/__init__.py +++ b/deepgram/clients/agent/v1/websocket/__init__.py @@ -33,16 +33,14 @@ AgentKeepAlive, # sub level Listen, - ListenProvider, Speak, - SpeakProvider, Header, Item, Properties, Parameters, Function, Think, - ThinkProvider, + Provider, Agent, Input, Output, diff --git a/deepgram/clients/agent/v1/websocket/async_client.py b/deepgram/clients/agent/v1/websocket/async_client.py index 46524468..ae4822a5 100644 --- a/deepgram/clients/agent/v1/websocket/async_client.py +++ b/deepgram/clients/agent/v1/websocket/async_client.py @@ -224,9 +224,6 @@ async def start( else: raise DeepgramError("Invalid options type") - if self._settings.agent.listen.provider.keyterms is not None and self._settings.agent.listen.provider.model is not None and not self._settings.agent.listen.provider.model.startswith("nova-3"): - raise DeepgramError("Keyterms are only supported for nova-3 models") - try: # speaker substitutes the listening thread if self._speaker is not None: diff --git a/deepgram/clients/agent/v1/websocket/client.py b/deepgram/clients/agent/v1/websocket/client.py index fae7830b..8f7e2322 100644 --- a/deepgram/clients/agent/v1/websocket/client.py +++ b/deepgram/clients/agent/v1/websocket/client.py @@ -225,14 +225,6 @@ def start( else: raise DeepgramError("Invalid options type") - if ( - self._settings.agent.listen.provider - and self._settings.agent.listen.provider.keyterms is not None - and self._settings.agent.listen.provider.model is not None - and not self._settings.agent.listen.provider.model.startswith("nova-3") - ): - raise DeepgramError("Keyterms are only supported for nova-3 models") - try: # speaker substitutes the listening thread if self._speaker is not None: diff --git a/deepgram/clients/agent/v1/websocket/options.py b/deepgram/clients/agent/v1/websocket/options.py index d1b1556b..ffdaa5c1 100644 --- a/deepgram/clients/agent/v1/websocket/options.py +++ b/deepgram/clients/agent/v1/websocket/options.py @@ -2,7 +2,7 @@ # Use of this source code is governed by a MIT license that can be found in the LICENSE file. # SPDX-License-Identifier: MIT -from typing import List, Optional, Union, Any, Tuple +from typing import List, Optional, Union, Any, Tuple, Dict import logging from dataclasses import dataclass, field @@ -68,6 +68,19 @@ def __getitem__(self, key): _dict["properties"] = _dict["properties"].copy() return _dict[key] +class Provider(dict): + """ + Generic attribute class for provider objects. + """ + def __getattr__(self, name): + try: + return self[name] + except KeyError: + # pylint: disable=raise-missing-from + raise AttributeError(name) + def __setattr__(self, name, value): + self[name] = value + @dataclass class Endpoint(BaseResponse): @@ -121,103 +134,13 @@ def __getitem__(self, key): return _dict[key] -@dataclass -class CartesiaVoice(BaseResponse): - """ - This class defines the voice for the Cartesia model. - """ - - mode: str = field( - default="", metadata=dataclass_config(exclude=lambda f: f is None or f == "") - ) - id: str = field( - default="", metadata=dataclass_config(exclude=lambda f: f is None or f == "") - ) - - -@dataclass -class ListenProvider(BaseResponse): - """ - This class defines the provider for the Listen model. - """ - - type: str = field(default="") - model: str = field(default="") - keyterms: Optional[List[str]] = field( - default=None, metadata=dataclass_config(exclude=lambda f: f is None) - ) - - def __getitem__(self, key): - _dict = self.to_dict() - if "keyterms" in _dict and isinstance(_dict["keyterms"], list): - _dict["keyterms"] = [str(keyterm) for keyterm in _dict["keyterms"]] - return _dict[key] - - -@dataclass -class ThinkProvider(BaseResponse): - """ - This class defines the provider for the Think model. - """ - - type: Optional[str] = field(default=None) - model: Optional[str] = field(default=None) - temperature: Optional[float] = field( - default=None, metadata=dataclass_config(exclude=lambda f: f is None) - ) - - -@dataclass -class SpeakProvider(BaseResponse): - """ - This class defines the provider for the Speak model. - """ - - type: Optional[str] = field(default="deepgram") - """ - Deepgram OR OpenAI model to use. - """ - model: Optional[str] = field( - default="aura-2-thalia-en", - metadata=dataclass_config(exclude=lambda f: f is None), - ) - """ - ElevenLabs or Cartesia model to use. - """ - model_id: Optional[str] = field( - default=None, metadata=dataclass_config(exclude=lambda f: f is None) - ) - """ - Cartesia voice configuration. - """ - voice: Optional[CartesiaVoice] = field( - default=None, metadata=dataclass_config(exclude=lambda f: f is None) - ) - """ - Cartesia language. - """ - language: Optional[str] = field( - default=None, metadata=dataclass_config(exclude=lambda f: f is None) - ) - """ - ElevenLabs language. - """ - language_code: Optional[str] = field( - default=None, metadata=dataclass_config(exclude=lambda f: f is None) - ) - - def __getitem__(self, key): - _dict = self.to_dict() - if "voice" in _dict and isinstance(_dict["voice"], dict): - _dict["voice"] = CartesiaVoice.from_dict(_dict["voice"]) - return _dict[key] @dataclass class Think(BaseResponse): """ This class defines any configuration settings for the Think model. """ - provider: ThinkProvider = field(default_factory=ThinkProvider) + provider: Provider = field(default_factory=Provider) functions: Optional[List[Function]] = field( default=None, metadata=dataclass_config(exclude=lambda f: f is None) ) @@ -228,10 +151,12 @@ class Think(BaseResponse): default=None, metadata=dataclass_config(exclude=lambda f: f is None) ) + def __post_init__(self): + if not isinstance(self.provider, Provider): + self.provider = Provider(self.provider) + def __getitem__(self, key): _dict = self.to_dict() - if "provider" in _dict and isinstance(_dict["provider"], dict): - _dict["provider"] = ThinkProvider.from_dict(_dict["provider"]) if "functions" in _dict and isinstance(_dict["functions"], list): _dict["functions"] = [ Function.from_dict(function) for function in _dict["functions"] @@ -247,12 +172,14 @@ class Listen(BaseResponse): This class defines any configuration settings for the Listen model. """ - provider: ListenProvider = field(default_factory=ListenProvider) + provider: Provider = field(default_factory=Provider) + + def __post_init__(self): + if not isinstance(self.provider, Provider): + self.provider = Provider(self.provider) def __getitem__(self, key): _dict = self.to_dict() - if "provider" in _dict and isinstance(_dict["provider"], dict): - _dict["provider"] = ListenProvider.from_dict(_dict["provider"]) return _dict[key] @@ -262,15 +189,17 @@ class Speak(BaseResponse): This class defines any configuration settings for the Speak model. """ - provider: SpeakProvider = field(default_factory=SpeakProvider) + provider: Provider = field(default_factory=Provider) endpoint: Optional[Endpoint] = field( default=None, metadata=dataclass_config(exclude=lambda f: f is None) ) + def __post_init__(self): + if not isinstance(self.provider, Provider): + self.provider = Provider(self.provider) + def __getitem__(self, key): _dict = self.to_dict() - if "provider" in _dict and isinstance(_dict["provider"], dict): - _dict["provider"] = SpeakProvider.from_dict(_dict["provider"]) if "endpoint" in _dict and isinstance(_dict["endpoint"], dict): _dict["endpoint"] = Endpoint.from_dict(_dict["endpoint"]) return _dict[key] diff --git a/examples/agent/arbitrary_keys/main.py b/examples/agent/arbitrary_keys/main.py new file mode 100644 index 00000000..14350f43 --- /dev/null +++ b/examples/agent/arbitrary_keys/main.py @@ -0,0 +1,103 @@ +# Copyright 2025 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +# This example should fail, due to the arbitrary key being included in the settings payload. + +# Import dependencies and set up the main function +import requests +import wave +import io +import time +import os +import json +import threading +from datetime import datetime + +from deepgram import ( + DeepgramClient, + DeepgramClientOptions, + AgentWebSocketEvents, + AgentKeepAlive, +) +from deepgram.clients.agent.v1.websocket.options import SettingsOptions + +def main(): + try: + # Initialize the Voice Agent + api_key = os.getenv("DEEPGRAM_API_KEY") + if not api_key: + raise ValueError("DEEPGRAM_API_KEY environment variable is not set") + print(f"API Key found:") + + # Initialize Deepgram client + config = DeepgramClientOptions( + options={ + "keepalive": "true", + # "speaker_playback": "true", + }, + ) + deepgram = DeepgramClient(api_key, config) + connection = deepgram.agent.websocket.v("1") + print("Created WebSocket connection...") + + # 4. Configure the Agent + options = SettingsOptions() + # Audio input configuration + options.audio.input.encoding = "linear16" + options.audio.input.sample_rate = 24000 + # Audio output configuration + options.audio.output.encoding = "linear16" + options.audio.output.sample_rate = 24000 + options.audio.output.container = "wav" + # Agent configuration + options.agent.language = "en" + options.agent.listen.provider.type = "deepgram" + options.agent.listen.provider.model = "nova-3" + options.agent.think.provider.type = "open_ai" + options.agent.think.provider.model = "gpt-4o-mini" + options.agent.think.prompt = "You are a friendly AI assistant." + options.agent.speak.provider.type = "deepgram" + options.agent.speak.provider.model = "aura-2-thalia-en" + options.agent.greeting = "Hello! How can I help you today?" + options.agent.speak.provider.arbitrary_key = "test" + + def on_welcome(self, welcome, **kwargs): + print(f"Welcome message received: {welcome}") + with open("chatlog.txt", 'a') as chatlog: + chatlog.write(f"Welcome message: {welcome}\n") + + def on_settings_applied(self, settings_applied, **kwargs): + print(f"Settings applied: {settings_applied}") + with open("chatlog.txt", 'a') as chatlog: + chatlog.write(f"Settings applied: {settings_applied}\n") + + def on_error(self, error, **kwargs): + print(f"Error received: {error}") + with open("chatlog.txt", 'a') as chatlog: + chatlog.write(f"Error: {error}\n") + + # Register handlers + connection.on(AgentWebSocketEvents.Welcome, on_welcome) + connection.on(AgentWebSocketEvents.SettingsApplied, on_settings_applied) + connection.on(AgentWebSocketEvents.Error, on_error) + print("Event handlers registered") + + # Start the connection + print("Starting WebSocket connection...") + print(options) + if not connection.start(options): + print("Failed to start connection") + return + print("WebSocket connection started successfully") + + # Cleanup + connection.finish() + print("Finished! You should see an error for the arbitrary key - scroll up and you can see it is included in the settings payload.") + print("If you do not see that error, this example has failed.") + + except Exception as e: + print(f"Error: {str(e)}") + +if __name__ == "__main__": + main()