
Commit 466b44d

Dev/add usage details to Usage class (#726)
PR to enhance the `Usage` object and related logic to support more granular token accounting, matching the details available in the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses). Specifically, it:

- Adds `input_tokens_details` and `output_tokens_details` fields to the `Usage` dataclass, storing detailed token breakdowns (e.g., `cached_tokens`, `reasoning_tokens`).
- Flows this change through the Responses, Chat Completions, and LiteLLM model implementations and the streaming run loop.
- Updates and extends tests to match.
- Adds a test for the `Usage.add` method.

### Motivation

- Aligns the SDK's usage tracking with the latest OpenAI Responses API `Usage` object.
- Supports downstream use cases that require fine-grained token usage data (e.g., billing, analytics, optimization) requested by startups.

Co-authored-by: Wulfie Bain <wulfie@openai.com>
1 parent 428c9a6 commit 466b44d

11 files changed (+178, -15 lines)
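For a sense of the resulting API, here is a minimal sketch of the enhanced object in use. The token counts are illustrative; the aggregation behavior matches the `Usage.add` change and the new `tests/test_usage.py` shown below:

```python
from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

from agents.usage import Usage

# Two per-request usage records, e.g. from two model calls in one agent run.
first = Usage(
    requests=1,
    input_tokens=10,
    input_tokens_details=InputTokensDetails(cached_tokens=3),
    output_tokens=20,
    output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
    total_tokens=30,
)
second = Usage(
    requests=1,
    input_tokens=7,
    input_tokens_details=InputTokensDetails(cached_tokens=4),
    output_tokens=8,
    output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
    total_tokens=15,
)

# Usage.add now sums the detail breakdowns as well as the flat counters.
first.add(second)
assert first.input_tokens == 17
assert first.input_tokens_details.cached_tokens == 7
assert first.output_tokens_details.reasoning_tokens == 11
```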

src/agents/extensions/models/litellm_model.py

Lines changed: 11 additions & 0 deletions
```diff
@@ -6,6 +6,7 @@
 from typing import Any, Literal, cast, overload
 
 import litellm.types
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents.exceptions import ModelBehaviorError
 
@@ -107,6 +108,16 @@ async def get_response(
                 input_tokens=response_usage.prompt_tokens,
                 output_tokens=response_usage.completion_tokens,
                 total_tokens=response_usage.total_tokens,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=getattr(
+                        response_usage.prompt_tokens_details, "cached_tokens", 0
+                    )
+                ),
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=getattr(
+                        response_usage.completion_tokens_details, "reasoning_tokens", 0
+                    )
+                ),
             )
             if response.usage
             else Usage()
```

src/agents/models/openai_chatcompletions.py

Lines changed: 14 additions & 1 deletion
```diff
@@ -9,6 +9,7 @@
 from openai.types import ChatModel
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai.types.responses import Response
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from .. import _debug
 from ..agent_output import AgentOutputSchemaBase
@@ -83,6 +84,18 @@ async def get_response(
                 input_tokens=response.usage.prompt_tokens,
                 output_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=getattr(
+                        response.usage.prompt_tokens_details, "cached_tokens", 0
+                    )
+                    or 0,
+                ),
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=getattr(
+                        response.usage.completion_tokens_details, "reasoning_tokens", 0
+                    )
+                    or 0,
+                ),
             )
             if response.usage
             else Usage()
@@ -252,7 +265,7 @@ async def _fetch_response(
             stream_options=self._non_null_or_not_given(stream_options),
             store=self._non_null_or_not_given(store),
             reasoning_effort=self._non_null_or_not_given(reasoning_effort),
-            extra_headers={ **HEADERS, **(model_settings.extra_headers or {}) },
+            extra_headers={**HEADERS, **(model_settings.extra_headers or {})},
             extra_query=model_settings.extra_query,
             extra_body=model_settings.extra_body,
             metadata=self._non_null_or_not_given(model_settings.metadata),
```
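One subtlety worth noting in the hunk above: `getattr`'s default only applies when the attribute is missing entirely, not when it is present but `None`, which is why the Chat Completions path adds an `or 0` guard. A minimal sketch of the case it covers (variable names are illustrative):

```python
from openai.types.completion_usage import PromptTokensDetails

# All fields on PromptTokensDetails are optional, so cached_tokens can be None.
details = PromptTokensDetails()

# getattr's default is NOT used here: the attribute exists, its value is None.
assert getattr(details, "cached_tokens", 0) is None

# The trailing `or 0` normalizes it back to an int for Usage.
cached = getattr(details, "cached_tokens", 0) or 0
assert cached == 0
```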

src/agents/models/openai_responses.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -98,6 +98,8 @@ async def get_response(
                 input_tokens=response.usage.input_tokens,
                 output_tokens=response.usage.output_tokens,
                 total_tokens=response.usage.total_tokens,
+                input_tokens_details=response.usage.input_tokens_details,
+                output_tokens_details=response.usage.output_tokens_details,
             )
             if response.usage
             else Usage()
```

src/agents/run.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -689,6 +689,8 @@ async def _run_single_turn_streamed(
                     input_tokens=event.response.usage.input_tokens,
                     output_tokens=event.response.usage.output_tokens,
                     total_tokens=event.response.usage.total_tokens,
+                    input_tokens_details=event.response.usage.input_tokens_details,
+                    output_tokens_details=event.response.usage.output_tokens_details,
                 )
                 if event.response.usage
                 else Usage()
```

src/agents/usage.py

Lines changed: 21 additions & 1 deletion
```diff
@@ -1,4 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 
 @dataclass
@@ -9,9 +11,18 @@ class Usage:
     input_tokens: int = 0
     """Total input tokens sent, across all requests."""
 
+    input_tokens_details: InputTokensDetails = field(
+        default_factory=lambda: InputTokensDetails(cached_tokens=0)
+    )
+    """Details about the input tokens, matching responses API usage details."""
     output_tokens: int = 0
     """Total output tokens received, across all requests."""
 
+    output_tokens_details: OutputTokensDetails = field(
+        default_factory=lambda: OutputTokensDetails(reasoning_tokens=0)
+    )
+    """Details about the output tokens, matching responses API usage details."""
+
     total_tokens: int = 0
     """Total tokens sent and received, across all requests."""
 
@@ -20,3 +31,12 @@ def add(self, other: "Usage") -> None:
         self.input_tokens += other.input_tokens if other.input_tokens else 0
         self.output_tokens += other.output_tokens if other.output_tokens else 0
         self.total_tokens += other.total_tokens if other.total_tokens else 0
+        self.input_tokens_details = InputTokensDetails(
+            cached_tokens=self.input_tokens_details.cached_tokens
+            + other.input_tokens_details.cached_tokens
+        )
+
+        self.output_tokens_details = OutputTokensDetails(
+            reasoning_tokens=self.output_tokens_details.reasoning_tokens
+            + other.output_tokens_details.reasoning_tokens
+        )
```
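Downstream, these details surface anywhere an aggregated `Usage` is exposed. A hedged sketch of reading them after a run, assuming the SDK's usual `Runner` entry point and that the run result exposes accumulated usage via its context wrapper (the agent and prompt are illustrative):

```python
import asyncio

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="assistant", instructions="Reply briefly.")
    result = await Runner.run(agent, "What is 2 + 2?")

    # Accumulated across every model call in the run, including the new details.
    usage = result.context_wrapper.usage
    print("input tokens:", usage.input_tokens)
    print("cached input tokens:", usage.input_tokens_details.cached_tokens)
    print("reasoning output tokens:", usage.output_tokens_details.reasoning_tokens)


asyncio.run(main())
```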

tests/models/test_litellm_chatcompletions_stream.py

Lines changed: 14 additions & 2 deletions
```diff
@@ -8,7 +8,11 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -46,7 +50,13 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> None:
         model="fake",
         object="chat.completion.chunk",
         choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=6),
+        ),
     )
 
     async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
@@ -112,6 +122,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.input_tokens == 7
     assert completed_resp.usage.output_tokens == 5
     assert completed_resp.usage.total_tokens == 12
+    assert completed_resp.usage.input_tokens_details.cached_tokens == 6
+    assert completed_resp.usage.output_tokens_details.reasoning_tokens == 2
 
 
 @pytest.mark.allow_call_model_methods
```

tests/test_extra_headers.py

Lines changed: 14 additions & 6 deletions
```diff
@@ -1,6 +1,7 @@
 import pytest
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents import ModelSettings, ModelTracing, OpenAIChatCompletionsModel, OpenAIResponsesModel
 
@@ -17,21 +18,29 @@ class DummyResponses:
     async def create(self, **kwargs):
         nonlocal called_kwargs
         called_kwargs = kwargs
+
         class DummyResponse:
             id = "dummy"
             output = []
             usage = type(
-                "Usage", (), {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+                "Usage",
+                (),
+                {
+                    "input_tokens": 0,
+                    "output_tokens": 0,
+                    "total_tokens": 0,
+                    "input_tokens_details": InputTokensDetails(cached_tokens=0),
+                    "output_tokens_details": OutputTokensDetails(reasoning_tokens=0),
+                },
             )()
+
         return DummyResponse()
 
     class DummyClient:
         def __init__(self):
             self.responses = DummyResponses()
 
-
-
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=DummyClient()) # type: ignore
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=DummyClient())  # type: ignore
     extra_headers = {"X-Test-Header": "test-value"}
     await model.get_response(
         system_instructions=None,
@@ -47,7 +56,6 @@ def __init__(self):
     assert called_kwargs["extra_headers"]["X-Test-Header"] == "test-value"
 
 
-
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
 async def test_extra_headers_passed_to_openai_client():
@@ -76,7 +84,7 @@ def __init__(self):
             self.chat = type("_Chat", (), {"completions": DummyCompletions()})()
             self.base_url = "https://api.openai.com"
 
-    model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=DummyClient()) # type: ignore
+    model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=DummyClient())  # type: ignore
     extra_headers = {"X-Test-Header": "test-value"}
     await model.get_response(
         system_instructions=None,
```

tests/test_openai_chatcompletions.py

Lines changed: 15 additions & 2 deletions
```diff
@@ -13,7 +13,10 @@
     ChatCompletionMessageToolCall,
     Function,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -51,7 +54,13 @@ async def test_get_response_with_text_message(monkeypatch) -> None:
         model="fake",
         object="chat.completion",
         choices=[choice],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            # completion_tokens_details left blank to test default
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=3),
+        ),
     )
 
     async def patched_fetch_response(self, *args, **kwargs):
@@ -81,6 +90,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.input_tokens == 7
     assert resp.usage.output_tokens == 5
     assert resp.usage.total_tokens == 12
+    assert resp.usage.input_tokens_details.cached_tokens == 3
+    assert resp.usage.output_tokens_details.reasoning_tokens == 0
     assert resp.response_id is None
 
 
@@ -127,6 +138,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.requests == 0
     assert resp.usage.input_tokens == 0
     assert resp.usage.output_tokens == 0
+    assert resp.usage.input_tokens_details.cached_tokens == 0
+    assert resp.usage.output_tokens_details.reasoning_tokens == 0
 
 
 @pytest.mark.allow_call_model_methods
```

tests/test_openai_chatcompletions_stream.py

Lines changed: 14 additions & 2 deletions
```diff
@@ -8,7 +8,11 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -46,7 +50,13 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> None:
         model="fake",
         object="chat.completion.chunk",
         choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=2),
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=3),
+        ),
     )
 
     async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
@@ -112,6 +122,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.input_tokens == 7
    assert completed_resp.usage.output_tokens == 5
     assert completed_resp.usage.total_tokens == 12
+    assert completed_resp.usage.input_tokens_details.cached_tokens == 2
+    assert completed_resp.usage.output_tokens_details.reasoning_tokens == 3
 
 
 @pytest.mark.allow_call_model_methods
```

tests/test_responses_tracing.py

Lines changed: 19 additions & 1 deletion
```diff
@@ -1,7 +1,10 @@
+from typing import Optional
+
 import pytest
 from inline_snapshot import snapshot
 from openai import AsyncOpenAI
 from openai.types.responses import ResponseCompletedEvent
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents import ModelSettings, ModelTracing, OpenAIResponsesModel, trace
 from agents.tracing.span_data import ResponseSpanData
@@ -16,10 +19,25 @@ def is_disabled(self):
 
 
 class DummyUsage:
-    def __init__(self, input_tokens=1, output_tokens=1, total_tokens=2):
+    def __init__(
+        self,
+        input_tokens: int = 1,
+        input_tokens_details: Optional[InputTokensDetails] = None,
+        output_tokens: int = 1,
+        output_tokens_details: Optional[OutputTokensDetails] = None,
+        total_tokens: int = 2,
+    ):
         self.input_tokens = input_tokens
         self.output_tokens = output_tokens
         self.total_tokens = total_tokens
+        self.input_tokens_details = (
+            input_tokens_details if input_tokens_details else InputTokensDetails(cached_tokens=0)
+        )
+        self.output_tokens_details = (
+            output_tokens_details
+            if output_tokens_details
+            else OutputTokensDetails(reasoning_tokens=0)
+        )
 
 
 class DummyResponse:
```

tests/test_usage.py

Lines changed: 52 additions & 0 deletions
New file:

```python
from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

from agents.usage import Usage


def test_usage_add_aggregates_all_fields():
    u1 = Usage(
        requests=1,
        input_tokens=10,
        input_tokens_details=InputTokensDetails(cached_tokens=3),
        output_tokens=20,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
        total_tokens=30,
    )
    u2 = Usage(
        requests=2,
        input_tokens=7,
        input_tokens_details=InputTokensDetails(cached_tokens=4),
        output_tokens=8,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
        total_tokens=15,
    )

    u1.add(u2)

    assert u1.requests == 3
    assert u1.input_tokens == 17
    assert u1.output_tokens == 28
    assert u1.total_tokens == 45
    assert u1.input_tokens_details.cached_tokens == 7
    assert u1.output_tokens_details.reasoning_tokens == 11


def test_usage_add_aggregates_with_none_values():
    u1 = Usage()
    u2 = Usage(
        requests=2,
        input_tokens=7,
        input_tokens_details=InputTokensDetails(cached_tokens=4),
        output_tokens=8,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
        total_tokens=15,
    )

    u1.add(u2)

    assert u1.requests == 2
    assert u1.input_tokens == 7
    assert u1.output_tokens == 8
    assert u1.total_tokens == 15
    assert u1.input_tokens_details.cached_tokens == 4
    assert u1.output_tokens_details.reasoning_tokens == 6
```
