Skip to content

Commit 1a8db5e

Browse files
Discard open spans after 10 minutes (#2801)
OTel spans that are handled in the Sentry span processor may never be finished/closed. This leads to a memory leak, because unfinished spans stay in memory indefinitely. This change makes sure that open spans are removed from memory after 10 minutes, to prevent memory usage from growing constantly. Fixes #2722 --------- Co-authored-by: Daniel Szoke <szokeasaurusrex@users.noreply.github.com>
1 parent f40e27f commit 1a8db5e

File tree

2 files changed

+139
-3
lines changed

2 files changed

+139
-3
lines changed

sentry_sdk/integrations/opentelemetry/span_processor.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from time import time
2+
13
from opentelemetry.context import get_value # type: ignore
24
from opentelemetry.sdk.trace import SpanProcessor # type: ignore
35
from opentelemetry.semconv.trace import SpanAttributes # type: ignore
@@ -33,6 +35,7 @@
3335
from sentry_sdk._types import Event, Hint
3436

3537
OPEN_TELEMETRY_CONTEXT = "otel"
38+
SPAN_MAX_TIME_OPEN_MINUTES = 10
3639

3740

3841
def link_trace_context_to_error_event(event, otel_span_map):
@@ -76,6 +79,9 @@ class SentrySpanProcessor(SpanProcessor): # type: ignore
7679
# The mapping from otel span ids to sentry spans
7780
otel_span_map = {} # type: Dict[str, Union[Transaction, SentrySpan]]
7881

82+
# The currently open spans. Elements will be discarded after SPAN_MAX_TIME_OPEN_MINUTES
83+
open_spans = {} # type: dict[int, set[str]]
84+
7985
def __new__(cls):
8086
# type: () -> SentrySpanProcessor
8187
if not hasattr(cls, "instance"):
@@ -90,6 +96,24 @@ def global_event_processor(event, hint):
9096
# type: (Event, Hint) -> Event
9197
return link_trace_context_to_error_event(event, self.otel_span_map)
9298

99+
def _prune_old_spans(self):
    # type: () -> None
    """
    Prune spans that have been open for too long.

    ``self.open_spans`` maps a span's start time, truncated to whole
    minutes, to the set of span ids that were started in that minute.
    Empty buckets are dropped. Buckets older than
    ``SPAN_MAX_TIME_OPEN_MINUTES`` are dropped as well, and every span id
    they contain is removed from ``self.otel_span_map``.
    """
    current_time_minutes = int(time() / 60)

    # Iterate over a snapshot of the keys: buckets are removed from the
    # dict inside the loop.
    for span_start_minutes in list(self.open_spans):
        if not self.open_spans[span_start_minutes]:
            # No open span is left in this bucket.
            del self.open_spans[span_start_minutes]
        elif current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES:
            for span_id in self.open_spans.pop(span_start_minutes):
                # The id may already be missing from the map, so never raise.
                self.otel_span_map.pop(span_id, None)
116+
93117
def on_start(self, otel_span, parent_context=None):
94118
# type: (OTelSpan, Optional[SpanContext]) -> None
95119
hub = Hub.current
@@ -125,7 +149,9 @@ def on_start(self, otel_span, parent_context=None):
125149
sentry_span = sentry_parent_span.start_child(
126150
span_id=trace_data["span_id"],
127151
description=otel_span.name,
128-
start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9),
152+
start_timestamp=utc_from_timestamp(
153+
otel_span.start_time / 1e9
154+
), # OTel spans have nanosecond precision
129155
instrumenter=INSTRUMENTER.OTEL,
130156
)
131157
else:
@@ -135,12 +161,22 @@ def on_start(self, otel_span, parent_context=None):
135161
parent_span_id=parent_span_id,
136162
trace_id=trace_data["trace_id"],
137163
baggage=trace_data["baggage"],
138-
start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9),
164+
start_timestamp=utc_from_timestamp(
165+
otel_span.start_time / 1e9
166+
), # OTel spans have nanosecond precision
139167
instrumenter=INSTRUMENTER.OTEL,
140168
)
141169

142170
self.otel_span_map[trace_data["span_id"]] = sentry_span
143171

172+
span_start_in_minutes = int(
173+
otel_span.start_time / 1e9 / 60
174+
) # OTel spans have nanosecond precision
175+
self.open_spans.setdefault(span_start_in_minutes, set()).add(
176+
trace_data["span_id"]
177+
)
178+
self._prune_old_spans()
179+
144180
def on_end(self, otel_span):
145181
# type: (OTelSpan) -> None
146182
hub = Hub.current
@@ -173,7 +209,15 @@ def on_end(self, otel_span):
173209
else:
174210
self._update_span_with_otel_data(sentry_span, otel_span)
175211

176-
sentry_span.finish(end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9))
212+
sentry_span.finish(
213+
end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9)
214+
) # OTel spans have nanosecond precision
215+
216+
span_start_in_minutes = int(
217+
otel_span.start_time / 1e9 / 60
218+
) # OTel spans have nanosecond precision
219+
self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id)
220+
self._prune_old_spans()
177221

178222
def _is_sentry_span(self, hub, otel_span):
179223
# type: (Hub, OTelSpan) -> bool

tests/integrations/opentelemetry/test_span_processor.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,3 +531,95 @@ def test_link_trace_context_to_error_event():
531531
assert "contexts" in event
532532
assert "trace" in event["contexts"]
533533
assert event["contexts"]["trace"] == fake_trace_context
534+
535+
536+
def test_pruning_old_spans_on_start():
    """
    Starting a span must discard buckets of spans that have been open for
    more than the allowed time, keep recent buckets, and register the
    newly started span.
    """
    otel_span = MagicMock()
    otel_span.name = "Sample OTel Span"
    otel_span.start_time = time.time_ns()
    span_context = SpanContext(
        trace_id=int("1234567890abcdef1234567890abcdef", 16),
        span_id=int("1234567890abcdef", 16),
        is_remote=True,
    )
    otel_span.get_span_context.return_value = span_context
    otel_span.parent = MagicMock()
    otel_span.parent.span_id = int("abcdef1234567890", 16)

    parent_context = {}
    fake_client = MagicMock()
    fake_client.options = {"instrumenter": "otel"}
    fake_client.dsn = "https://1234567890abcdef@o123456.ingest.sentry.io/123456"

    current_hub = MagicMock()
    current_hub.client = fake_client

    fake_hub = MagicMock()
    fake_hub.current = current_hub

    with mock.patch(
        "sentry_sdk.integrations.opentelemetry.span_processor.Hub", fake_hub
    ):
        span_processor = SentrySpanProcessor()

        span_processor.otel_span_map = {
            "111111111abcdef": MagicMock(),  # should stay
            "2222222222abcdef": MagicMock(),  # should go
            "3333333333abcdef": MagicMock(),  # should go
        }
        current_time_minutes = int(time.time() / 60)
        span_processor.open_spans = {
            current_time_minutes - 3: {"111111111abcdef"},  # should stay
            current_time_minutes
            - 11: {"2222222222abcdef", "3333333333abcdef"},  # should go
        }

        span_processor.on_start(otel_span, parent_context)
        assert sorted(span_processor.otel_span_map) == [
            "111111111abcdef",
            "1234567890abcdef",
        ]
        # Sets only define a partial (subset) order, so sort the buckets by
        # their sorted contents to get a deterministic order.
        assert sorted(span_processor.open_spans.values(), key=sorted) == [
            {"111111111abcdef"},
            {"1234567890abcdef"},
        ]
586+
587+
588+
def test_pruning_old_spans_on_end():
    """
    Ending a span must remove that span from the open-span bookkeeping,
    discard buckets of spans that have been open for more than the allowed
    time, and keep recent buckets.
    """
    otel_span = MagicMock()
    otel_span.name = "Sample OTel Span"
    otel_span.start_time = time.time_ns()
    span_context = SpanContext(
        trace_id=int("1234567890abcdef1234567890abcdef", 16),
        span_id=int("1234567890abcdef", 16),
        is_remote=True,
    )
    otel_span.get_span_context.return_value = span_context
    otel_span.parent = MagicMock()
    otel_span.parent.span_id = int("abcdef1234567890", 16)

    fake_sentry_span = MagicMock(spec=Span)
    fake_sentry_span.set_context = MagicMock()
    fake_sentry_span.finish = MagicMock()

    span_processor = SentrySpanProcessor()
    span_processor._get_otel_context = MagicMock()
    span_processor._update_span_with_otel_data = MagicMock()

    span_processor.otel_span_map = {
        "111111111abcdef": MagicMock(),  # should stay
        "2222222222abcdef": MagicMock(),  # should go
        "3333333333abcdef": MagicMock(),  # should go
        "1234567890abcdef": fake_sentry_span,  # should go (because it is closed)
    }
    # Derive the bucket key from the span's own start time (nanoseconds),
    # the same way the processor does, so the test cannot break when the
    # wall clock crosses a minute boundary between two clock reads.
    current_time_minutes = int(otel_span.start_time / 1e9 / 60)
    span_processor.open_spans = {
        current_time_minutes: {"1234567890abcdef"},  # should go (because it is closed)
        current_time_minutes - 3: {"111111111abcdef"},  # should stay
        current_time_minutes
        - 11: {"2222222222abcdef", "3333333333abcdef"},  # should go
    }

    span_processor.on_end(otel_span)
    assert sorted(span_processor.otel_span_map) == ["111111111abcdef"]
    # Sets only define a partial (subset) order, so sort the buckets by
    # their sorted contents to get a deterministic order.
    assert sorted(span_processor.open_spans.values(), key=sorted) == [
        {"111111111abcdef"}
    ]

0 commit comments

Comments
 (0)