Skip to content

Commit cfca23d

Browse files
BYKchargome
andauthored
fix(otel): Don't ignore child spans after the root is sent (#16416)
This fixes a critical issue I was having with Mastra where almost all my spans from a workflow run were getting stuck due to their parent span being sent almost immediately. I'm guessing this has to do something with `async` stuff going on. Regardless, by treating these children as 'root spans' I was able to get them out and displayed correctly in Spotlight. What this does is creates a 5-min capped 'already sent span ids' cache and treats any span that has a parent id in this list as a root span to get them unstuck. ![image](https://github.com/user-attachments/assets/bae9376b-d6db-4c87-ae0f-9f30021f3a8d) --------- Co-authored-by: Charly Gomez <charly.gomez@sentry.io>
1 parent 72b0a18 commit cfca23d

File tree

5 files changed

+218
-84
lines changed

5 files changed

+218
-84
lines changed

dev-packages/e2e-tests/test-applications/nextjs-14/playwright.config.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,9 @@ if (!testEnv) {
55
throw new Error('No test env defined');
66
}
77

8-
const config = getPlaywrightConfig(
9-
{
10-
startCommand: testEnv === 'development' ? 'pnpm next dev -p 3030' : 'pnpm next start -p 3030',
11-
port: 3030,
12-
},
13-
{
14-
// This comes with the risk of tests leaking into each other but the tests run quite slow so we should parallelize
15-
workers: '100%',
16-
},
17-
);
8+
const config = getPlaywrightConfig({
9+
startCommand: testEnv === 'development' ? 'pnpm next dev -p 3030' : 'pnpm next start -p 3030',
10+
port: 3030,
11+
});
1812

1913
export default config;

dev-packages/e2e-tests/test-applications/nextjs-app-dir/playwright.config.mjs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,9 @@ if (!testEnv) {
55
throw new Error('No test env defined');
66
}
77

8-
const config = getPlaywrightConfig(
9-
{
10-
startCommand: testEnv === 'development' ? 'pnpm next dev -p 3030' : 'pnpm next start -p 3030',
11-
port: 3030,
12-
},
13-
{
14-
// This comes with the risk of tests leaking into each other but the tests run quite slow so we should parallelize
15-
workers: '100%',
16-
},
17-
);
8+
const config = getPlaywrightConfig({
9+
startCommand: testEnv === 'development' ? 'pnpm next dev -p 3030' : 'pnpm next start -p 3030',
10+
port: 3030,
11+
});
1812

1913
export default config;

dev-packages/opentelemetry-v2-tests/test/integration/transactions.test.ts

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import type { SpanContext } from '@opentelemetry/api';
22
import { context, ROOT_CONTEXT, trace, TraceFlags } from '@opentelemetry/api';
33
import { TraceState } from '@opentelemetry/core';
4-
import type { SpanProcessor } from '@opentelemetry/sdk-trace-base';
54
import type { Event, TransactionEvent } from '@sentry/core';
65
import {
76
addBreadcrumb,
@@ -15,7 +14,6 @@ import {
1514
} from '@sentry/core';
1615
import { afterEach, describe, expect, it, vi } from 'vitest';
1716
import { SENTRY_TRACE_STATE_DSC } from '../../../../packages/opentelemetry/src/constants';
18-
import { SentrySpanProcessor } from '../../../../packages/opentelemetry/src/spanProcessor';
1917
import { startInactiveSpan, startSpan } from '../../../../packages/opentelemetry/src/trace';
2018
import { makeTraceState } from '../../../../packages/opentelemetry/src/utils/makeTraceState';
2119
import { cleanupOtel, getProvider, getSpanProcessor, mockSdkInit } from '../helpers/mockSdkInit';
@@ -550,7 +548,60 @@ describe('Integration | Transactions', () => {
550548
expect(finishedSpans.length).toBe(0);
551549
});
552550

553-
it('discards child spans that are finished after their parent span', async () => {
551+
it('collects child spans that are finished within 5 minutes their parent span has been sent', async () => {
552+
const timeout = 5 * 60 * 1000;
553+
const now = Date.now();
554+
vi.useFakeTimers();
555+
vi.setSystemTime(now);
556+
557+
const logs: unknown[] = [];
558+
vi.spyOn(logger, 'log').mockImplementation(msg => logs.push(msg));
559+
560+
const transactions: Event[] = [];
561+
562+
mockSdkInit({
563+
tracesSampleRate: 1,
564+
beforeSendTransaction: event => {
565+
transactions.push(event);
566+
return null;
567+
},
568+
});
569+
570+
const provider = getProvider();
571+
const spanProcessor = getSpanProcessor();
572+
573+
const exporter = spanProcessor ? spanProcessor['_exporter'] : undefined;
574+
575+
if (!exporter) {
576+
throw new Error('No exporter found, aborting test...');
577+
}
578+
579+
startSpanManual({ name: 'test name' }, async span => {
580+
const subSpan = startInactiveSpan({ name: 'inner span 1' });
581+
subSpan.end();
582+
583+
const subSpan2 = startInactiveSpan({ name: 'inner span 2' });
584+
585+
span.end();
586+
587+
setTimeout(() => {
588+
subSpan2.end();
589+
}, timeout - 2);
590+
});
591+
592+
vi.advanceTimersByTime(timeout - 1);
593+
594+
expect(transactions).toHaveLength(2);
595+
expect(transactions[0]?.spans).toHaveLength(1);
596+
597+
const finishedSpans: any = exporter['_finishedSpanBuckets'].flatMap(bucket =>
598+
bucket ? Array.from(bucket.spans) : [],
599+
);
600+
expect(finishedSpans.length).toBe(0);
601+
});
602+
603+
it('discards child spans that are finished after 5 minutes their parent span has been sent', async () => {
604+
const timeout = 5 * 60 * 1000;
554605
const now = Date.now();
555606
vi.useFakeTimers();
556607
vi.setSystemTime(now);
@@ -587,10 +638,10 @@ describe('Integration | Transactions', () => {
587638

588639
setTimeout(() => {
589640
subSpan2.end();
590-
}, 1);
641+
}, timeout + 1);
591642
});
592643

593-
vi.advanceTimersByTime(2);
644+
vi.advanceTimersByTime(timeout + 2);
594645

595646
expect(transactions).toHaveLength(1);
596647
expect(transactions[0]?.spans).toHaveLength(1);

packages/opentelemetry/src/spanExporter.ts

Lines changed: 93 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ export class SentrySpanExporter {
7171
private _finishedSpanBucketSize: number;
7272
private _spansToBucketEntry: WeakMap<ReadableSpan, FinishedSpanBucket>;
7373
private _lastCleanupTimestampInS: number;
74+
// Essentially a a set of span ids that are already sent. The values are expiration
75+
// times in this cache so we don't hold onto them indefinitely.
76+
private _sentSpans: Map<string, number>;
7477

7578
public constructor(options?: {
7679
/** Lower bound of time in seconds until spans that are buffered but have not been sent as part of a transaction get cleared from memory. */
@@ -80,6 +83,48 @@ export class SentrySpanExporter {
8083
this._finishedSpanBuckets = new Array(this._finishedSpanBucketSize).fill(undefined);
8184
this._lastCleanupTimestampInS = Math.floor(Date.now() / 1000);
8285
this._spansToBucketEntry = new WeakMap();
86+
this._sentSpans = new Map<string, number>();
87+
}
88+
89+
/**
90+
* Check if a span with the given ID has already been sent using the `_sentSpans` as a cache.
91+
* Purges "expired" spans from the cache upon checking.
92+
* @param spanId The span id to check.
93+
* @returns Whether the span is already sent in the past X seconds.
94+
*/
95+
public isSpanAlreadySent(spanId: string): boolean {
96+
const expirationTime = this._sentSpans.get(spanId);
97+
if (expirationTime) {
98+
if (Date.now() >= expirationTime) {
99+
this._sentSpans.delete(spanId); // Remove expired span
100+
} else {
101+
return true;
102+
}
103+
}
104+
return false;
105+
}
106+
107+
/** Remove "expired" span id entries from the _sentSpans cache. */
108+
public flushSentSpanCache(): void {
109+
const currentTimestamp = Date.now();
110+
// Note, it is safe to delete items from the map as we go: https://stackoverflow.com/a/35943995/90297
111+
for (const [spanId, expirationTime] of this._sentSpans.entries()) {
112+
if (expirationTime <= currentTimestamp) {
113+
this._sentSpans.delete(spanId);
114+
}
115+
}
116+
}
117+
118+
/** Check if a node is a completed root node or a node whose parent has already been sent */
119+
public nodeIsCompletedRootNode(node: SpanNode): node is SpanNodeCompleted {
120+
return !!node.span && (!node.parentNode || this.isSpanAlreadySent(node.parentNode.id));
121+
}
122+
123+
/** Get all completed root nodes from a list of nodes */
124+
public getCompletedRootNodes(nodes: SpanNode[]): SpanNodeCompleted[] {
125+
// TODO: We should be able to remove the explicit `node is SpanNodeCompleted` type guard
126+
// once we stop supporting TS < 5.5
127+
return nodes.filter((node): node is SpanNodeCompleted => this.nodeIsCompletedRootNode(node));
83128
}
84129

85130
/** Export a single span. */
@@ -113,7 +158,8 @@ export class SentrySpanExporter {
113158
this._spansToBucketEntry.set(span, currentBucket);
114159

115160
// If the span doesn't have a local parent ID (it's a root span), we're gonna flush all the ended spans
116-
if (!getLocalParentId(span)) {
161+
const localParentId = getLocalParentId(span);
162+
if (!localParentId || this.isSpanAlreadySent(localParentId)) {
117163
this._clearTimeout();
118164

119165
// If we got a parent span, we try to send the span tree
@@ -128,30 +174,29 @@ export class SentrySpanExporter {
128174
public flush(): void {
129175
this._clearTimeout();
130176

131-
const finishedSpans: ReadableSpan[] = [];
132-
this._finishedSpanBuckets.forEach(bucket => {
133-
if (bucket) {
134-
finishedSpans.push(...bucket.spans);
135-
}
136-
});
177+
const finishedSpans: ReadableSpan[] = this._finishedSpanBuckets.flatMap(bucket =>
178+
bucket ? Array.from(bucket.spans) : [],
179+
);
137180

138-
const sentSpans = maybeSend(finishedSpans);
181+
this.flushSentSpanCache();
182+
const sentSpans = this._maybeSend(finishedSpans);
183+
for (const span of finishedSpans) {
184+
this._sentSpans.set(span.spanContext().spanId, Date.now() + DEFAULT_TIMEOUT * 1000);
185+
}
139186

140187
const sentSpanCount = sentSpans.size;
141-
142188
const remainingOpenSpanCount = finishedSpans.length - sentSpanCount;
143-
144189
DEBUG_BUILD &&
145190
logger.log(
146191
`SpanExporter exported ${sentSpanCount} spans, ${remainingOpenSpanCount} spans are waiting for their parent spans to finish`,
147192
);
148193

149-
sentSpans.forEach(span => {
194+
for (const span of sentSpans) {
150195
const bucketEntry = this._spansToBucketEntry.get(span);
151196
if (bucketEntry) {
152197
bucketEntry.spans.delete(span);
153198
}
154-
});
199+
}
155200
}
156201

157202
/** Clear the exporter. */
@@ -167,59 +212,51 @@ export class SentrySpanExporter {
167212
this._flushTimeout = undefined;
168213
}
169214
}
170-
}
171-
172-
/**
173-
* Send the given spans, but only if they are part of a finished transaction.
174-
*
175-
* Returns the sent spans.
176-
* Spans remain unsent when their parent span is not yet finished.
177-
* This will happen regularly, as child spans are generally finished before their parents.
178-
* But it _could_ also happen because, for whatever reason, a parent span was lost.
179-
* In this case, we'll eventually need to clean this up.
180-
*/
181-
function maybeSend(spans: ReadableSpan[]): Set<ReadableSpan> {
182-
const grouped = groupSpansWithParents(spans);
183-
const sentSpans = new Set<ReadableSpan>();
184215

185-
const rootNodes = getCompletedRootNodes(grouped);
216+
/**
217+
* Send the given spans, but only if they are part of a finished transaction.
218+
*
219+
* Returns the sent spans.
220+
* Spans remain unsent when their parent span is not yet finished.
221+
* This will happen regularly, as child spans are generally finished before their parents.
222+
* But it _could_ also happen because, for whatever reason, a parent span was lost.
223+
* In this case, we'll eventually need to clean this up.
224+
*/
225+
private _maybeSend(spans: ReadableSpan[]): Set<ReadableSpan> {
226+
const grouped = groupSpansWithParents(spans);
227+
const sentSpans = new Set<ReadableSpan>();
186228

187-
rootNodes.forEach(root => {
188-
const span = root.span;
189-
sentSpans.add(span);
190-
const transactionEvent = createTransactionForOtelSpan(span);
229+
const rootNodes = this.getCompletedRootNodes(grouped);
191230

192-
// We'll recursively add all the child spans to this array
193-
const spans = transactionEvent.spans || [];
231+
for (const root of rootNodes) {
232+
const span = root.span;
233+
sentSpans.add(span);
234+
const transactionEvent = createTransactionForOtelSpan(span);
194235

195-
root.children.forEach(child => {
196-
createAndFinishSpanForOtelSpan(child, spans, sentSpans);
197-
});
236+
// We'll recursively add all the child spans to this array
237+
const spans = transactionEvent.spans || [];
198238

199-
// spans.sort() mutates the array, but we do not use this anymore after this point
200-
// so we can safely mutate it here
201-
transactionEvent.spans =
202-
spans.length > MAX_SPAN_COUNT
203-
? spans.sort((a, b) => a.start_timestamp - b.start_timestamp).slice(0, MAX_SPAN_COUNT)
204-
: spans;
239+
for (const child of root.children) {
240+
createAndFinishSpanForOtelSpan(child, spans, sentSpans);
241+
}
205242

206-
const measurements = timedEventsToMeasurements(span.events);
207-
if (measurements) {
208-
transactionEvent.measurements = measurements;
209-
}
243+
// spans.sort() mutates the array, but we do not use this anymore after this point
244+
// so we can safely mutate it here
245+
transactionEvent.spans =
246+
spans.length > MAX_SPAN_COUNT
247+
? spans.sort((a, b) => a.start_timestamp - b.start_timestamp).slice(0, MAX_SPAN_COUNT)
248+
: spans;
210249

211-
captureEvent(transactionEvent);
212-
});
213-
214-
return sentSpans;
215-
}
250+
const measurements = timedEventsToMeasurements(span.events);
251+
if (measurements) {
252+
transactionEvent.measurements = measurements;
253+
}
216254

217-
function nodeIsCompletedRootNode(node: SpanNode): node is SpanNodeCompleted {
218-
return !!node.span && !node.parentNode;
219-
}
255+
captureEvent(transactionEvent);
256+
}
220257

221-
function getCompletedRootNodes(nodes: SpanNode[]): SpanNodeCompleted[] {
222-
return nodes.filter(nodeIsCompletedRootNode);
258+
return sentSpans;
259+
}
223260
}
224261

225262
function parseSpan(span: ReadableSpan): { op?: string; origin?: SpanOrigin; source?: TransactionSource } {

0 commit comments

Comments
 (0)