Skip to content

Commit 1a75734

Browse files
authored
Redriven Step Functions Trace Merging (#598)
* use redrive count to generate step functions parent ID * don't use redrive_count when 0 * fix tests * fix redrive count type * added link to matching snapshot in logs-backend
1 parent 4061917 commit 1a75734

File tree

4 files changed

+95
-22
lines changed

4 files changed

+95
-22
lines changed

src/trace/context/extractor.spec.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ describe("TraceContextExtractor", () => {
669669
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
670670
RoleArn:
671671
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
672+
RedriveCount: 0,
672673
StartTime: "2022-12-08T21:08:17.924Z",
673674
},
674675
State: {
@@ -883,6 +884,7 @@ describe("TraceContextExtractor", () => {
883884
},
884885
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
885886
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
887+
RedriveCount: 0,
886888
StartTime: "2022-12-08T21:08:17.924Z",
887889
},
888890
State: {
@@ -919,6 +921,7 @@ describe("TraceContextExtractor", () => {
919921
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
920922
RoleArn:
921923
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
924+
RedriveCount: 0,
922925
StartTime: "2022-12-08T21:08:17.924Z",
923926
},
924927
State: {
@@ -959,6 +962,7 @@ describe("TraceContextExtractor", () => {
959962
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
960963
RoleArn:
961964
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
965+
RedriveCount: 0,
962966
StartTime: "2022-12-08T21:08:17.924Z",
963967
},
964968
State: {
@@ -999,6 +1003,7 @@ describe("TraceContextExtractor", () => {
9991003
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
10001004
RoleArn:
10011005
"arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
1006+
RedriveCount: 0,
10021007
StartTime: "2022-12-08T21:08:17.924Z",
10031008
},
10041009
State: {
@@ -1049,6 +1054,7 @@ describe("TraceContextExtractor", () => {
10491054
},
10501055
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
10511056
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
1057+
RedriveCount: 0,
10521058
StartTime: "2022-12-08T21:08:17.924Z",
10531059
},
10541060
State: {
@@ -1086,7 +1092,7 @@ describe("TraceContextExtractor", () => {
10861092

10871093
const sentMessage = sentSegment.toString();
10881094
expect(sentMessage).toEqual(
1089-
'{"format": "json", "version": 1}\n{"id":"11111","trace_id":"1-5e272390-8c398be037738dc042009320","parent_id":"94ae789b969f1cc5","name":"datadog-metadata","start_time":1487076708,"end_time":1487076708,"type":"subsegment","metadata":{"datadog":{"root_span_metadata":{"execution_id":"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf","state_entered_time":"2022-12-08T21:08:19.224Z","state_name":"step-one"}}}}',
1095+
'{"format": "json", "version": 1}\n{"id":"11111","trace_id":"1-5e272390-8c398be037738dc042009320","parent_id":"94ae789b969f1cc5","name":"datadog-metadata","start_time":1487076708,"end_time":1487076708,"type":"subsegment","metadata":{"datadog":{"root_span_metadata":{"execution_id":"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf","redrive_count":"0","state_entered_time":"2022-12-08T21:08:19.224Z","state_name":"step-one"}}}}',
10901096
);
10911097
});
10921098

src/trace/context/extractors/step-function.spec.ts

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,41 @@ describe("StepFunctionEventTraceExtractor", () => {
88
describe("extract", () => {
99
const payload = {
1010
Execution: {
11-
Id: "arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
12-
Input: {
13-
MyInput: "MyValue",
14-
},
15-
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
16-
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
17-
StartTime: "2022-12-08T21:08:17.924Z",
11+
Id: "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316",
12+
Name: "72a7ca3e-901c-41bb-b5a3-5f279b92a316",
13+
RoleArn:
14+
"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j",
15+
StartTime: "2024-12-04T19:38:04.069Z",
16+
RedriveCount: 0,
1817
},
1918
State: {
20-
Name: "step-one",
21-
EnteredTime: "2022-12-08T21:08:19.224Z",
22-
RetryCount: 2,
19+
Name: "Lambda Invoke",
20+
EnteredTime: "2024-12-04T19:38:04.118Z",
21+
RetryCount: 0,
2322
},
2423
StateMachine: {
25-
Id: "arn:aws:states:sa-east-1:425362996713:stateMachine:logs-to-traces-sequential",
26-
Name: "my-state-machine",
24+
Id: "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine",
25+
Name: "abhinav-activity-state-machine",
26+
},
27+
};
28+
29+
const redrivePayload = {
30+
Execution: {
31+
Id: "arn:aws:states:sa-east-1:425362996713:execution:abhinav-activity-state-machine:72a7ca3e-901c-41bb-b5a3-5f279b92a316",
32+
Name: "72a7ca3e-901c-41bb-b5a3-5f279b92a316",
33+
RoleArn:
34+
"arn:aws:iam::425362996713:role/service-role/StepFunctions-abhinav-activity-state-machine-role-22jpbgl6j",
35+
StartTime: "2024-12-04T19:38:04.069Z",
36+
RedriveCount: 1,
37+
},
38+
State: {
39+
Name: "Lambda Invoke",
40+
EnteredTime: "2024-12-04T19:38:04.118Z",
41+
RetryCount: 0,
42+
},
43+
StateMachine: {
44+
Id: "arn:aws:states:sa-east-1:425362996713:stateMachine:abhinav-activity-state-machine",
45+
Name: "abhinav-activity-state-machine",
2746
},
2847
};
2948
it("extracts trace context with valid payload", () => {
@@ -36,8 +55,25 @@ describe("StepFunctionEventTraceExtractor", () => {
3655
const traceContext = extractor.extract(payload);
3756
expect(traceContext).not.toBeNull();
3857

39-
expect(traceContext?.toTraceId()).toBe("1139193989631387307");
40-
expect(traceContext?.toSpanId()).toBe("5892738536804826142");
58+
expect(traceContext?.toTraceId()).toBe("435175499815315247");
59+
expect(traceContext?.toSpanId()).toBe("3929055471293792800");
60+
expect(traceContext?.sampleMode()).toBe("1");
61+
expect(traceContext?.source).toBe("event");
62+
});
63+
64+
// https://github.com/DataDog/logs-backend/blob/c17618cb552fc369ca40282bae0a65803f82f694/domains/serverless/apps/logs-to-traces-reducer/src/test/resources/test-json-files/stepfunctions/RedriveTest/snapshots/RedriveLambdaSuccessTraceMerging.json#L46
65+
it("extracts trace context with valid redriven payload", () => {
66+
// Mimic TraceContextService.extract initialization
67+
StepFunctionContextService.instance(redrivePayload);
68+
69+
const extractor = new StepFunctionEventTraceExtractor();
70+
71+
// Payload is sent again for safety in case the instance wasn't previously initialized
72+
const traceContext = extractor.extract(redrivePayload);
73+
expect(traceContext).not.toBeNull();
74+
75+
expect(traceContext?.toTraceId()).toBe("435175499815315247");
76+
expect(traceContext?.toSpanId()).toBe("5063839446130725204");
4177
expect(traceContext?.sampleMode()).toBe("1");
4278
expect(traceContext?.source).toBe("event");
4379
});
@@ -49,8 +85,8 @@ describe("StepFunctionEventTraceExtractor", () => {
4985
const traceContext = extractor.extract(payload);
5086
expect(traceContext).not.toBeNull();
5187

52-
expect(traceContext?.toTraceId()).toBe("1139193989631387307");
53-
expect(traceContext?.toSpanId()).toBe("5892738536804826142");
88+
expect(traceContext?.toTraceId()).toBe("435175499815315247");
89+
expect(traceContext?.toSpanId()).toBe("3929055471293792800");
5490
expect(traceContext?.sampleMode()).toBe("1");
5591
expect(traceContext?.source).toBe("event");
5692
});
@@ -65,8 +101,8 @@ describe("StepFunctionEventTraceExtractor", () => {
65101
const traceContext = extractor.extract({ Payload: payload });
66102
expect(traceContext).not.toBeNull();
67103

68-
expect(traceContext?.toTraceId()).toBe("1139193989631387307");
69-
expect(traceContext?.toSpanId()).toBe("5892738536804826142");
104+
expect(traceContext?.toTraceId()).toBe("435175499815315247");
105+
expect(traceContext?.toSpanId()).toBe("3929055471293792800");
70106
expect(traceContext?.sampleMode()).toBe("1");
71107
expect(traceContext?.source).toBe("event");
72108
});

src/trace/step-function-service.spec.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ describe("StepFunctionContextService", () => {
99
},
1010
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
1111
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
12+
RedriveCount: 0,
1213
StartTime: "2022-12-08T21:08:17.924Z",
1314
},
1415
State: {
@@ -30,6 +31,7 @@ describe("StepFunctionContextService", () => {
3031
},
3132
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
3233
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
34+
RedriveCount: 0,
3335
StartTime: "2022-12-08T21:08:17.924Z",
3436
},
3537
State: {
@@ -55,6 +57,7 @@ describe("StepFunctionContextService", () => {
5557
},
5658
Name: "85a9933e-9e11-83dc-6a61-b92367b6c3be",
5759
RoleArn: "arn:aws:iam::425362996713:role/service-role/StepFunctions-logs-to-traces-sequential-role-ccd69c03",
60+
RedriveCount: 0,
5861
StartTime: "2022-12-08T21:08:17.924Z",
5962
},
6063
State: {
@@ -106,6 +109,16 @@ describe("StepFunctionContextService", () => {
106109
},
107110
},
108111
],
112+
[
113+
"Execution RedriveCount is not a number",
114+
{
115+
...legacyStepFunctionEvent,
116+
Execution: {
117+
...legacyStepFunctionEvent.Execution,
118+
RedriveCount: "0",
119+
},
120+
},
121+
],
109122
[
110123
"State is not defined",
111124
{
@@ -146,6 +159,7 @@ describe("StepFunctionContextService", () => {
146159
expect(instance.context).toEqual({
147160
execution_id:
148161
"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
162+
redrive_count: "0",
149163
state_entered_time: "2022-12-08T21:08:19.224Z",
150164
state_name: "step-one",
151165
});
@@ -158,6 +172,7 @@ describe("StepFunctionContextService", () => {
158172
expect(instance.context).toEqual({
159173
execution_id:
160174
"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
175+
redrive_count: "0",
161176
state_entered_time: "2022-12-08T21:08:19.224Z",
162177
state_name: "step-one",
163178
root_execution_id:
@@ -173,6 +188,7 @@ describe("StepFunctionContextService", () => {
173188
expect(instance.context).toEqual({
174189
execution_id:
175190
"arn:aws:states:sa-east-1:425362996713:express:logs-to-traces-sequential:85a9933e-9e11-83dc-6a61-b92367b6c3be:3f7ef5c7-c8b8-4c88-90a1-d54aa7e7e2bf",
191+
redrive_count: "0",
176192
state_entered_time: "2022-12-08T21:08:19.224Z",
177193
state_name: "step-one",
178194
trace_id: "10593586103637578129",

src/trace/step-function-service.ts

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { Sha256 } from "@aws-crypto/sha256-js";
55

66
interface NestedStepFunctionContext {
77
execution_id: string;
8+
redrive_count: string;
89
state_entered_time: string;
910
state_name: string;
1011
root_execution_id: string;
@@ -13,6 +14,7 @@ interface NestedStepFunctionContext {
1314

1415
interface LambdaRootStepFunctionContext {
1516
execution_id: string;
17+
redrive_count: string;
1618
state_entered_time: string;
1719
state_name: string;
1820
trace_id: string;
@@ -22,6 +24,7 @@ interface LambdaRootStepFunctionContext {
2224

2325
interface LegacyStepFunctionContext {
2426
execution_id: string;
27+
redrive_count: string;
2528
state_entered_time: string;
2629
state_name: string;
2730
}
@@ -88,12 +91,13 @@ export class StepFunctionContextService {
8891
// Extract the common context variables
8992
const stateMachineContext = this.extractStateMachineContext(event);
9093
if (stateMachineContext === null) return;
91-
const { execution_id, state_entered_time, state_name } = stateMachineContext;
94+
const { execution_id, redrive_count, state_entered_time, state_name } = stateMachineContext;
9295

9396
if (typeof event["serverless-version"] === "string" && event["serverless-version"] === "v1") {
9497
if (typeof event.RootExecutionId === "string") {
9598
this.context = {
9699
execution_id,
100+
redrive_count,
97101
state_entered_time,
98102
state_name,
99103
root_execution_id: event.RootExecutionId,
@@ -102,6 +106,7 @@ export class StepFunctionContextService {
102106
} else if (typeof event["x-datadog-trace-id"] === "string" && typeof event["x-datadog-tags"] === "string") {
103107
this.context = {
104108
execution_id,
109+
redrive_count,
105110
state_entered_time,
106111
state_name,
107112
trace_id: event["x-datadog-trace-id"],
@@ -110,7 +115,7 @@ export class StepFunctionContextService {
110115
} as LambdaRootStepFunctionContext;
111116
}
112117
} else {
113-
this.context = { execution_id, state_entered_time, state_name } as LegacyStepFunctionContext;
118+
this.context = { execution_id, redrive_count, state_entered_time, state_name } as LegacyStepFunctionContext;
114119
}
115120
}
116121

@@ -134,8 +139,15 @@ export class StepFunctionContextService {
134139
return null;
135140
}
136141

142+
const redrivePostfix = this.context.redrive_count === "0" ? "" : `#${this.context.redrive_count}`;
143+
137144
const parentId = this.deterministicSha256HashToBigIntString(
138-
this.context.execution_id + "#" + this.context.state_name + "#" + this.context.state_entered_time,
145+
this.context.execution_id +
146+
"#" +
147+
this.context.state_name +
148+
"#" +
149+
this.context.state_entered_time +
150+
redrivePostfix,
139151
PARENT_ID,
140152
);
141153
const sampleMode = SampleMode.AUTO_KEEP;
@@ -196,12 +208,14 @@ export class StepFunctionContextService {
196208

197209
private extractStateMachineContext(event: any): {
198210
execution_id: string;
211+
redrive_count: string;
199212
state_entered_time: string;
200213
state_name: string;
201214
} | null {
202215
if (this.isValidContextObject(event)) {
203216
return {
204217
execution_id: event.Execution.Id,
218+
redrive_count: event.Execution.RedriveCount.toString(),
205219
state_entered_time: event.State.EnteredTime,
206220
state_name: event.State.Name,
207221
};
@@ -214,6 +228,7 @@ export class StepFunctionContextService {
214228
private isValidContextObject(context: any): boolean {
215229
return (
216230
typeof context?.Execution?.Id === "string" &&
231+
typeof context?.Execution?.RedriveCount === "number" &&
217232
typeof context?.State?.EnteredTime === "string" &&
218233
typeof context?.State?.Name === "string"
219234
);

0 commit comments

Comments
 (0)