diff --git a/datadog_lambda/constants.py b/datadog_lambda/constants.py index fd8afb3e..6ab62738 100644 --- a/datadog_lambda/constants.py +++ b/datadog_lambda/constants.py @@ -3,9 +3,8 @@ # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019 Datadog, Inc. -# Datadog trace sampling priority - +# Datadog trace sampling priority class SamplingPriority(object): USER_REJECT = -1 AUTO_REJECT = 0 @@ -18,6 +17,7 @@ class TraceHeader(object): TRACE_ID = "x-datadog-trace-id" PARENT_ID = "x-datadog-parent-id" SAMPLING_PRIORITY = "x-datadog-sampling-priority" + TAGS = "x-datadog-tags" # X-Ray subsegment to save Datadog trace metadata diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index dfb08dd2..950c4401 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -357,8 +357,10 @@ def extract_context_from_kinesis_event(event, lambda_context): def _deterministic_sha256_hash(s: str, part: str) -> (int, int): - sha256_hash = hashlib.sha256(s.encode()).hexdigest() + return _sha256_to_binary_part(hashlib.sha256(s.encode()).hexdigest(), part) + +def _sha256_to_binary_part(sha256_hash: str, part: str) -> (int, int): # First two chars is '0b'. zfill to ensure 256 bits, but we only care about the first 128 bits binary_hash = bin(int(sha256_hash, 16))[2:].zfill(256) if part == HIGHER_64_BITS: @@ -377,30 +379,49 @@ def extract_context_from_step_functions(event, lambda_context): into lambda's event dict. """ try: - execution_id = event.get("Execution").get("Id") - state_name = event.get("State").get("Name") - state_entered_time = event.get("State").get("EnteredTime") - # returning 128 bits since 128bit traceId will be break up into - # traditional traceId and _dd.p.tid tag - # https://github.com/DataDog/dd-trace-py/blob/3e34d21cb9b5e1916e549047158cb119317b96ab/ddtrace/propagation/http.py#L232-L240 - trace_id = _deterministic_sha256_hash(execution_id, LOWER_64_BITS) - - parent_id = _deterministic_sha256_hash( - f"{execution_id}#{state_name}#{state_entered_time}", HIGHER_64_BITS - ) + meta = {} + + if "_datadog" in event: + dd_data = event.get("_datadog") + parent_id = _sha256_to_binary_part( + dd_data.get("x-datadog-parent-id-hash"), HIGHER_64_BITS + ) + if "x-datadog-trace-id" in dd_data: # lambda root + dd_data["x-datadog-parent-id"] = str(parent_id) + return propagator.extract(dd_data) + else: # sfn root + trace_id = _sha256_to_binary_part( + dd_data.get("x-datadog-trace-id-hash"), LOWER_64_BITS + ) + meta["_dd.p.tid"] = hex( + _sha256_to_binary_part( + dd_data.get("x-datadog-trace-id-hash"), HIGHER_64_BITS + ) + )[2:] + else: + execution_id = event.get("Execution").get("Id") + state_name = event.get("State").get("Name") + state_entered_time = event.get("State").get("EnteredTime") + # returning 128 bits since 128bit traceId will be break up into + # traditional traceId and _dd.p.tid tag + # https://github.com/DataDog/dd-trace-py/blob/3e34d21cb9b5e1916e549047158cb119317b96ab/ddtrace/propagation/http.py#L232-L240 + trace_id = _deterministic_sha256_hash(execution_id, LOWER_64_BITS) + # take the higher 64 bits as _dd.p.tid tag and use hex to encode + # [2:] to remove '0x' in the hex str + meta["_dd.p.tid"] = hex( + _deterministic_sha256_hash(execution_id, HIGHER_64_BITS) + )[2:] + + parent_id = _deterministic_sha256_hash( + f"{execution_id}#{state_name}#{state_entered_time}", HIGHER_64_BITS + ) sampling_priority = SamplingPriority.AUTO_KEEP return Context( trace_id=trace_id, span_id=parent_id, sampling_priority=sampling_priority, - # take the higher 64 bits as _dd.p.tid tag and use hex to encode - # [2:] to remove '0x' in the hex str - meta={ - "_dd.p.tid": hex( - _deterministic_sha256_hash(execution_id, HIGHER_64_BITS) - )[2:] - }, + meta=meta, ) except Exception as e: logger.debug("The Step Functions trace extractor returned with error %s", e) @@ -415,7 +436,9 @@ def is_legacy_lambda_step_function(event): return False event = event.get("Payload") - return "Execution" in event and "StateMachine" in event and "State" in event + return "_datadog" in event or ( + "Execution" in event and "StateMachine" in event and "State" in event + ) def extract_context_custom_extractor(extractor, event, lambda_context): @@ -670,6 +693,7 @@ def create_inferred_span( event_source: _EventSource = None, decode_authorizer_context: bool = True, ): + logger.debug("abhinav event %s", event) if event_source is None: event_source = parse_event_source(event) try: diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py index 64eff1a0..18b8dfb1 100644 --- a/datadog_lambda/trigger.py +++ b/datadog_lambda/trigger.py @@ -146,7 +146,9 @@ def parse_event_source(event: dict) -> _EventSource: if event.get("source") == "aws.events" or has_event_categories: event_source = _EventSource(EventTypes.CLOUDWATCH_EVENTS) - if "Execution" in event and "StateMachine" in event and "State" in event: + if ( + "_datadog" in event and event.get("_datadog").get("serverless-version") == "v2" + ) or ("Execution" in event and "StateMachine" in event and "State" in event): event_source = _EventSource(EventTypes.STEPFUNCTIONS) event_record = get_first_record(event) diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 22ac7049..06d428b6 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -617,7 +617,7 @@ def test_with_complete_datadog_trace_headers_with_trigger_tags(self): @with_trace_propagation_style("datadog") def test_step_function_trace_data(self): lambda_ctx = get_mock_context() - sqs_event = { + sfn_event = { "Execution": { "Id": "665c417c-1237-4742-aaca-8b3becbb9e75", }, @@ -627,7 +627,7 @@ def test_step_function_trace_data(self): "EnteredTime": "Mon Nov 13 12:43:33 PST 2023", }, } - ctx, source, event_source = extract_dd_trace_context(sqs_event, lambda_ctx) + ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) self.assertEqual(source, "event") expected_context = Context( trace_id=3675572987363469717, @@ -642,7 +642,79 @@ def test_step_function_trace_data(self): TraceHeader.TRACE_ID: "3675572987363469717", TraceHeader.PARENT_ID: "10713633173203262661", TraceHeader.SAMPLING_PRIORITY: "1", - "x-datadog-tags": "_dd.p.tid=e987c84b36b11ab", + TraceHeader.TAGS: "_dd.p.tid=e987c84b36b11ab", + }, + ) + create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) + self.mock_send_segment.assert_called_with( + XraySubsegment.TRACE_KEY, + expected_context, + ) + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_lambda_root(self): + lambda_ctx = get_mock_context() + sfn_event = { + "_datadog": { + "x-datadog-trace-id": "5821803790426892636", + "x-datadog-sampling-priority": "1", + "x-datadog-tags": "_dd.p.dm=-0,_dd.p.tid=672a7cb100000000", + "traceparent": "00-672a7cb10000000050cb33b3c06ae95c-5fda9d8d1d1373f9-01", + "tracestate": "dd=p:5fda9d8d1d1373f9;s:1;t.dm:-0;t.tid:672a7cb100000000", + "x-datadog-parent-id-hash": "a926584eba705d6ec904c54db2ecc4d4a2c91e7dabe7ce87ac26edb43388fbc5", + "serverless-version": "v2", + } + } + ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) + self.assertEqual(source, "event") + expected_context = Context( + trace_id=137131089076080415507232535361568303452, + span_id=2965154499828669806, + sampling_priority=1, + meta={"_dd.p.dm": "-0", "_dd.p.tid": "672a7cb100000000"}, + ) + self.assertEqual(ctx, expected_context) + self.assertEqual( + get_dd_trace_context(), + { + TraceHeader.TRACE_ID: "5821803790426892636", + TraceHeader.PARENT_ID: "10713633173203262661", + TraceHeader.SAMPLING_PRIORITY: "1", + TraceHeader.TAGS: "_dd.p.dm=-0,_dd.p.tid=672a7cb100000000", + }, + ) + create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) + self.mock_send_segment.assert_called_with( + XraySubsegment.TRACE_KEY, + expected_context, + ) + + @with_trace_propagation_style("datadog") + def test_step_function_trace_data_sfn_root(self): + lambda_ctx = get_mock_context() + sfn_event = { + "_datadog": { + "x-datadog-trace-id-hash": "fed93f8c162880cb9aa90fcd1f8395383835841d5470d30215f3dd52906ebc58", + "x-datadog-parent-id-hash": "c5eb94cc9220ab5783e1db53debd54b8c93f6f2a3eae1c680d7b849f2d34e551", + "serverless-version": "v2", + } + } + ctx, source, event_source = extract_dd_trace_context(sfn_event, lambda_ctx) + self.assertEqual(source, "event") + expected_context = Context( + trace_id=1921084089721656632, + span_id=5038284214489885527, + sampling_priority=1, + meta={"_dd.p.tid": "7ed93f8c162880cb"}, + ) + self.assertEqual(ctx, expected_context) + self.assertEqual( + get_dd_trace_context(), + { + TraceHeader.TRACE_ID: "1921084089721656632", + TraceHeader.PARENT_ID: "10713633173203262661", + TraceHeader.SAMPLING_PRIORITY: "1", + TraceHeader.TAGS: "_dd.p.tid=7ed93f8c162880cb", }, ) create_dd_dummy_metadata_subsegment(ctx, XraySubsegment.TRACE_KEY) @@ -666,6 +738,17 @@ def test_is_legacy_lambda_step_function(self): } self.assertTrue(is_legacy_lambda_step_function(sf_event)) + sf_event = { + "Payload": { + "_datadog": { + "x-datadog-trace-id-hash": "fed93f8c162880cb9aa90fcd1f8395383835841d5470d30215f3dd52906ebc58", + "x-datadog-parent-id-hash": "c5eb94cc9220ab5783e1db53debd54b8c93f6f2a3eae1c680d7b849f2d34e551", + "serverless-version": "v2", + } + } + } + self.assertTrue(is_legacy_lambda_step_function(sf_event)) + sf_event = { "Execution": { "Id": "665c417c-1237-4742-aaca-8b3becbb9e75",