From e7b6f9ac24a350a42ad5727b76d3cdb726d590c4 Mon Sep 17 00:00:00 2001 From: Kimi Wu Date: Mon, 10 Apr 2023 16:56:08 -0400 Subject: [PATCH 1/6] Python and Step Functions Span Linking --- datadog_lambda/tracing.py | 39 ++++++++++++++++++++++++++++++++++++++- datadog_lambda/trigger.py | 3 ++- tests/test_tracing.py | 19 ++++++++++++++++--- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index 19ef8c04..8575d373 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -2,7 +2,7 @@ # under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019 Datadog, Inc. - +import hashlib import logging import os import json @@ -328,6 +328,37 @@ def extract_context_from_kinesis_event(event, lambda_context): return extract_context_from_lambda_context(lambda_context) +def _deterministic_md5_hash(s: str) -> str: + # return hashlib.md5(s.encode("utf-8")).hexdigest() + hex = hashlib.md5(s.encode("ascii")).hexdigest() + # str(int(hex, 16)) + b = bin(int(hex, 16)) + binary_str = str(b) + binary_str_remove_0b = binary_str[2:] + most_significant_64_bits = binary_str_remove_0b[:-64] + return str(int(most_significant_64_bits, 2)) + + +def extract_context_from_step_functions(event, lambda_context): + """ + Only extract datadog trace context when Step Functions Context Object is injected into lambda's event dict + """ + try: + execution_id = event.get("Execution").get("Id") + state_name = event.get("State").get("Name") + state_entered_time = event.get("State").get("EnteredTime") + + trace_id = _deterministic_md5_hash(execution_id) + parent_id = _deterministic_md5_hash( + execution_id + "#" + state_name + "#" + state_entered_time + ) + sampling_priority = SamplingPriority.AUTO_KEEP + return trace_id, parent_id, sampling_priority + except Exception as e: + logger.debug("The Step Functions trace extractor returned with error %s", e) + return extract_context_from_lambda_context(lambda_context) + + def extract_context_custom_extractor(extractor, event, lambda_context): """ Extract Datadog trace context using a custom trace extractor function @@ -440,6 +471,12 @@ def extract_dd_trace_context( parent_id, sampling_priority, ) = extract_context_from_kinesis_event(event, lambda_context) + elif event_source.equals(EventTypes.STEPFUNCTIONS): + ( + trace_id, + parent_id, + sampling_priority, + ) = extract_context_from_step_functions(event, lambda_context) else: trace_id, parent_id, sampling_priority = extract_context_from_lambda_context( lambda_context diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py index 0576e3f9..41ceedbe 100644 --- a/datadog_lambda/trigger.py +++ b/datadog_lambda/trigger.py @@ -34,12 +34,13 @@ class EventTypes(_stringTypedEnum): CLOUDWATCH_EVENTS = "cloudwatch-events" CLOUDFRONT = "cloudfront" DYNAMODB = "dynamodb" + EVENTBRIDGE = "eventbridge" KINESIS = "kinesis" LAMBDA_FUNCTION_URL = "lambda-function-url" S3 = "s3" SNS = "sns" SQS = "sqs" - EVENTBRIDGE = "eventbridge" + STEPFUNCTIONS = "states" class EventSubtypes(_stringTypedEnum): diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 25865d5e..67832e53 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -5,7 +5,8 @@ from unittest.mock import MagicMock, Mock, patch, call import ddtrace -from ddtrace.constants import ERROR_MSG, ERROR_TYPE + +# from ddtrace.constants import ERROR_MSG, ERROR_TYPE from ddtrace import tracer from ddtrace.context import Context @@ -15,6 +16,7 @@ XraySubsegment, ) from datadog_lambda.tracing import ( + _deterministic_md5_hash, create_inferred_span, extract_dd_trace_context, create_dd_dummy_metadata_subsegment, @@ -1334,9 +1336,7 @@ def test_create_inferred_span_from_api_gateway_event_no_apiid(self): event = json.load(event) ctx = get_mock_context() ctx.aws_request_id = "123" - print(event) span = create_inferred_span(event, ctx) - print(span) self.assertEqual(span.get_tag("operation_name"), "aws.apigateway.rest") self.assertEqual( span.service, @@ -1389,3 +1389,16 @@ def test_no_error_with_nonetype_headers(self): lambda_ctx, ) self.assertEqual(ctx, None) + + +class TestStepFunctionsTraceContext(unittest.TestCase): + def test_deterministic_m5_hash(self): + result = _deterministic_md5_hash("some_testing_random_string") + self.assertEqual("2251275791555400689", result) + + def test_deterministic_m5_hash__result_the_same_as_backend(self): + result = _deterministic_md5_hash( + "arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a" + ":c8baf081-31f1-464d-971f-70cb17d01111#step-one#2022-12-08T21:08:19.224Z" + ) + self.assertEqual("8034507082463708833", result) From eb7bbd3436dd44b2214300204b47a810ba65a234 Mon Sep 17 00:00:00 2001 From: Kimi Wu Date: Tue, 11 Apr 2023 10:22:10 -0400 Subject: [PATCH 2/6] update hash function and test --- datadog_lambda/tracing.py | 19 +++++++++++-------- tests/test_tracing.py | 8 ++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index 8575d373..1ea04bd8 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -329,19 +329,22 @@ def extract_context_from_kinesis_event(event, lambda_context): def _deterministic_md5_hash(s: str) -> str: - # return hashlib.md5(s.encode("utf-8")).hexdigest() + """MD5 here is to generate trace_id, not for any encryption.""" hex = hashlib.md5(s.encode("ascii")).hexdigest() - # str(int(hex, 16)) - b = bin(int(hex, 16)) - binary_str = str(b) - binary_str_remove_0b = binary_str[2:] - most_significant_64_bits = binary_str_remove_0b[:-64] - return str(int(most_significant_64_bits, 2)) + binary = bin(int(hex, 16)) + binary_str = str(binary) + binary_str_remove_0b = binary_str[2:].rjust(128, "0") + most_significant_64_bits_without_leading_1 = "0" + binary_str_remove_0b[1:-64] + result = str(int(most_significant_64_bits_without_leading_1, 2)) + if result == "0" * 64: + return "1" + return result def extract_context_from_step_functions(event, lambda_context): """ - Only extract datadog trace context when Step Functions Context Object is injected into lambda's event dict + Only extract datadog trace context when Step Functions Context Object is injected + into lambda's event dict. """ try: execution_id = event.get("Execution").get("Id") diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 67832e53..6c8052f6 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -1402,3 +1402,11 @@ def test_deterministic_m5_hash__result_the_same_as_backend(self): ":c8baf081-31f1-464d-971f-70cb17d01111#step-one#2022-12-08T21:08:19.224Z" ) self.assertEqual("8034507082463708833", result) + + def test_deterministic_m5_hash__always_leading_with_zero(self): + for i in range(100): + result = _deterministic_md5_hash(str(i)) + result_in_binary = bin(int(result)) + # Leading zeros will be omitted, so only test for full 64 bits present + if len(result_in_binary) == 66: # "0b" + 64 bits. + self.assertTrue(result_in_binary.startswith("0b0")) From 25f3d87865fcf4caaccfbc074ad3630a2af2cd5b Mon Sep 17 00:00:00 2001 From: Kimi Wu Date: Tue, 11 Apr 2023 10:22:37 -0400 Subject: [PATCH 3/6] rename hex variable --- datadog_lambda/tracing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index 1ea04bd8..b5515de0 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -330,8 +330,8 @@ def extract_context_from_kinesis_event(event, lambda_context): def _deterministic_md5_hash(s: str) -> str: """MD5 here is to generate trace_id, not for any encryption.""" - hex = hashlib.md5(s.encode("ascii")).hexdigest() - binary = bin(int(hex, 16)) + hex_number = hashlib.md5(s.encode("ascii")).hexdigest() + binary = bin(int(hex_number, 16)) binary_str = str(binary) binary_str_remove_0b = binary_str[2:].rjust(128, "0") most_significant_64_bits_without_leading_1 = "0" + binary_str_remove_0b[1:-64] From eb94db1613900e4c1c2c83490b4095d22e483633 Mon Sep 17 00:00:00 2001 From: Kimi Wu Date: Tue, 11 Apr 2023 10:34:30 -0400 Subject: [PATCH 4/6] uncomment --- tests/test_tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 6c8052f6..7d1b7a29 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -6,7 +6,7 @@ import ddtrace -# from ddtrace.constants import ERROR_MSG, ERROR_TYPE +from ddtrace.constants import ERROR_MSG, ERROR_TYPE from ddtrace import tracer from ddtrace.context import Context From 17c4dd7576c7594d0a9fe1e8b3594a191a0c8eba Mon Sep 17 00:00:00 2001 From: Kimi Wu Date: Tue, 11 Apr 2023 10:36:38 -0400 Subject: [PATCH 5/6] remove unused import --- tests/test_tracing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 7d1b7a29..e19c66aa 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -6,7 +6,6 @@ import ddtrace -from ddtrace.constants import ERROR_MSG, ERROR_TYPE from ddtrace import tracer from ddtrace.context import Context From 177b5b5964c33f9929933ce7648b59af584092bb Mon Sep 17 00:00:00 2001 From: Kimi Wu Date: Tue, 11 Apr 2023 14:10:26 -0400 Subject: [PATCH 6/6] set event_source for step functions --- datadog_lambda/tracing.py | 1 - datadog_lambda/trigger.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/datadog_lambda/tracing.py b/datadog_lambda/tracing.py index b5515de0..67836e46 100644 --- a/datadog_lambda/tracing.py +++ b/datadog_lambda/tracing.py @@ -350,7 +350,6 @@ def extract_context_from_step_functions(event, lambda_context): execution_id = event.get("Execution").get("Id") state_name = event.get("State").get("Name") state_entered_time = event.get("State").get("EnteredTime") - trace_id = _deterministic_md5_hash(execution_id) parent_id = _deterministic_md5_hash( execution_id + "#" + state_name + "#" + state_entered_time diff --git a/datadog_lambda/trigger.py b/datadog_lambda/trigger.py index 41ceedbe..bbb44b30 100644 --- a/datadog_lambda/trigger.py +++ b/datadog_lambda/trigger.py @@ -146,6 +146,9 @@ def parse_event_source(event: dict) -> _EventSource: if event.get("source") == "aws.events" or has_event_categories: event_source = _EventSource(EventTypes.CLOUDWATCH_EVENTS) + if "Execution" in event and "StateMachine" in event and "State" in event: + event_source = _EventSource(EventTypes.STEPFUNCTIONS) + event_record = get_first_record(event) if event_record: aws_event_source = event_record.get(