Add integration tests #48

Merged: 19 commits, Mar 10, 2020
Changes from all commits
3 changes: 3 additions & 0 deletions .gitignore
@@ -36,3 +36,6 @@ nosetests.xml
.eggs/
.env/
.idea/


**/.serverless/
81 changes: 60 additions & 21 deletions datadog_lambda/patch.py
@@ -3,6 +3,8 @@
# This product includes software developed at Datadog (https://www.datadoghq.com/).
# Copyright 2019 Datadog, Inc.

+import json
+import os
import sys
import logging

@@ -13,9 +15,9 @@
logger = logging.getLogger(__name__)

if sys.version_info >= (3, 0, 0):
-    httplib_module = 'http.client'
+    httplib_module = "http.client"
else:
-    httplib_module = 'httplib'
+    httplib_module = "httplib"

_httplib_patched = False
_requests_patched = False
@@ -38,12 +40,9 @@ def _patch_httplib():
    global _httplib_patched
    if not _httplib_patched:
        _httplib_patched = True
-        wrap(
-            httplib_module,
-            'HTTPConnection.request',
-            _wrap_httplib_request
-        )
-        logger.debug('Patched %s', httplib_module)
+        wrap(httplib_module, "HTTPConnection.request", _wrap_httplib_request)
+
+    logger.debug("Patched %s", httplib_module)


def _patch_requests():
@@ -55,14 +54,10 @@ def _patch_requests():
    if not _requests_patched:
        _requests_patched = True
        try:
-            wrap(
-                'requests',
-                'Session.request',
-                _wrap_requests_request
-            )
-            logger.debug('Patched requests')
+            wrap("requests", "Session.request", _wrap_requests_request)
+            logger.debug("Patched requests")
        except Exception:
-            logger.debug('Failed to patch requests', exc_info=True)
+            logger.debug("Failed to patch requests", exc_info=True)


def _wrap_requests_request(func, instance, args, kwargs):
@@ -71,12 +66,17 @@ def _wrap_requests_request(func, instance, args, kwargs):
    into the outgoing requests.
    """
    context = get_dd_trace_context()
-    if 'headers' in kwargs:
-        kwargs['headers'].update(context)
+    if "headers" in kwargs:
+        kwargs["headers"].update(context)
    elif len(args) >= 5:
        args[4].update(context)
    else:
-        kwargs['headers'] = context
+        kwargs["headers"] = context
+
+    # If we're in an integration test, log the HTTP requests made
+    if os.environ.get("DD_INTEGRATION_TEST", "false").lower() == "true":
+        _print_request_string(args, kwargs)
+
    return func(*args, **kwargs)


@@ -86,10 +86,49 @@ def _wrap_httplib_request(func, instance, args, kwargs):
    the Datadog trace headers into the outgoing requests.
    """
    context = get_dd_trace_context()
-    if 'headers' in kwargs:
-        kwargs['headers'].update(context)
+    if "headers" in kwargs:
+        kwargs["headers"].update(context)
    elif len(args) >= 4:
        args[3].update(context)
    else:
-        kwargs['headers'] = context
+        kwargs["headers"] = context

    return func(*args, **kwargs)
+
+
+def _print_request_string(args, kwargs):
+    """Print the request so that it can be checked in integration tests
+
+    Only used by integration tests.
+    """
+    # Normalizes the different ways args can be passed to a request
+    # to prevent test flakiness
+    method = None
+    if len(args) > 0:
+        method = args[0]
+    else:
+        method = kwargs.get("method", "").upper()
+
+    url = None
+    if len(args) > 1:
+        url = args[1]
+    else:
+        url = kwargs.get("url")
+
+    # Sort the datapoints POSTed by their name so that snapshots always align
+    data = kwargs.get("data", "{}")
+    data_dict = json.loads(data)
+    data_dict.get("series", []).sort(key=lambda series: series.get("metric"))
+    sorted_data = json.dumps(data_dict)
+
+    # Sort headers to prevent any differences in ordering
+    headers = kwargs.get("headers", {})
+    sorted_headers = sorted(
+        "{}:{}".format(key, value) for key, value in headers.items()
+    )
+    sorted_header_str = json.dumps(sorted_headers)
+    print(
+        "HTTP {} {} Headers: {} Data: {}".format(
+            method, url, sorted_header_str, sorted_data
+        )
+    )
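
The point of _print_request_string is that the logged request must be deterministic so log snapshots can be diffed. As a rough standalone sketch (not part of this diff; the normalize helper, URL, and header values are made up for illustration), the following shows that payloads differing only in series and header ordering collapse to the same line:

# Standalone sketch of the normalization _print_request_string performs.
# The URL and headers below are placeholders, not the real Datadog endpoint.
import json


def normalize(method, url, headers, data):
    data_dict = json.loads(data)
    data_dict.get("series", []).sort(key=lambda series: series.get("metric"))
    sorted_headers = sorted("{}:{}".format(k, v) for k, v in headers.items())
    return "HTTP {} {} Headers: {} Data: {}".format(
        method, url, json.dumps(sorted_headers), json.dumps(data_dict)
    )


payload_a = json.dumps({"series": [{"metric": "tests.integration.count"}, {"metric": "hello.dog"}]})
payload_b = json.dumps({"series": [{"metric": "hello.dog"}, {"metric": "tests.integration.count"}]})
headers_a = {"Content-Type": "application/json", "Accept": "*/*"}
headers_b = {"Accept": "*/*", "Content-Type": "application/json"}

line_a = normalize("POST", "https://example.invalid/api", headers_a, payload_a)
line_b = normalize("POST", "https://example.invalid/api", headers_b, payload_b)
assert line_a == line_b  # ordering differences are normalized away
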
157 changes: 157 additions & 0 deletions scripts/run_integration_tests.sh
@@ -0,0 +1,157 @@
#!/bin/bash

# Usage - run commands from repo root:
# To check if new changes to the layer cause changes to any snapshots:
# BUILD_LAYERS=true DD_API_KEY=XXXX aws-vault exec sandbox-account-admin -- ./scripts/run_integration_tests.sh
# To regenerate snapshots:
# UPDATE_SNAPSHOTS=true DD_API_KEY=XXXX aws-vault exec sandbox-account-admin -- ./scripts/run_integration_tests.sh

set -e

# These values need to be in sync with serverless.yml, where there needs to be a function
# defined for every handler_runtime combination
LAMBDA_HANDLERS=("async-metrics" "sync-metrics" "http-requests")
RUNTIMES=("python27" "python36" "python37" "python38")

LOGS_WAIT_SECONDS=20

script_path=${BASH_SOURCE[0]}
scripts_dir=$(dirname $script_path)
repo_dir=$(dirname $scripts_dir)
integration_tests_dir="$repo_dir/tests/integration"

script_start_time=$(date --iso-8601=seconds)

mismatch_found=false

if [ -z "$DD_API_KEY" ]; then
echo "No DD_API_KEY env var set, exiting"
exit 1
fi

if [ -n "$UPDATE_SNAPSHOTS" ]; then
echo "Overwriting snapshots in this execution"
fi

if [ -n "$BUILD_LAYERS" ]; then
echo "Building layers that will be deployed with our test functions"
source $scripts_dir/build_layers.sh
else
echo "Not building layers, ensure they've already been built or re-run with 'BUILD_LAYERS=true DD_API_KEY=XXXX ./scripts/run_integration_tests.sh'"
fi

cd $integration_tests_dir
input_event_files=$(ls ./input_events)
# Sort event files by name so that snapshots stay consistent
input_event_files=($(for file_name in ${input_event_files[@]}; do echo $file_name; done | sort))

echo "Deploying functions"
serverless deploy

echo "Invoking functions"
set +e # Don't exit this script if an invocation fails or there's a diff
for handler_name in "${LAMBDA_HANDLERS[@]}"; do
    for runtime in "${RUNTIMES[@]}"; do
        function_name="${handler_name}_${runtime}"
        # Invoke function once for each input event
        for input_event_file in "${input_event_files[@]}"; do
            # Get event name without trailing ".json" so we can build the snapshot file name
            input_event_name=$(echo "$input_event_file" | sed "s/.json//")
            # Return value snapshot file format is snapshots/return_values/{handler}_{runtime}_{input-event}
            snapshot_path="./snapshots/return_values/${function_name}_${input_event_name}.json"

            return_value=$(serverless invoke -f $function_name --path "./input_events/$input_event_file")

(Contributor review comment on the line above: "The invoke with return value is a neat trick")

            if [ ! -f $snapshot_path ]; then
                # If the snapshot file doesn't exist yet, we create it
                echo "Writing return value to $snapshot_path because no snapshot exists yet"
                echo "$return_value" >$snapshot_path
            elif [ -n "$UPDATE_SNAPSHOTS" ]; then
                # If $UPDATE_SNAPSHOTS is set to true, write the new return value over the current snapshot
                echo "Overwriting return value snapshot for $snapshot_path"
                echo "$return_value" >$snapshot_path
            else
                # Compare new return value to snapshot
                diff_output=$(echo "$return_value" | diff - $snapshot_path)
                if [ $? -eq 1 ]; then
                    echo "Failed: Return value for $function_name does not match snapshot:"
                    echo "$diff_output"
                    mismatch_found=true
                else
                    echo "Ok: Return value for $function_name with $input_event_name event matches snapshot"
                fi
            fi
        done
    done
done
set -e

echo "Sleeping $LOGS_WAIT_SECONDS seconds to wait for logs to appear in CloudWatch..."
sleep $LOGS_WAIT_SECONDS

echo "Fetching logs for invocations and comparing to snapshots"
for handler_name in "${LAMBDA_HANDLERS[@]}"; do
    for runtime in "${RUNTIMES[@]}"; do
        function_name="${handler_name}_${runtime}"
        function_snapshot_path="./snapshots/logs/$function_name.log"

        # Fetch logs with serverless cli
        raw_logs=$(serverless logs -f $function_name --startTime $script_start_time)

        # Replace invocation-specific data like timestamps and IDs with XXXX to normalize logs across executions

(Contributor review comment on the normalization below: "Also pretty cool trick")

        logs=$(
            echo "$raw_logs" |
                # Filter serverless cli errors
                sed '/Serverless: Recoverable error occurred/d' |
                # Remove blank lines
                sed '/^$/d' |
                # Normalize Lambda runtime report logs
                sed -E 's/(RequestId|TraceId|SegmentId|Duration|Memory Used|"e"): [a-z0-9\.\-]+/\1: XXXX/g' |
                # Normalize DD APM headers and AWS account ID
                sed -E "s/(x-datadog-parent-id:|x-datadog-trace-id:|account_id:)[0-9]+/\1XXXX/g" |
                # Normalize timestamps in datapoints POSTed to DD
                sed -E 's/"points": \[\[[0-9\.]+,/"points": \[\[XXXX,/g' |
                # Strip API key from logged requests
                sed -E "s/(api_key=|'api_key': ')[a-z0-9\.\-]+/\1XXXX/g" |
                # Normalize minor package version so that these snapshots aren't broken on version bumps
                sed -E "s/(dd_lambda_layer:datadog-python[0-9]+_2\.)[0-9]+\.0/\1XX\.0/g"
        )

        if [ ! -f $function_snapshot_path ]; then
            # If no snapshot file exists yet, we create one
            echo "Writing logs to $function_snapshot_path because no snapshot exists yet"
            echo "$logs" >$function_snapshot_path
        elif [ -n "$UPDATE_SNAPSHOTS" ]; then
            # If $UPDATE_SNAPSHOTS is set to true, write the new logs over the current snapshot
            echo "Overwriting log snapshot for $function_snapshot_path"
            echo "$logs" >$function_snapshot_path
        else
            # Compare new logs to snapshots
            set +e # Don't exit this script if there is a diff
            diff_output=$(echo "$logs" | diff - $function_snapshot_path)
            if [ $? -eq 1 ]; then
                echo "Failed: Mismatch found between new $function_name logs (first) and snapshot (second):"
                echo "$diff_output"
                mismatch_found=true
            else
                echo "Ok: New logs for $function_name match snapshot"
            fi
            set -e
        fi
    done
done

if [ "$mismatch_found" = true ]; then
echo "FAILURE: A mismatch between new data and a snapshot was found and printed above."
echo "If the change is expected, generate new snapshots by running 'UPDATE_SNAPSHOTS=true DD_API_KEY=XXXX ./scripts/run_integration_tests.sh'"
exit 1
fi

if [ -n "$UPDATE_SNAPSHOTS" ]; then
echo "SUCCESS: Wrote new snapshots for all functions"
exit 0
fi

echo "SUCCESS: No difference found between snapshots and new return values or logs"
34 changes: 34 additions & 0 deletions tests/integration/handle.py
@@ -0,0 +1,34 @@
import json

from datadog_lambda.metric import lambda_metric
from datadog_lambda.wrapper import datadog_lambda_wrapper


@datadog_lambda_wrapper
def handle(event, context):
    # Parse request ID and record ids out of the event to include in the response
    request_id = event.get("requestContext", {}).get("requestId")
    event_records = event.get("Records", [])

    record_ids = []
    for record in event_records:
        # SQS
        if record.get("messageId"):
            record_ids.append(record["messageId"])
        # SNS
        if record.get("Sns", {}).get("MessageId"):
            record_ids.append(record["Sns"]["MessageId"])

    lambda_metric("hello.dog", 1, tags=["team:serverless", "role:hello"])
    lambda_metric(
        "tests.integration.count", 21, tags=["test:integration", "role:hello"]
    )

    return {
        "statusCode": 200,
        "body": {
            "message": "hello, dog!",
            "request_id": request_id,
            "event_record_ids": record_ids,
        },
    }
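
For reference, and not part of the PR, this is how the record-id parsing in handle() behaves on an invented event containing one SQS-style and one SNS-style record:

# Sketch: the same Records parsing as handle(), run on a made-up event.
sample_event = {
    "requestContext": {"requestId": "req-123"},
    "Records": [
        {"messageId": "sqs-id-1"},            # SQS-style record (placeholder id)
        {"Sns": {"MessageId": "sns-id-1"}},   # SNS-style record (placeholder id)
    ],
}

record_ids = []
for record in sample_event.get("Records", []):
    if record.get("messageId"):
        record_ids.append(record["messageId"])
    if record.get("Sns", {}).get("MessageId"):
        record_ids.append(record["Sns"]["MessageId"])

print(record_ids)  # ['sqs-id-1', 'sns-id-1']
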
18 changes: 18 additions & 0 deletions tests/integration/http_requests.py
@@ -0,0 +1,18 @@
import json
import requests

from datadog_lambda.metric import lambda_metric
from datadog_lambda.wrapper import datadog_lambda_wrapper


@datadog_lambda_wrapper
def handle(event, context):
lambda_metric("hello.dog", 1, tags=["team:serverless", "role:hello"])
lambda_metric(
"tests.integration.count", 21, tags=["test:integration", "role:hello"]
)

us_response = requests.get("https://ip-ranges.datadoghq.com/")
eu_response = requests.get("https://ip-ranges.datadoghq.eu/")

return {"statusCode": 200, "body": {"message": "hello, dog!"}}