diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3d8c76207e..32175fd99d2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # HISTORY
 
+## May 29th
+
+**0.9.4**
+
+* **Metrics**: Bugfix - Metrics were not being flushed and cleared on every invocation
+
 ## May 16th
 
 **0.9.3**
diff --git a/Makefile b/Makefile
index ffd5dfa16ff..f98ad7795e8 100644
--- a/Makefile
+++ b/Makefile
@@ -33,9 +33,8 @@ build-docs:
 	@$(MAKE) build-docs-website
 	@$(MAKE) build-docs-api
 
-build-docs-api:
-	pip install pdoc3~=0.7.5
-	pdoc3 --html --output-dir dist/api/ ./aws_lambda_powertools --force
+build-docs-api: dev
+	poetry run pdoc --html --output-dir dist/api/ ./aws_lambda_powertools --force
 	mv dist/api/aws_lambda_powertools/* dist/api/
 	rm -rf dist/api/aws_lambda_powertools
 
diff --git a/aws_lambda_powertools/metrics/base.py b/aws_lambda_powertools/metrics/base.py
index 38e513f19a8..83949ad874d 100644
--- a/aws_lambda_powertools/metrics/base.py
+++ b/aws_lambda_powertools/metrics/base.py
@@ -50,8 +50,8 @@ class MetricManager:
     """
 
     def __init__(self, metric_set: Dict[str, str] = None, dimension_set: Dict = None, namespace: str = None):
-        self.metric_set = metric_set or {}
-        self.dimension_set = dimension_set or {}
+        self.metric_set = metric_set if metric_set is not None else {}
+        self.dimension_set = dimension_set if dimension_set is not None else {}
         self.namespace = os.getenv("POWERTOOLS_METRICS_NAMESPACE") or namespace
         self._metric_units = [unit.value for unit in MetricUnit]
         self._metric_unit_options = list(MetricUnit.__members__)
@@ -116,7 +116,10 @@ def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
            logger.debug(f"Exceeded maximum of {MAX_METRICS} metrics - Publishing existing metric set")
            metrics = self.serialize_metric_set()
            print(json.dumps(metrics))
-            self.metric_set = {}
+
+            # clear the metric set only, as opposed to the dimension set too,
+            # since we could have more than 100 metrics
+            self.metric_set.clear()
 
     def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) -> Dict:
         """Serializes metric and dimensions set
diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py
index 390356b1461..13830411523 100644
--- a/aws_lambda_powertools/metrics/metrics.py
+++ b/aws_lambda_powertools/metrics/metrics.py
@@ -65,8 +65,15 @@ def do_something():
     _metrics = {}
     _dimensions = {}
 
-    def __init__(self, metric_set=None, dimension_set=None, namespace=None):
-        super().__init__(metric_set=self._metrics, dimension_set=self._dimensions, namespace=namespace)
+    def __init__(self):
+        self.metric_set = self._metrics
+        self.dimension_set = self._dimensions
+        super().__init__(metric_set=self.metric_set, dimension_set=self.dimension_set)
+
+    def clear_metrics(self):
+        logger.debug("Clearing out existing metric set from memory")
+        self.metric_set.clear()
+        self.dimension_set.clear()
 
     def log_metrics(self, lambda_handler: Callable[[Any, Any], Any] = None):
         """Decorator to serialize and publish metrics at the end of a function execution.
@@ -101,6 +108,7 @@ def decorate(*args, **kwargs):
                 response = lambda_handler(*args, **kwargs)
             finally:
                 metrics = self.serialize_metric_set()
+                self.clear_metrics()
                 logger.debug("Publishing metrics", {"metrics": metrics})
                 print(json.dumps(metrics))
 
diff --git a/docs/content/core/metrics.mdx b/docs/content/core/metrics.mdx
index 2f3bc0e6270..b13d0693fab 100644
--- a/docs/content/core/metrics.mdx
+++ b/docs/content/core/metrics.mdx
@@ -122,6 +122,25 @@ def lambda_handler(evt, ctx):
     ...
 ```
 
+## Flushing metrics manually
+
+If you prefer not to use `log_metrics`, for example because you want to encapsulate additional logic around serialization, you can manually flush and clear metrics as follows:
+
+```python:title=manual_metric_serialization.py
+import json
+from aws_lambda_powertools.metrics import Metrics, MetricUnit
+
+metrics = Metrics()
+metrics.add_metric(name="ColdStart", unit="Count", value=1)
+metrics.add_dimension(name="service", value="booking")
+
+# highlight-start
+your_metrics_object = metrics.serialize_metric_set()
+metrics.clear_metrics()
+print(json.dumps(your_metrics_object))
+# highlight-end
+```
+
 ## Testing your code
 
 Use `POWERTOOLS_METRICS_NAMESPACE` env var when unit testing your code to ensure a metric namespace object is created, and your code doesn't fail validation.
diff --git a/pyproject.toml b/pyproject.toml
index 337f67d5e8e..da0537b61ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "aws_lambda_powertools"
-version = "0.9.3"
+version = "0.9.4"
 description = "Python utilities for AWS Lambda functions including but not limited to tracing, logging and custom metric"
 authors = ["Amazon Web Services"]
 classifiers=[
diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py
index 7c6990668bc..71610bc0f19 100644
--- a/tests/functional/test_metrics.py
+++ b/tests/functional/test_metrics.py
@@ -15,6 +15,13 @@
 from aws_lambda_powertools.metrics.base import MetricManager
 
 
+@pytest.fixture(scope="function", autouse=True)
+def reset_metric_set():
+    metrics = Metrics()
+    metrics.clear_metrics()
+    yield
+
+
 @pytest.fixture
 def metric() -> Dict[str, str]:
     return {"name": "single_metric", "unit": MetricUnit.Count, "value": 1}
@@ -57,7 +64,7 @@ def a_hundred_metrics() -> List[Dict[str, str]]:
 
 def serialize_metrics(metrics: List[Dict], dimensions: List[Dict], namespace: Dict) -> Dict:
     """ Helper function to build EMF object from a list of metrics, dimensions """
-    my_metrics = Metrics()
+    my_metrics = MetricManager()
     for dimension in dimensions:
         my_metrics.add_dimension(**dimension)
 
@@ -84,19 +91,9 @@ def remove_timestamp(metrics: List):
         del metric["_aws"]["Timestamp"]
 
 
-def test_single_metric(capsys, metric, dimension, namespace):
-    with single_metric(**metric) as my_metric:
-        my_metric.add_dimension(**dimension)
-        my_metric.add_namespace(**namespace)
-
-    output = json.loads(capsys.readouterr().out.strip())
-    expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace)
-
-    remove_timestamp(metrics=[output, expected])  # Timestamp will always be different
-    assert expected["_aws"] == output["_aws"]
-
-
 def test_single_metric_one_metric_only(capsys, metric, dimension, namespace):
+    # GIVEN we attempt to add more than one metric
+    # WHEN using single_metric context manager
     with single_metric(**metric) as my_metric:
         my_metric.add_metric(name="second_metric", unit="Count", value=1)
         my_metric.add_metric(name="third_metric", unit="Seconds", value=1)
@@ -107,29 +104,18 @@ def test_single_metric_one_metric_only(capsys, metric, dimension, namespace):
     expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace)
 
     remove_timestamp(metrics=[output, expected])  # Timestamp will always be different
-    assert expected["_aws"] == output["_aws"]
-
-
-def test_multiple_metrics(metrics, dimensions, namespace):
-    my_metrics = Metrics()
-    for metric in metrics:
-        my_metrics.add_metric(**metric)
-
-    for dimension in dimensions:
-        my_metrics.add_dimension(**dimension)
-
-    my_metrics.add_namespace(**namespace)
-    output = my_metrics.serialize_metric_set()
-    expected = serialize_metrics(metrics=metrics, dimensions=dimensions, namespace=namespace)
-
-    remove_timestamp(metrics=[output, expected])  # Timestamp will always be different
+    # THEN we should only have the first metric added
     assert expected["_aws"] == output["_aws"]
 
 
-def test_multiple_namespaces(metric, dimension, namespace):
+def test_multiple_namespaces_exception(metric, dimension, namespace):
+    # GIVEN we attempt to add multiple namespaces
     namespace_a = {"name": "OtherNamespace"}
     namespace_b = {"name": "AnotherNamespace"}
 
+    # WHEN an EMF object can only have one namespace
+    # THEN we should raise UniqueNamespaceError exception
     with pytest.raises(UniqueNamespaceError):
         with single_metric(**metric) as my_metric:
             my_metric.add_dimension(**dimension)
@@ -139,6 +125,7 @@ def test_log_metrics(capsys, metrics, dimensions, namespace):
+    # GIVEN Metrics is initialized
     my_metrics = Metrics()
     my_metrics.add_namespace(**namespace)
     for metric in metrics:
@@ -146,23 +133,32 @@ def test_log_metrics(capsys, metrics, dimensions, namespace):
     for dimension in dimensions:
         my_metrics.add_dimension(**dimension)
 
+    # WHEN we use log_metrics to serialize
+    # and flush all metrics at the end of a function execution
     @my_metrics.log_metrics
-    def lambda_handler(evt, handler):
+    def lambda_handler(evt, ctx):
         return True
 
     lambda_handler({}, {})
+
     output = json.loads(capsys.readouterr().out.strip())
     expected = serialize_metrics(metrics=metrics, dimensions=dimensions, namespace=namespace)
 
     remove_timestamp(metrics=[output, expected])  # Timestamp will always be different
+
+    # THEN we should have no exceptions
+    # and a valid EMF object should've been flushed correctly
     assert expected["_aws"] == output["_aws"]
 
     for dimension in dimensions:
         assert dimension["name"] in output
 
 
 def test_namespace_env_var(monkeypatch, capsys, metric, dimension, namespace):
+    # GIVEN we use POWERTOOLS_METRICS_NAMESPACE
     monkeypatch.setenv("POWERTOOLS_METRICS_NAMESPACE", namespace["name"])
 
+    # WHEN creating a metric without explicitly
+    # adding a namespace
     with single_metric(**metric) as my_metrics:
         my_metrics.add_dimension(**dimension)
         monkeypatch.delenv("POWERTOOLS_METRICS_NAMESPACE")
@@ -171,61 +167,67 @@ def test_namespace_env_var(monkeypatch, capsys, metric, dimension, namespace):
     expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace)
 
     remove_timestamp(metrics=[output, expected])  # Timestamp will always be different
+
+    # THEN we should add a namespace implicitly
+    # with the value of POWERTOOLS_METRICS_NAMESPACE env var
     assert expected["_aws"] == output["_aws"]
 
 
 def test_metrics_spillover(monkeypatch, capsys, metric, dimension, namespace, a_hundred_metrics):
+    # GIVEN Metrics is initialized and we have over a hundred metrics to add
     my_metrics = Metrics()
     my_metrics.add_dimension(**dimension)
     my_metrics.add_namespace(**namespace)
 
+    # WHEN we add more than 100 metrics
     for _metric in a_hundred_metrics:
         my_metrics.add_metric(**_metric)
 
-    @my_metrics.log_metrics
-    def lambda_handler(evt, handler):
-        my_metrics.add_metric(**metric)
-        return True
-
-    lambda_handler({}, {})
-
-    output = capsys.readouterr().out.strip()
-    spillover_metrics, single_metric = output.split("\n")
-    spillover_metrics = json.loads(spillover_metrics)
-    single_metric = json.loads(single_metric)
-
-    expected_single_metric = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace)
+    # THEN it should serialize and flush all metrics at the 100th
+    # and clear all metrics and dimensions from memory
+    output = json.loads(capsys.readouterr().out.strip())
+    spillover_metrics = output["_aws"]["CloudWatchMetrics"][0]["Metrics"]
+    assert my_metrics.metric_set == {}
+    assert len(spillover_metrics) == 100
 
-    serialize_metrics(metrics=a_hundred_metrics, dimensions=[dimension], namespace=namespace)
-    expected_spillover_metrics = json.loads(capsys.readouterr().out.strip())
+    # GIVEN we add the 101st metric
+    # WHEN we already had a Metrics class instance
+    # with an existing dimension set from the previous batch of 100 metrics
+    my_metrics.add_metric(**metric)
 
-    remove_timestamp(metrics=[spillover_metrics, expected_spillover_metrics, single_metric, expected_single_metric])
+    # THEN serializing the 101st metric should
+    # create a new EMF object with a single metric in it (the 101st)
+    # and contain the same dimension we previously added
+    serialized_101st_metric = my_metrics.serialize_metric_set()
+    expected_101st_metric = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace)
+    remove_timestamp(metrics=[serialized_101st_metric, expected_101st_metric])
 
-    assert single_metric["_aws"] == expected_single_metric["_aws"]
-    assert spillover_metrics["_aws"] == expected_spillover_metrics["_aws"]
+    assert serialized_101st_metric["_aws"] == expected_101st_metric["_aws"]
 
 
-def test_log_metrics_schema_error(metrics, dimensions, namespace):
-    # It should error out because by default log_metrics doesn't invoke a function
-    # so when decorator runs it'll raise an error while trying to serialize metrics
+def test_log_metrics_should_invoke_function(metric, dimension, namespace):
+    # GIVEN Metrics is initialized
     my_metrics = Metrics()
 
+    # WHEN log_metrics is used to serialize metrics
     @my_metrics.log_metrics
     def lambda_handler(evt, context):
-        my_metrics.add_namespace(namespace)
-        for metric in metrics:
-            my_metrics.add_metric(**metric)
-        for dimension in dimensions:
-            my_metrics.add_dimension(**dimension)
-        return True
+        my_metrics.add_namespace(**namespace)
+        my_metrics.add_metric(**metric)
+        my_metrics.add_dimension(**dimension)
+        return True
 
-    with pytest.raises(SchemaValidationError):
-        lambda_handler({}, {})
+    # THEN log_metrics should invoke the function it decorates
+    # and return no error if we have a metric, namespace, and a dimension
+    lambda_handler({}, {})
 
 
 def test_incorrect_metric_unit(metric, dimension, namespace):
+    # GIVEN we pass a metric unit not supported by CloudWatch
     metric["unit"] = "incorrect_unit"
 
+    # WHEN we attempt to add a new metric
+    # THEN it should fail validation and raise MetricUnitError
     with pytest.raises(MetricUnitError):
         with single_metric(**metric) as my_metric:
             my_metric.add_dimension(**dimension)
@@ -233,13 +235,22 @@ def test_schema_no_namespace(metric, dimension):
+    # GIVEN we add a metric and a dimension
+    # but no namespace
+
+    # WHEN we attempt to serialize a valid EMF object
+    # THEN it should fail validation and raise SchemaValidationError
     with pytest.raises(SchemaValidationError):
         with single_metric(**metric) as my_metric:
             my_metric.add_dimension(**dimension)
 
 
 def test_schema_incorrect_value(metric, dimension, namespace):
+    # GIVEN we pass an incorrect metric value (non-number/float)
     metric["value"] = "some_value"
+
+    # WHEN we attempt to serialize a valid EMF object
+    # THEN it should fail validation and raise MetricValueError
     with pytest.raises(MetricValueError):
         with single_metric(**metric) as my_metric:
             my_metric.add_dimension(**dimension)
@@ -247,19 +258,28 @@ def test_schema_no_metrics(dimensions, namespace):
+    # GIVEN Metrics is initialized
     my_metrics = Metrics()
     my_metrics.add_namespace(**namespace)
+
+    # WHEN no metrics have been added
+    # but a namespace and dimensions only
     for dimension in dimensions:
         my_metrics.add_dimension(**dimension)
+
+    # THEN it should fail validation and raise SchemaValidationError
     with pytest.raises(SchemaValidationError):
         my_metrics.serialize_metric_set()
 
 
 def test_exceed_number_of_dimensions(metric, namespace):
+    # GIVEN we have more dimensions than CloudWatch supports
     dimensions = []
     for i in range(11):
         dimensions.append({"name": f"test_{i}", "value": "test"})
 
+    # WHEN we attempt to serialize them into a valid EMF object
+    # THEN it should fail validation and raise SchemaValidationError
     with pytest.raises(SchemaValidationError):
         with single_metric(**metric) as my_metric:
             my_metric.add_namespace(**namespace)
@@ -267,16 +287,16 @@
                 my_metric.add_dimension(**dimension)
 
 
-def test_log_metrics_error_propagation(capsys, metric, dimension, namespace):
-    # GIVEN Metrics are serialized after handler execution
-    # WHEN If an error occurs and metrics have been added
-    # THEN we should log metrics and propagate exception up
+def test_log_metrics_during_exception(capsys, metric, dimension, namespace):
+    # GIVEN Metrics is initialized
     my_metrics = Metrics()
     my_metrics.add_metric(**metric)
     my_metrics.add_dimension(**dimension)
     my_metrics.add_namespace(**namespace)
 
+    # WHEN log_metrics is used to serialize metrics
+    # but an error has been raised during handler execution
     @my_metrics.log_metrics
     def lambda_handler(evt, context):
         raise ValueError("Bubble up")
@@ -288,43 +308,75 @@ def lambda_handler(evt, context):
     expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace)
 
     remove_timestamp(metrics=[output, expected])  # Timestamp will always be different
+    # THEN we should log metrics and propagate the exception up
     assert expected["_aws"] == output["_aws"]
 
 
 def test_log_no_metrics_error_propagation(capsys, metric, dimension, namespace):
-    # GIVEN Metrics are serialized after handler execution
-    # WHEN If an error occurs and no metrics have been added
-    # THEN we should propagate exception up and raise SchemaValidationError
+    # GIVEN Metrics is initialized
     my_metrics = Metrics()
 
     @my_metrics.log_metrics
     def lambda_handler(evt, context):
+        # WHEN log_metrics is used despite having no metrics
+        # and the decorated function also raises an exception
        raise ValueError("Bubble up")
 
+    # THEN we should first raise SchemaValidationError as the main exception
     with pytest.raises(SchemaValidationError):
         lambda_handler({}, {})
 
 
-def test_all_metric_units_string(metric, dimension, namespace):
+def test_all_possible_metric_units(metric, dimension, namespace):
 
-    # metric unit as MetricUnit key e.g. "Seconds", "BytesPerSecond"
+    # GIVEN we add a metric for each metric unit supported by CloudWatch
+    # where metric unit as MetricUnit key e.g. "Seconds", "BytesPerSecond"
"Seconds", "BytesPerSecond" for unit in MetricUnit: metric["unit"] = unit.name + # WHEN we iterate over all available metric unit keys from MetricUnit enum + # THEN we raise no MetricUnitError nor SchemaValidationError with single_metric(**metric) as my_metric: my_metric.add_dimension(**dimension) my_metric.add_namespace(**namespace) - with pytest.raises(MetricUnitError): - metric["unit"] = "seconds" - with single_metric(**metric) as my_metric: - my_metric.add_dimension(**dimension) - my_metric.add_namespace(**namespace) - + # WHEN we iterate over all available metric unit keys from MetricUnit enum all_metric_units = [unit.value for unit in MetricUnit] # metric unit as MetricUnit value e.g. "Seconds", "Bytes/Second" for unit in all_metric_units: metric["unit"] = unit + # THEN we raise no MetricUnitError nor SchemaValidationError with single_metric(**metric) as my_metric: my_metric.add_dimension(**dimension) my_metric.add_namespace(**namespace) + + +def test_metrics_reuse_metric_set(metric, dimension, namespace): + # GIVEN Metrics is initialized + my_metrics = Metrics() + my_metrics.add_metric(**metric) + + # WHEN Metrics is initialized one more time + my_metrics_2 = Metrics() + + # THEN Both class instances should have the same metric set + assert my_metrics_2.metric_set == my_metrics.metric_set + + +def test_log_metrics_clear_metrics_after_invocation(metric, dimension, namespace): + # GIVEN Metrics is initialized + my_metrics = Metrics() + + my_metrics.add_metric(**metric) + my_metrics.add_dimension(**dimension) + my_metrics.add_namespace(**namespace) + + # WHEN log_metrics is used to flush metrics from memory + @my_metrics.log_metrics + def lambda_handler(evt, context): + return True + + lambda_handler({}, {}) + + # THEN metric set should be empty after function has been run + assert my_metrics.metric_set == {} diff --git a/tests/unit/test_tracing.py b/tests/unit/test_tracing.py index f79662601a5..72ce983334b 100644 --- a/tests/unit/test_tracing.py +++ b/tests/unit/test_tracing.py @@ -99,19 +99,7 @@ def handler(event, context): def test_tracer_method(mocker, dummy_response, provider_stub, in_subsegment_mock): provider = provider_stub(in_subsegment=in_subsegment_mock.in_subsegment) - tracer = Tracer(provider=provider, service="booking") - - @tracer.capture_method - def greeting(name, message): - return dummy_response - - greeting(name="Foo", message="Bar") - - assert in_subsegment_mock.in_subsegment.call_count == 1 - assert in_subsegment_mock.in_subsegment.call_args == mocker.call(name="## greeting") - assert in_subsegment_mock.put_metadata.call_args == mocker.call( - key="greeting response", value=dummy_response, namespace="booking" - ) + Tracer(provider=provider, service="booking") def test_tracer_custom_metadata(mocker, dummy_response, provider_stub):