Skip to content

Commit b44f72e

Browse files
authored
Add OpenTelemetry support (#70)
1 parent 88a45fa commit b44f72e

File tree

15 files changed

+441
-15
lines changed

15 files changed

+441
-15
lines changed

.buildkite/generatesteps.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,18 @@
33
import yaml
44

55

6-
def benchmark_to_steps(python, connection_class):
6+
def benchmark_to_steps(python, connection_class, nox_session):
77
return [
88
{
9-
"group": f":elasticsearch: :python: ES Serverless ({python}/{connection_class})",
9+
"group": f":elasticsearch: :python: {nox_session} {python} {connection_class}",
1010
"steps": [
1111
{
1212
"label": "Run tests",
1313
"agents": {"provider": "gcp"},
1414
"env": {
1515
"PYTHON_VERSION": f"{python}",
1616
"PYTHON_CONNECTION_CLASS": f"{connection_class}",
17+
"NOX_SESSION": f"{nox_session}",
1718
# For development versions
1819
# https://github.com/aio-libs/aiohttp/issues/6600
1920
"AIOHTTP_NO_EXTENSIONS": 1,
@@ -24,12 +25,12 @@ def benchmark_to_steps(python, connection_class):
2425
"EC_REGISTER_BACKEND": "appex-qa-team-cluster",
2526
"EC_ENV": "qa",
2627
"EC_REGION": "aws-eu-west-1",
27-
"EC_PROJECT_PREFIX": f"esv-client-python-test-{python}-{connection_class}",
28+
"EC_PROJECT_PREFIX": f"esv-client-python-{nox_session}-{python}-{connection_class}",
2829
},
2930
"command": "./.buildkite/run-tests",
3031
"artifact_paths": "junit/*-junit.xml",
3132
"retry": {"manual": False},
32-
"key": f"run_{python.replace('.', '_')}_{connection_class}",
33+
"key": f"run_{python.replace('.', '_')}_{connection_class}_{nox_session}",
3334
},
3435
{
3536
"label": "Teardown",
@@ -40,7 +41,7 @@ def benchmark_to_steps(python, connection_class):
4041
"EC_REGISTER_BACKEND": "appex-qa-team-cluster",
4142
"EC_ENV": "qa",
4243
"EC_REGION": "aws-eu-west-1",
43-
"EC_PROJECT_PREFIX": f"esv-client-python-test-{python}-{connection_class}",
44+
"EC_PROJECT_PREFIX": f"esv-client-python-{nox_session}-{python}-{connection_class}",
4445
},
4546
"command": ".buildkite/teardown-tests",
4647
"depends_on": f"run_{python.replace('.', '_')}_{connection_class}",
@@ -55,5 +56,7 @@ def benchmark_to_steps(python, connection_class):
5556
steps = []
5657
for python in ["3.9", "3.10", "3.11", "3.12"]:
5758
for connection_class in ["urllib3", "requests"]:
58-
steps.extend(benchmark_to_steps(python, connection_class))
59+
steps.extend(benchmark_to_steps(python, connection_class, "test"))
60+
steps.extend(benchmark_to_steps("3.9", "urllib3", "test_otel"))
61+
steps.extend(benchmark_to_steps("3.12", "urllib3", "test_otel"))
5962
print(yaml.dump({"steps": steps}, Dumper=yaml.Dumper, sort_keys=False))

.buildkite/run-tests

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ echo -e "--- :computer: Environment variables"
5151
echo -e "ELASTICSEARCH_URL $ELASTICSEARCH_URL"
5252
echo -e "PYTHON_VERSION $PYTHON_VERSION"
5353
echo -e "PYTHON_CONNECTION_CLASS $PYTHON_CONNECTION_CLASS"
54+
echo -e "NOX_SESSION $NOX_SESSION"
5455

5556
echo -e "--- :docker: Build elasticsearch-serverless-python container"
5657

@@ -74,4 +75,4 @@ docker run \
7475
--volume "$(pwd)/junit:/code/elasticsearch-serverless-python/junit" \
7576
--rm \
7677
elasticsearch-serverless-python \
77-
nox -s "test-$PYTHON_VERSION"
78+
nox -s ${NOX_SESSION}-${PYTHON_VERSION}
49.5 KB
Loading
43.2 KB
Loading
Loading

docs/guide/integrations.asciidoc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
You can find integration options and information on this page.
55

66

7+
[discrete]
8+
[[opentelemetry-intro]]
9+
=== OpenTelemetry instrumentation
10+
11+
The Python Elasticsearch client supports native OpenTelemetry instrumentation following the https://opentelemetry.io/docs/specs/semconv/database/elasticsearch/[OpenTelemetry Semantic Conventions for Elasticsearch].
12+
Refer to the <<opentelemetry>> page for details.
13+
714
[discrete]
815
[[transport]]
916
=== Transport
@@ -53,3 +60,6 @@ es.options(
5360
------------------------------------
5461

5562
Type hints also allow tools like your IDE to check types and provide better auto-complete functionality.
63+
64+
65+
include::open-telemetry.asciidoc[]

docs/guide/open-telemetry.asciidoc

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
[[opentelemetry]]
2+
=== Using OpenTelemetry
3+
4+
You can use https://opentelemetry.io/[OpenTelemetry] to monitor the performance and behavior of your {es} requests through the Elasticsearch Python client.
5+
The Python client comes with built-in OpenTelemetry instrumentation that emits https://www.elastic.co/guide/en/apm/guide/current/apm-distributed-tracing.html[distributed tracing spans] by default.
6+
With that, applications using https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry[manual OpenTelemetry instrumentation] or https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry[automatic OpenTelemetry instrumentation] are enriched with additional spans that contain insightful information about the execution of the {es} requests.
7+
8+
The native instrumentation in the Python client follows the https://opentelemetry.io/docs/specs/semconv/database/elasticsearch/[OpenTelemetry Semantic Conventions for {es}]. In particular, the instrumentation in the client covers the logical layer of {es} requests. A single span per request is created that is processed by the service through the Python client. The following image shows a trace that records the handling of two different {es} requests: an `info` request and a `search` request.
9+
10+
[role="screenshot"]
11+
image::images/otel-waterfall-without-http.png[alt="Distributed trace with Elasticsearch spans",align="center"]
12+
13+
Usually, OpenTelemetry auto-instrumentation modules come with instrumentation support for HTTP-level communication. In this case, in addition to the logical {es} client requests, spans will be captured for the physical HTTP requests emitted by the client. The following image shows a trace with both, {es} spans (in blue) and the corresponding HTTP-level spans (in red) after having installed the ``opentelemetry-instrumentation-urllib3`` package:
14+
15+
[role="screenshot"]
16+
image::images/otel-waterfall-with-http.png[alt="Distributed trace with Elasticsearch spans",align="center"]
17+
18+
Advanced Python client behavior such as nodes round-robin and request retries are revealed through the combination of logical {es} spans and the physical HTTP spans. The following example shows a `search` request in a scenario with two nodes:
19+
20+
[role="screenshot"]
21+
image::images/otel-waterfall-retry.png[alt="Distributed trace with Elasticsearch spans",align="center"]
22+
23+
The first node is unavailable and results in an HTTP error, while the retry to the second node succeeds. Both HTTP requests are subsumed by the logical {es} request span (in blue).
24+
25+
[discrete]
26+
==== Setup the OpenTelemetry instrumentation
27+
28+
When using the https://opentelemetry.io/docs/languages/python/instrumentation/[manual Python OpenTelemetry instrumentation] or the https://opentelemetry.io/docs/languages/python/automatic/[OpenTelemetry Python agent], the Python client's OpenTelemetry instrumentation is enabled by default and uses the global OpenTelemetry SDK with the global tracer provider.
29+
If you're getting started with OpenTelemetry instrumentation, the following blog posts have step-by-step instructions to ingest and explore tracing data with the Elastic stack:
30+
31+
* https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry[Manual instrumentation with OpenTelemetry for Python applications]
32+
* https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry[Automatic instrumentation with OpenTelemetry for Python applications]
33+
34+
[discrete]
35+
=== Comparison with community instrumentation
36+
37+
The https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/elasticsearch/elasticsearch.html[commmunity OpenTelemetry Elasticsearch instrumentation] also instruments the client and sends OpenTelemetry traces, but was developed before the OpenTelemetry Semantic Conventions for {es}, so the traces attributes are inconsistent with other OpenTelemetry Elasticsearch client instrumentations. To avoid tracing the same requests twice, make sure to use only one instrumentation, either by uninstalling the opentelemetry-instrumentation-elasticsearch Python package or by <<opentelemetry-config-enable,disabling the native instrumentation>>.
38+
39+
[discrete]
40+
==== Configuring the OpenTelemetry instrumentation
41+
42+
You can configure this OpenTelemetry instrumentation through environment variables.
43+
The following configuration options are available.
44+
45+
[discrete]
46+
[[opentelemetry-config-enable]]
47+
===== Enable / Disable the OpenTelemetry instrumentation
48+
49+
With this configuration option you can enable (default) or disable the built-in OpenTelemetry instrumentation.
50+
51+
**Default:** `true`
52+
53+
|============
54+
| Environment Variable | `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED`
55+
|============
56+
57+
[discrete]
58+
===== Capture search request bodies
59+
60+
Per default, the built-in OpenTelemetry instrumentation does not capture request bodies due to data privacy considerations. You can use this option to enable capturing of search queries from the request bodies of {es} search requests in case you wish to gather this information regardless. The options are to capture the raw search query or not capture it at all.
61+
62+
**Default:** `omit`
63+
64+
**Valid Options:** `omit`, `raw`
65+
66+
|============
67+
| Environment Variable | `OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_CAPTURE_SEARCH_QUERY`
68+
|============
69+
70+
[discrete]
71+
==== Overhead
72+
73+
The OpenTelemetry instrumentation (as any other monitoring approach) may come with a slight overhead on CPU, memory, and/or latency. The overhead may only occur when the instrumentation is enabled (default) and an OpenTelemetry SDK is active in the target application. When the instrumentation is disabled or no OpenTelemetry SDK is active within the target application, monitoring overhead is not expected when using the client.
74+
75+
Even in cases where the instrumentation is enabled and is actively used (by an OpenTelemetry SDK), the overhead is minimal and negligible in the vast majority of cases. In edge cases where there is a noticeable overhead, the <<opentelemetry-config-enable,instrumentation can be explicitly disabled>> to eliminate any potential impact on performance.

elasticsearch_serverless/_async/client/_base.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@
2727
HttpHeaders,
2828
ListApiResponse,
2929
ObjectApiResponse,
30+
OpenTelemetrySpan,
3031
TextApiResponse,
3132
)
3233
from elastic_transport.client_utils import DEFAULT, DefaultType
3334

35+
from ..._otel import OpenTelemetry
3436
from ...compat import warn_stacklevel
3537
from ...exceptions import (
3638
HTTP_EXCEPTIONS,
@@ -125,6 +127,7 @@ def __init__(self, _transport: AsyncTransport) -> None:
125127
self._retry_on_timeout: Union[DefaultType, bool] = DEFAULT
126128
self._retry_on_status: Union[DefaultType, Collection[int]] = DEFAULT
127129
self._verified_elasticsearch = False
130+
self._otel = OpenTelemetry()
128131

129132
@property
130133
def transport(self) -> AsyncTransport:
@@ -140,6 +143,34 @@ async def perform_request(
140143
body: Optional[Any] = None,
141144
endpoint_id: Optional[str] = None,
142145
path_parts: Optional[Mapping[str, Any]] = None,
146+
) -> ApiResponse[Any]:
147+
with self._otel.span(
148+
method,
149+
endpoint_id=endpoint_id,
150+
path_parts=path_parts or {},
151+
) as otel_span:
152+
response = await self._perform_request(
153+
method,
154+
path,
155+
params=params,
156+
headers=headers,
157+
body=body,
158+
otel_span=otel_span,
159+
)
160+
otel_span.set_elastic_cloud_metadata(response.meta.headers)
161+
return response
162+
163+
async def _perform_request(
164+
self,
165+
method: str,
166+
path: str,
167+
*,
168+
params: Optional[Mapping[str, Any]] = None,
169+
headers: Optional[Mapping[str, str]] = None,
170+
body: Optional[Any] = None,
171+
otel_span: OpenTelemetrySpan,
172+
endpoint_id: Optional[str] = None,
173+
path_parts: Optional[Mapping[str, Any]] = None,
143174
) -> ApiResponse[Any]:
144175
if headers:
145176
request_headers = self._headers.copy()
@@ -162,6 +193,7 @@ async def perform_request(
162193
retry_on_status=self._retry_on_status,
163194
retry_on_timeout=self._retry_on_timeout,
164195
client_meta=self._client_meta,
196+
otel_span=otel_span,
165197
)
166198

167199
# HEAD with a 404 is returned as a normal response

elasticsearch_serverless/_otel.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Licensed to Elasticsearch B.V. under one or more contributor
2+
# license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright
4+
# ownership. Elasticsearch B.V. licenses this file to you under
5+
# the Apache License, Version 2.0 (the "License"); you may
6+
# not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from __future__ import annotations
19+
20+
import contextlib
21+
import os
22+
from typing import TYPE_CHECKING, Generator, Mapping
23+
24+
if TYPE_CHECKING:
25+
from typing import Literal
26+
27+
try:
28+
from opentelemetry import trace
29+
30+
_tracer: trace.Tracer | None = trace.get_tracer("elasticsearch-api")
31+
except ModuleNotFoundError:
32+
_tracer = None
33+
34+
from elastic_transport import OpenTelemetrySpan
35+
36+
# Valid values for the enabled config are 'true' and 'false'. Default is 'true'.
37+
ENABLED_ENV_VAR = "OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED"
38+
# Describes how to handle search queries in the request body when assigned to
39+
# a span attribute.
40+
# Valid values are 'omit' and 'raw'.
41+
# Default is 'omit' as 'raw' has security implications.
42+
BODY_STRATEGY_ENV_VAR = "OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_CAPTURE_SEARCH_QUERY"
43+
DEFAULT_BODY_STRATEGY = "omit"
44+
45+
46+
class OpenTelemetry:
47+
def __init__(
48+
self,
49+
enabled: bool | None = None,
50+
tracer: trace.Tracer | None = None,
51+
# TODO import Literal at the top-level when dropping Python 3.7
52+
body_strategy: 'Literal["omit", "raw"]' | None = None,
53+
):
54+
if enabled is None:
55+
enabled = os.environ.get(ENABLED_ENV_VAR, "true") == "true"
56+
self.tracer = tracer or _tracer
57+
self.enabled = enabled and self.tracer is not None
58+
59+
if body_strategy is not None:
60+
self.body_strategy = body_strategy
61+
else:
62+
self.body_strategy = os.environ.get(
63+
BODY_STRATEGY_ENV_VAR, DEFAULT_BODY_STRATEGY
64+
) # type: ignore[assignment]
65+
assert self.body_strategy in ("omit", "raw")
66+
67+
@contextlib.contextmanager
68+
def span(
69+
self,
70+
method: str,
71+
*,
72+
endpoint_id: str | None,
73+
path_parts: Mapping[str, str],
74+
) -> Generator[OpenTelemetrySpan, None, None]:
75+
if not self.enabled or self.tracer is None:
76+
yield OpenTelemetrySpan(None)
77+
return
78+
79+
span_name = endpoint_id or method
80+
with self.tracer.start_as_current_span(span_name) as otel_span:
81+
otel_span.set_attribute("http.request.method", method)
82+
otel_span.set_attribute("db.system", "elasticsearch")
83+
if endpoint_id is not None:
84+
otel_span.set_attribute("db.operation", endpoint_id)
85+
for key, value in path_parts.items():
86+
otel_span.set_attribute(f"db.elasticsearch.path_parts.{key}", value)
87+
88+
yield OpenTelemetrySpan(
89+
otel_span,
90+
endpoint_id=endpoint_id,
91+
body_strategy=self.body_strategy,
92+
)

elasticsearch_serverless/_sync/client/_base.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@
2626
HttpHeaders,
2727
ListApiResponse,
2828
ObjectApiResponse,
29+
OpenTelemetrySpan,
2930
TextApiResponse,
3031
Transport,
3132
)
3233
from elastic_transport.client_utils import DEFAULT, DefaultType
3334

35+
from ..._otel import OpenTelemetry
3436
from ...compat import warn_stacklevel
3537
from ...exceptions import (
3638
HTTP_EXCEPTIONS,
@@ -125,6 +127,7 @@ def __init__(self, _transport: Transport) -> None:
125127
self._retry_on_timeout: Union[DefaultType, bool] = DEFAULT
126128
self._retry_on_status: Union[DefaultType, Collection[int]] = DEFAULT
127129
self._verified_elasticsearch = False
130+
self._otel = OpenTelemetry()
128131

129132
@property
130133
def transport(self) -> Transport:
@@ -140,6 +143,34 @@ def perform_request(
140143
body: Optional[Any] = None,
141144
endpoint_id: Optional[str] = None,
142145
path_parts: Optional[Mapping[str, Any]] = None,
146+
) -> ApiResponse[Any]:
147+
with self._otel.span(
148+
method,
149+
endpoint_id=endpoint_id,
150+
path_parts=path_parts or {},
151+
) as otel_span:
152+
response = self._perform_request(
153+
method,
154+
path,
155+
params=params,
156+
headers=headers,
157+
body=body,
158+
otel_span=otel_span,
159+
)
160+
otel_span.set_elastic_cloud_metadata(response.meta.headers)
161+
return response
162+
163+
def _perform_request(
164+
self,
165+
method: str,
166+
path: str,
167+
*,
168+
params: Optional[Mapping[str, Any]] = None,
169+
headers: Optional[Mapping[str, str]] = None,
170+
body: Optional[Any] = None,
171+
otel_span: OpenTelemetrySpan,
172+
endpoint_id: Optional[str] = None,
173+
path_parts: Optional[Mapping[str, Any]] = None,
143174
) -> ApiResponse[Any]:
144175
if headers:
145176
request_headers = self._headers.copy()
@@ -162,6 +193,7 @@ def perform_request(
162193
retry_on_status=self._retry_on_status,
163194
retry_on_timeout=self._retry_on_timeout,
164195
client_meta=self._client_meta,
196+
otel_span=otel_span,
165197
)
166198

167199
# HEAD with a 404 is returned as a normal response

0 commit comments

Comments
 (0)