From f063c0caf5037abe8de21921a50489fd29237b77 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 19 Oct 2021 15:18:41 -0500 Subject: [PATCH] Migrate all user documentation to elastic.co --- .readthedocs.yml | 3 +- docs/guide/configuration.asciidoc | 391 +++++++++++++++++++++++- docs/guide/connecting.asciidoc | 132 ++++++-- docs/guide/connection-pool.asciidoc | 18 -- docs/guide/connection-selector.asciidoc | 20 -- docs/guide/examples.asciidoc | 18 +- docs/guide/installation.asciidoc | 2 +- docs/guide/integrations.asciidoc | 52 +++- docs/guide/overview.asciidoc | 20 +- docs/sphinx/api.rst | 106 +------ docs/sphinx/async.rst | 18 -- docs/sphinx/conf.py | 265 ++-------------- docs/sphinx/connection.rst | 93 ------ docs/sphinx/exceptions.rst | 33 +- docs/sphinx/index.rst | 284 +---------------- docs/sphinx/transports.rst | 54 ---- elasticsearch/_async/client/__init__.py | 52 ++-- elasticsearch/client.py | 2 +- noxfile.py | 3 +- 19 files changed, 639 insertions(+), 927 deletions(-) delete mode 100644 docs/guide/connection-pool.asciidoc delete mode 100644 docs/guide/connection-selector.asciidoc delete mode 100644 docs/sphinx/connection.rst delete mode 100644 docs/sphinx/transports.rst diff --git a/.readthedocs.yml b/.readthedocs.yml index e09a45ed3..7ad00c29f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,6 +5,5 @@ sphinx: python: version: 3.7 install: - - method: pip - path: . - requirements: dev-requirements.txt + - path: . diff --git a/docs/guide/configuration.asciidoc b/docs/guide/configuration.asciidoc index 473ba1e53..57033708e 100644 --- a/docs/guide/configuration.asciidoc +++ b/docs/guide/configuration.asciidoc @@ -4,9 +4,392 @@ This page contains information about the most important configuration options of the Python {es} client. 
-* <> -* <> +[discrete] +[[tls-and-ssl]] +=== TLS/SSL -include::connection-pool.asciidoc[] -include::connection-selector.asciidoc[] \ No newline at end of file +The options in this section can only be used when the node is configured for HTTPS. An error will be raised if using these options with an HTTP node. + +[discrete] +==== Verifying server certificates + +The typical route to verify a cluster certificate is via a "CA bundle" which can be specified via the `ca_certs` parameter. If no options are given and the https://github.com/certifi/python-certifi[certifi package] is installed then certifi's CA bundle is used by default. + +If you have your own CA bundle to use you can configure via the `ca_certs` parameter: + +[source,python] +------------------------------------ +es = Elasticsearch( + "https://...", + ca_certs="/path/to/certs.pem" +) +------------------------------------ + +If using a generated certificate or certificate with a known fingerprint you can use the `ssl_assert_fingerprint` to specify the fingerprint which tries to match the server's leaf certificate during the TLS handshake. If there is any matching certificate the connection is verified, otherwise a `TlsError` is raised. + +In Python 3.9 and earlier only the leaf certificate will be verified but in Python 3.10+ private APIs are used to verify any certificate in the certificate chain. This helps when using certificates that are generated on a multi-node cluster. + +[source,python] +------------------------------------ +es = Elasticsearch( + "https://...", + ssl_assert_fingerprint=( + "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3" + ) +) +------------------------------------ + +To disable certificate verification use the `verify_certs=False` parameter. This option should be avoided in production, instead use the other options to verify the clusters' certificate. 
+ +[source,python] +------------------------------------ +es = Elasticsearch( + "https://...", + verify_certs=False +) +------------------------------------ + +[discrete] +==== TLS versions + +Configuring the minimum TLS version to connect to is done via the `ssl_version` parameter. By default this is set to a minimum value of TLSv1.2. In Python 3.7+ you can use the new `ssl.TLSVersion` enumeration to specify versions. + +[source,python] +------------------------------------ +import ssl + +# Python 3.6 +es = Elasticsearch( + ..., + ssl_version=ssl.PROTOCOL_TLSv1_2 +) + +# Python 3.7+ +es = Elasticsearch( + ..., + ssl_version=ssl.TLSVersion.TLSv1_2 +) +------------------------------------ + +[discrete] +==== Client TLS certificate authentication + +Elasticsearch can be configured to authenticate clients via TLS client certificates. Client certificate and keys can be configured via the `client_cert` and `client_key` parameters: + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + client_cert="/path/to/cert.pem", + client_key="/path/to/key.pem", +) +------------------------------------ + + +[discrete] +==== Using an SSLContext + +For advanced users an `ssl.SSLContext` object can be used for configuring TLS via the `ssl_context` parameter. The `ssl_context` parameter can't be combined with any other TLS options except for the `ssl_assert_fingerprint` parameter. + +[source,python] +------------------------------------ +import ssl + +# Create and configure an SSLContext +ctx = ssl.create_default_context() +ctx.load_verify_locations(...) + +es = Elasticsearch( + ..., + ssl_context=ctx +) +------------------------------------ + + +[discrete] +[[compression]] +=== HTTP compression + +Compression of HTTP request and response bodies can be enabled with the `http_compress` parameter. +If enabled then HTTP request bodies will be compressed with `gzip` and HTTP requests will include +the `Accept-Encoding: gzip` HTTP header.
By default compression is disabled. + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + http_compress=True # Enable compression! +) +------------------------------------ + +HTTP compression is recommended to be enabled when requests are traversing the network. + + +[discrete] +[[timeouts]] +=== Request timeouts + +Requests can be configured to timeout if taking too long to be serviced. The `request_timeout` parameter can be passed via the client constructor or the client `.options()` method. When the request times out the node will raise a `ConnectionTimeout` exception which can trigger retries. + +Setting `request_timeout` to `None` will disable timeouts. + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + request_timeout=10 # 10 second timeout +) + +# Search request will timeout in 5 seconds +es.options(request_timeout=5).search(...) +------------------------------------ + +[discrete] +==== API and server timeouts + +There are API-level timeouts to take into consideration when making requests which can cause the request to timeout on server-side rather than client-side. You may need to configure both a transport and API level timeout for long running operations. + +In the example below there are three different configurable timeouts for the `cluster.health` API all with different meanings for the request: + +[source,python] +------------------------------------ +es.options( + # Amount of time to wait for an HTTP response to start. + request_timeout=30 +).cluster.health( + # Amount of time to wait to collect info on all nodes. + timeout=30, + # Amount of time to wait for info from the master node. + master_timeout=10, +) +------------------------------------ + + +[discrete] +[[retries]] +=== Retries + +Requests can be retried if they don't return with a successful response. This provides a way for requests to be resilient against transient failures or overloaded nodes. 
+ +The maximum number of retries per request can be configured via the `max_retries` parameter. Setting this parameter to 0 disables retries. This parameter can be set in the client constructor or per-request via the client `.options()` method: + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + max_retries=5 +) + +# For this API request we disable retries with 'max_retries=0' +es.options(max_retries=0).index( + index="blogs", + document={ + "title": "..." + } +) +------------------------------------ + +[discrete] +==== Retrying on connection errors and timeouts + +Connection errors are automatically retried if retries are enabled. Retrying requests on connection timeouts can be enabled or disabled via the `retry_on_timeout` parameter. This parameter can be set on the client constructor or via the client `.options()` method: + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + retry_on_timeout=True +) +es.options(retry_on_timeout=False).info() +------------------------------------ + +[discrete] +==== Retrying status codes + +By default if retries are enabled `retry_on_status` is set to `(429, 502, 503, 504)`. This parameter can be set on the client constructor or via the client `.options()` method. Setting this value to `False` or `()` will disable the default behavior. + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + retry_on_status=False +) + +# Retry this API on '500 Internal Error' statuses +es.options(retry_on_status=[500]).index( + index="blogs", + document={ + "title": "..." + } +) +------------------------------------ + +[discrete] +==== Ignoring status codes + +By default an `ApiError` exception will be raised for any non-2XX HTTP requests that exhaust retries, if any. If you're expecting an HTTP error from the API but aren't interested in raising an exception you can use the `ignore_status` parameter via the client `.options()` method. 
+ +A good example where this is useful is setting up or cleaning up resources in a cluster in a robust way: + +[source,python] +------------------------------------ +es = Elasticsearch(...) + +# API request is robust against the index not existing: +resp = es.options(ignore_status=404).indices.delete(index="delete-this") +resp.meta.status # Can be either '2XX' or '404' + +# API request is robust against the index already existing: +resp = es.options(ignore_status=[400]).indices.create( + index="create-this", + mappings={ + "properties": {"field": {"type": "integer"}} + } +) +resp.meta.status # Can be either '2XX' or '400' +------------------------------------ + +When using the `ignore_status` parameter the error response will be returned serialized just like a non-error response. In these cases it can be useful to inspect the HTTP status of the response. To do this you can inspect the `resp.meta.status`. + +[discrete] +[[sniffing]] +=== Sniffing for new nodes + +Additional nodes can be discovered by a process called "sniffing" where the client will query the cluster for more nodes that can handle requests. + +Sniffing can happen at three different times: on client instantiation, before requests, and on a node failure. These three behaviors can be enabled and disabled with the `sniff_on_start`, `sniff_before_requests`, and `sniff_on_node_failure` parameters. + +IMPORTANT: When using an HTTP load balancer or proxy you cannot use sniffing functionality as the cluster would supply the client with IP addresses to directly connect to the cluster, circumventing the load balancer. Depending on your configuration this might be something you don't want or break completely. + +[discrete] +==== Waiting between sniffing attempts + +To avoid needlessly sniffing too often there is a delay between attempts to discover new nodes. This value can be controlled via the `min_wait_between_sniffing` parameter. 
+ +[discrete] +==== Filtering nodes which are sniffed + +By default nodes which are marked with only a `master` role will not be used. To change this behavior the `sniff_filter` parameter can be used to control which sniffed nodes are added to the node pool. + + +[discrete] +[[node-pool]] +=== Node Pool + +[discrete] +==== Selecting a node from the pool + +You can specify a node selector pattern via the `node_selector_class` parameter. The supported values are `round_robin` and `random`. Default is `round_robin`. + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + node_selector_class="round_robin" +) +------------------------------------ + +Custom selectors are also supported: + +[source,python] +------------------------------------ +from elastic_transport import NodeSelector + +class CustomSelector(NodeSelector): + def select(self, nodes): ... + +es = Elasticsearch( + ..., + node_selector_class=CustomSelector +) +------------------------------------ + +[discrete] +==== Marking nodes dead and alive + +Individual nodes of Elasticsearch may have transient connectivity or load issues which may make them unable to service requests. To combat this the pool of nodes will detect when a node isn't able to service requests due to transport or API errors. + +After a node has been timed out it will be moved back to the set of "alive" nodes but only after the node returns a successful response will the node be marked as "alive" in terms of consecutive errors. + +The `dead_node_backoff_factor` and `max_dead_node_backoff` parameters can be used to configure how long the node pool will put the node into timeout with each consecutive failure. Both parameters use a unit of seconds. + +The calculation is equal to `min(dead_node_backoff_factor * (2 ** (consecutive_failures - 1)), max_dead_node_backoff)`. + + +[discrete] +[[serializer]] +=== Serializers + +Serializers transform bytes on the wire into native Python objects and vice-versa. 
By default the client ships with serializers for `application/json`, `application/x-ndjson`, `text/*`, and `application/mapbox-vector-tile`. + +You can define custom serializers via the `serializers` parameter: + +[source,python] +------------------------------------ +from typing import Any + +from elasticsearch import Elasticsearch, JsonSerializer + +class JsonSetSerializer(JsonSerializer): + """Custom JSON serializer that handles Python sets""" + def default(self, value: Any) -> Any: + if isinstance(value, set): + return list(value) + return super().default(value) + +es = Elasticsearch( + ..., + # Serializers are a mapping of 'mimetype' to Serializer class. + serializers={"application/json": JsonSetSerializer} +) +------------------------------------ + + +[discrete] +[[nodes]] +=== Nodes + +[discrete] +==== Node implementations + +The default node class for synchronous I/O is `urllib3` and the default node class for asynchronous I/O is `aiohttp`. + +For all of the built-in HTTP node implementations like `urllib3`, `requests`, and `aiohttp` you can specify with a simple string to the `node_class` parameter: + +[source,python] +------------------------------------ +from elasticsearch import Elasticsearch + +es = Elasticsearch( + ..., + node_class="requests" +) +------------------------------------ + +You can also specify a custom node implementation via the `node_class` parameter: + +[source,python] +------------------------------------ +from elasticsearch import Elasticsearch +from elastic_transport import Urllib3HttpNode + +class CustomHttpNode(Urllib3HttpNode): + ... + +es = Elasticsearch( + ..., + node_class=CustomHttpNode +) +------------------------------------ + +[discrete] +==== HTTP connections per node + +Each node contains its own pool of HTTP connections to allow for concurrent requests.
This value is configurable via the `connections_per_node` parameter: + +[source,python] +------------------------------------ +es = Elasticsearch( + ..., + connections_per_node=5 +) +------------------------------------ diff --git a/docs/guide/connecting.asciidoc b/docs/guide/connecting.asciidoc index 629e81a23..2384a0bcc 100644 --- a/docs/guide/connecting.asciidoc +++ b/docs/guide/connecting.asciidoc @@ -5,26 +5,49 @@ This page contains the information you need to connect the Client with {es}. [discrete] -[[authentication]] -=== Authentication +[[connect-url]] +==== Connecting with URLs -This section contains code snippets to show you how to connect to various {es} -providers. +A single node can be specified via a `scheme`, `host`, `port`, and optional `path_prefix`. These values can either be specified manually via a URL in a string, dictionary, `NodeConfig`, or a list of these values. You must specify at least `scheme`, `host` and `port` for each node. All of the following are valid configurations: + +[source,python] +---------------------------- +from elasticsearch import Elasticsearch +# Single node via URL +es = Elasticsearch("http://localhost:9200") + +# Multiple nodes via URL +es = Elasticsearch([ + "http://localhost:9200", + "http://localhost:9201", + "http://localhost:9202" +]) + +# Single node via dictionary +es = Elasticsearch({"scheme": "http", "host": "localhost", "port": 9200}) + +# Multiple nodes via dictionary +es = Elasticsearch([ + {"scheme": "http", "host": "localhost", "port": 9200}, + {"scheme": "http", "host": "localhost", "port": 9201}, +]) +---------------------------- [discrete] -[[auth-ec]] -==== Elastic Cloud +[[connect-ec]] +==== Connecting to Elastic Cloud Cloud ID is an easy way to configure your client to work with your Elastic Cloud -deployment. Combine the `cloud_id` with either `http_auth` or `api_key` to +deployment. Combine the `cloud_id` with either `basic_auth` or `api_key` to authenticate with your Elastic Cloud deployment. 
Using `cloud_id` enables TLS verification and HTTP compression by default and sets the port to 443 unless otherwise overwritten via the port parameter or the -port value encoded within `cloud_id`. Using Cloud ID also disables sniffing. +port value encoded within `cloud_id`. Using Cloud ID also disables sniffing as +a proxy is in use. -[source,py] +[source,python] ---------------------------- from elasticsearch import Elasticsearch @@ -35,35 +58,94 @@ es = Elasticsearch( [discrete] -[[auth-http]] -==== HTTP Authentication +[[authentication]] +=== Authentication + +This section contains code snippets to show you how to connect to various {es} +providers. All authentication methods are supported on the client constructor +or via the per-request `.options()` method: + +[source,python] +---------------------------- +from elasticsearch import Elasticsearch + +# Authenticate from the constructor +es = Elasticsearch( + "http://localhost:9200", + basic_auth=("username", "password") +) + +# Authenticate via the .options() method: +es.options( + basic_auth=("username", "password") +).indices.get(index="*") + +# You can persist the authenticated client to use +# later or use for multiple API calls: +auth_client = es.options( + api_key=("api-key-id", "api-key-secret") +) +for i in range(10): + auth_client.index( + index="example-index", + document={"field": i} + ) +---------------------------- + + +[discrete] +[[auth-basic]] +==== HTTP Basic authentication (Username and Password) -HTTP authentication uses the `http_auth` parameter by passing in a username and +HTTP Basic authentication uses the `basic_auth` parameter by passing in a username and password within a tuple: -[source,py] +[source,python] ---------------------------- from elasticsearch import Elasticsearch +# Adds the HTTP header 'Authorization: Basic ' es = Elasticsearch( - http_auth=(“username”, “password”) + basic_auth=(“username”, “password”) +) +---------------------------- + + +[discrete] +[[auth-bearer]] +==== 
HTTP Bearer authentication + +HTTP Bearer authentication uses the `bearer_auth` parameter by passing the token +as a string. This authentication method is used by +https://www.elastic.co/guide/en/elasticsearch/reference/master/security-api-create-service-token.html[Service Account Tokens] +and https://www.elastic.co/guide/en/elasticsearch/reference/master/security-api-get-token.html[Bearer Tokens]. + +[source,python] +---------------------------- +from elasticsearch import Elasticsearch + +# Adds the HTTP header 'Authorization: Bearer token-value' +es = Elasticsearch( + bearer_auth="token-value" +) +---------------------------- [discrete] [[auth-apikey]] -==== ApiKey authentication +==== API Key authentication You can configure the client to use {es}'s API Key for connecting to your -cluster. +cluster. Note that you need the values of `id` and `api_key` to +https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[authenticate via an API Key]. -[source,py] +[source,python] ---------------------------- from elasticsearch import Elasticsearch +# Adds the HTTP header 'Authorization: ApiKey ' es = Elasticsearch( - api_key=(“api_key_id”, “api_key_secret”) + api_key=("api_key.id", "api_key.api_key") ) ---------------------------- @@ -90,11 +172,15 @@ This practice does not only improve performance but also enables background func https://www.elastic.co/blog/elasticsearch-sniffing-best-practices-what-when-why-how[sniffing]. The following examples provide a skeleton for the best practices. +IMPORTANT: The async client shouldn't be used within Function-as-a-Service as a new event + loop must be started for each invocation. Instead the synchronous `Elasticsearch` + client is recommended. 
+ [discrete] [[connecting-faas-gcp]] ==== GCP Cloud Functions -[source,py] +[source,python] ---------------------------- from elasticsearch import Elasticsearch @@ -111,7 +197,7 @@ def main(request): [[connecting-faas-aws]] ==== AWS Lambda -[source,py] +[source,python] ---------------------------- from elasticsearch import Elasticsearch @@ -128,7 +214,7 @@ def main(event, context): [[connecting-faas-azure]] ==== Azure Functions -[source,py] +[source,python] ---------------------------- import azure.functions as func from elasticsearch import Elasticsearch @@ -142,10 +228,6 @@ def main(request: func.HttpRequest) -> func.HttpResponse: ---------------------------- -IMPORTANT: The async client shouldn't be used within Function-as-a-Service as a new event - loop must be started for each invocation. Instead the synchronous `Elasticsearch` - client is recommended. - Resources used to assess these recommendations: * https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations[GCP Cloud Functions: Tips & Tricks] diff --git a/docs/guide/connection-pool.asciidoc b/docs/guide/connection-pool.asciidoc deleted file mode 100644 index 9d71217c9..000000000 --- a/docs/guide/connection-pool.asciidoc +++ /dev/null @@ -1,18 +0,0 @@ -[[connection-pool]] -=== Connection pool - -Connection pool is a container that holds the `Connection` instances, manages -the selection process (via a `ConnectionSelector`) and dead connections. - -Initially connections are stored in the class as a list and – along with the -connection options – get passed to the `ConnectionSelector` instance for future -reference. - -Upon each request, the `Transport` asks for a `Connection` via the -`get_connection` method. If the connection fails, it is marked as dead (via -`mark_dead`) and put on a timeout. When the timeout is over the connection is -resurrected and returned to the live pool. 
A connection that has been previously -marked as dead and then succeeds is marked as live (its fail count is deleted). - -For reference information, refer to the -https://elasticsearch-py.readthedocs.io/en/latest/connection.html#connection-pool[full {es} Python documentation]. \ No newline at end of file diff --git a/docs/guide/connection-selector.asciidoc b/docs/guide/connection-selector.asciidoc deleted file mode 100644 index 39d340c2b..000000000 --- a/docs/guide/connection-selector.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -[[connection-selector]] -=== Connection selector - -Connection selector is a simple class used to select a connection from a list of -currently live connection instances. Initially, it is passed a dictionary -containing all the connections options which it can then use during the -selection process. When the _select_ method is called it is given a list of -currently live connections to choose from. - -The options dictionary is passed to `Transport` as the hosts parameter and the -same is used to construct the connection object itself. When the connection was -created based on information retrieved from the cluster via the sniffing -process, it is the dictionary returned by the `host_info_callback`. - -Example of where this might be useful is a zone-aware selector that would only -select connections from its own zones and only fall back to other connections -where there would be none in its zones. - -For reference information, refer to the -https://elasticsearch-py.readthedocs.io/en/latest/connection.html#connection-selector[full {es} Python documentation]. 
\ No newline at end of file diff --git a/docs/guide/examples.asciidoc b/docs/guide/examples.asciidoc index d45ef4039..a12bced39 100644 --- a/docs/guide/examples.asciidoc +++ b/docs/guide/examples.asciidoc @@ -22,15 +22,15 @@ To index a document, you need to specify three pieces of information: `index`, ---------------------------- from datetime import datetime from elasticsearch import Elasticsearch -es = Elasticsearch() +es = Elasticsearch('https://localhost:9200') doc = { 'author': 'author_name', 'text': 'Interensting content...', 'timestamp': datetime.now(), } -res = es.index(index="test-index", id=1, body=doc) -print(res['result']) +resp = es.index(index="test-index", id=1, document=doc) +print(resp['result']) ---------------------------- @@ -42,8 +42,8 @@ To get a document, you need to specify its `index` and `id`: [source,py] ---------------------------- -res = es.get(index="test-index", id=1) -print(res['_source']) +resp = es.get(index="test-index", id=1) +print(resp['_source']) ---------------------------- @@ -67,7 +67,7 @@ The `search()` method returns results that are matching a query: [source,py] ---------------------------- -res = es.search(index="test-index", body={"query": {"match_all": {}}}) +resp = es.search(index="test-index", query={"match_all": {}}) print("Got %d Hits:" % res['hits']['total']['value']) for hit in res['hits']['hits']: print("%(timestamp)s %(author)s: %(text)s" % hit["_source"]) @@ -85,15 +85,15 @@ To update a document, you need to specify three pieces of information: `index`, ---------------------------- from datetime import datetime from elasticsearch import Elasticsearch -es = Elasticsearch() +es = Elasticsearch('https://localhost:9200') doc = { 'author': 'author_name', 'text': 'Interensting modified content...', 'timestamp': datetime.now(), } -res = es.update(index="test-index", id=1, body=doc) -print(res['result']) +resp = es.update(index="test-index", id=1, document=doc) +print(resp['result']) ---------------------------- 
diff --git a/docs/guide/installation.asciidoc b/docs/guide/installation.asciidoc index 68444334e..623add181 100644 --- a/docs/guide/installation.asciidoc +++ b/docs/guide/installation.asciidoc @@ -17,4 +17,4 @@ $ python -m pip install elasticsearch[async] -------------------------------------------- Read more about -https://elasticsearch-py.readthedocs.io/en/master/async.html[how to use Asyncio with this project]. \ No newline at end of file +https://elasticsearch-py.readthedocs.io/en/master/async.html[how to use Asyncio with this project]. diff --git a/docs/guide/integrations.asciidoc b/docs/guide/integrations.asciidoc index b1df4e81d..83b88e862 100644 --- a/docs/guide/integrations.asciidoc +++ b/docs/guide/integrations.asciidoc @@ -8,20 +8,48 @@ You can find integration options and information on this page. [[transport]] === Transport -The `Transport` class is a subclass of the -https://elasticsearch-py.readthedocs.io/en/latest/connection.html[Connection Layer API] -that contains all the classes that are responsible for handling the connection -to the {es} cluster. +The handling of connections, retries, and pooling is handled by the https://github.com/elastic/elastic-transport-python[Elastic Transport Python] library. +Documentation on the low-level classes is available on https://elastic-transport-python.readthedocs.io[Read the Docs]. -The `Transport` class is an encapsulation of the transport-related logic of the -Python client. For the exhaustive list of parameters, refer to the -https://elasticsearch-py.readthedocs.io/en/latest/connection.html#transport[documentation]. 
+ +[discrete] +[[opaque-id]] +=== Tracking requests with Opaque ID + +You can enrich your requests against Elasticsearch with an identifier string, that allows you to discover this identifier in https://www.elastic.co/guide/en/elasticsearch/reference/current/logging.html#deprecation-logging[deprecation logs], to support you with https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-slowlog.html#_identifying_search_slow_log_origin[identifying search slow log origin] +or to help with https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html#_identifying_running_tasks[identifying running tasks]. + +The opaque ID can be set via the `opaque_id` parameter via the client `.options()` method: + +[source,python] +------------------------------------ +es = Elasticsearch(...) +es.options(opaque_id="request-id-...").search(...) +------------------------------------ [discrete] -[[transport-classes]] -==== Transport classes +[[type-hints]] +=== Type Hints + +Starting in `elasticsearch-py` v7.10.0 the library now ships with https://www.python.org/dev/peps/pep-0484[type hints] and supports basic static type analysis with tools like http://mypy-lang.org[Mypy] and https://github.com/microsoft/pyright[Pyright]. + +If we write a script that has a type error like using `request_timeout` with a `str` argument instead of `float` and then run Mypy on the script: + +[source,python] +------------------------------------ +# script.py +from elasticsearch import Elasticsearch + +es = Elasticsearch(...) +es.options( + request_timeout="5" # type error! +).search(...) + +# $ mypy script.py +# script.py:5: error: Argument "request_timeout" to "search" of "Elasticsearch" has +# incompatible type "str"; expected "Union[int, float, None]" +# Found 1 error in 1 file (checked 1 source file) +------------------------------------ -The `Transport` classes can be used to maintain connection with an {es} cluster. 
-For the reference information of these classes, refer to the -https://elasticsearch-py.readthedocs.io/en/latest/transports.html[documentation]. +Type hints also allow tools like your IDE to check types and provide better auto-complete functionality. diff --git a/docs/guide/overview.asciidoc b/docs/guide/overview.asciidoc index a8f8fefb0..674d7a7f2 100644 --- a/docs/guide/overview.asciidoc +++ b/docs/guide/overview.asciidoc @@ -3,7 +3,7 @@ This is the official low-level Python client for {es}. Its goal is to provide common ground for all {es}-related code in Python. For this reason, the client -is designed to be unopinionated and extendable. Full documentation is available +is designed to be unopinionated and extendable. An API reference is available on https://elasticsearch-py.readthedocs.io[Read the Docs]. @@ -28,10 +28,10 @@ Simple use-case: >>> from datetime import datetime >>> from elasticsearch import Elasticsearch -# By default we connect to localhost:9200 ->>> es = Elasticsearch() +# Connect to 'http://localhost:9200' +>>> es = Elasticsearch("http://localhost:9200") -# Datetimes will be serialized... +# Datetimes will be serialized: >>> es.index(index="my-index-000001", doc_type="test-type", id=42, body={"any": "data", "timestamp": datetime.now()}) {'_id': '42', '_index': 'my-index-000001', '_type': 'test-type', '_version': 1, 'ok': True} @@ -40,13 +40,6 @@ Simple use-case: {'any': 'data', 'timestamp': '2013-05-12T19:45:31.804229'} ------------------------------------ -[NOTE] -All the API calls map the raw REST API as closely as possible, including -the distinction between required and optional arguments to the calls. This -means that the code makes distinction between positional and keyword arguments; -we, however, recommend that people use keyword arguments for all calls for -consistency and safety. - TIP: For an elaborate example of how to ingest data into Elastic Cloud, refer to {cloud}/ec-getting-started-python.html[this page]. 
@@ -64,8 +57,7 @@ The client's features include: * Load balancing (with pluggable selection strategy) across all available nodes -* Failed connection penalization (time based - failed connections won't be - retried until a timeout is reached) +* Node timeouts on transient errors * Thread safety @@ -93,4 +85,4 @@ It also provides an optional https://elasticsearch-dsl.readthedocs.org/en/latest/persistence.html#doctype[persistence layer] for working with documents as Python objects in an ORM-like fashion: defining mappings, retrieving and saving documents, wrapping the document data -in user-defined classes. \ No newline at end of file +in user-defined classes. diff --git a/docs/sphinx/api.rst b/docs/sphinx/api.rst index ee50a131d..60fc55072 100644 --- a/docs/sphinx/api.rst +++ b/docs/sphinx/api.rst @@ -1,110 +1,24 @@ .. _api: -API Documentation -================= +Elasticsearch API Reference +=========================== -All the API calls map the raw REST api as closely as possible, including the -distinction between required and optional arguments to the calls. This means -that the code makes distinction between positional and keyword arguments; we, -however, recommend that people **use keyword arguments for all calls for -consistency and safety**. +All the API calls map the raw REST API as closely as possible, including the +distinction between required and optional arguments to the calls. Keyword +arguments are required for all .. note:: - for compatibility with the Python ecosystem we use ``from_`` instead of - ``from`` and ``doc_type`` instead of ``type`` as parameter names. + Some API parameters in Elasticsearch are reserved keywords in Python. + For example the ``from`` query parameter for pagination would be + aliased as ``from_``. -Global Options --------------- - -Some parameters are added by the client itself and can be used in all API -calls. 
- -Ignore -~~~~~~ - -An API call is considered successful (and will return a response) if -elasticsearch returns a 2XX response. Otherwise an instance of -:class:`~elasticsearch.TransportError` (or a more specific subclass) will be -raised. You can see other exception and error states in :ref:`exceptions`. If -you do not wish an exception to be raised you can always pass in an ``ignore`` -parameter with either a single status code that should be ignored or a list of -them: - -.. code-block:: python - - from elasticsearch import Elasticsearch - es = Elasticsearch() - - # ignore 400 cause by IndexAlreadyExistsException when creating an index - es.indices.create(index='test-index', ignore=400) - - # ignore 404 and 400 - es.indices.delete(index='test-index', ignore=[400, 404]) - - -Timeout -~~~~~~~ - -Global timeout can be set when constructing the client (see -:class:`~elasticsearch.Connection`'s ``timeout`` parameter) or on a per-request -basis using ``request_timeout`` (float value in seconds) as part of any API -call, this value will get passed to the ``perform_request`` method of the -connection class: - -.. code-block:: python - - # only wait for 1 second, regardless of the client's default - es.cluster.health(wait_for_status='yellow', request_timeout=1) - -.. note:: - - Some API calls also accept a ``timeout`` parameter that is passed to - Elasticsearch server. This timeout is internal and doesn't guarantee that the - request will end in the specified time. - -Tracking Requests with Opaque ID -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can enrich your requests against Elasticsearch with an identifier string, that allows you to discover this identifier -in `deprecation logs `_, to support you with -`identifying search slow log origin `_ -or to help with `identifying running tasks `_. - - .. 
code-block:: python - - import elasticsearch - - # You can add to the client to apply to all requests - client = elasticsearch.Elasticsearch(opaque_id="app17@dc06.eu_user1234") - - # Or you can apply per-request for more granularity. - resp = client.get(index="test", id="1", opaque_id="app17@dc06.eu_user1234") - - -.. py:module:: elasticsearch - -Response Filtering -~~~~~~~~~~~~~~~~~~ - -The ``filter_path`` parameter is used to reduce the response returned by -elasticsearch. For example, to only return ``_id`` and ``_type``, do: - -.. code-block:: python - - es.search(index='test-index', filter_path=['hits.hits._id', 'hits.hits._type']) - -It also supports the ``*`` wildcard character to match any field or part of a -field's name: - -.. code-block:: python - - es.search(index='test-index', filter_path=['hits.hits._*']) - Elasticsearch ------------- +.. py:module:: elasticsearch + .. autoclass:: Elasticsearch :members: diff --git a/docs/sphinx/async.rst b/docs/sphinx/async.rst index 08ddfa337..3cc4427ac 100644 --- a/docs/sphinx/async.rst +++ b/docs/sphinx/async.rst @@ -223,21 +223,3 @@ AsyncElasticsearch .. autoclass:: AsyncElasticsearch :members: - -AsyncTransport -~~~~~~~~~~~~~~ - - .. autoclass:: AsyncTransport - :members: - -AsyncConnection -~~~~~~~~~~~~~~~~~ - - .. autoclass:: AsyncConnection - :members: - -AIOHttpConnection -~~~~~~~~~~~~~~~~~ - - .. autoclass:: AIOHttpConnection - :members: diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index 4396b6abf..4b26b55ff 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -1,37 +1,27 @@ # -*- coding: utf-8 -*- -# Licensed to Elasticsearch B.V under one or more agreements. -# Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -# See the LICENSE file in the project root for more information - -# -# Elasticsearch documentation build configuration file, created by -# sphinx-quickstart on Mon May 6 15:38:41 2013. +# Licensed to Elasticsearch B.V. 
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# This file is execfile()d with the current directory set to its containing dir. +# http://www.apache.org/licenses/LICENSE-2.0 # -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. -import os import datetime -import elasticsearch - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration ----------------------------------------------------- +import os -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +import elasticsearch -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest"] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx"] autoclass_content = "both" @@ -41,68 +31,18 @@ # The suffix of source filenames. 
source_suffix = ".rst" -# The encoding of source files. -# source_encoding = 'utf-8-sig' - # The master toctree document. master_doc = "index" # General information about the project. -project = u"Elasticsearch" -copyright = u"%d, Elasticsearch B.V" % datetime.date.today().year +project = "Python Elasticsearch client" +copyright = "%d, Elasticsearch B.V" % datetime.date.today().year -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. - -# The short X.Y version. version = elasticsearch.__versionstr__ -# The full version, including alpha/beta/rc tags. release = version -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ["_build", "examples"] - -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. 
-# keep_warnings = False - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. - on_rtd = os.environ.get("READTHEDOCS", None) == "True" if not on_rtd: # only import and set the theme if we're building docs locally @@ -111,165 +51,10 @@ html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -# html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. 
-# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -# html_domain_indices = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = "Elasticsearchdoc" - - -# -- Options for LaTeX output -------------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # 'preamble': '', +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "elastic-transport": ( + "https://elastic-transport-python.readthedocs.io/en/latest", + None, + ), } - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). 
-latex_documents = [ - ( - "index", - "Elasticsearch.tex", - u"Elasticsearch Documentation", - u"Honza Král", - "manual", - ) -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# If true, show page references after internal links. -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ("index", "elasticsearch-py", u"Elasticsearch Documentation", [u"Honza Král"], 1) -] - -# If true, show URL addresses after external links. -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------------ - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - "index", - "Elasticsearch", - u"Elasticsearch Documentation", - u"Honza Král", - "Elasticsearch", - "One line description of project.", - "Miscellaneous", - ) -] - -# Documents to append as an appendix to all manuals. -# texinfo_appendices = [] - -# If false, no module index is generated. -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. 
-# texinfo_no_detailmenu = False diff --git a/docs/sphinx/connection.rst b/docs/sphinx/connection.rst deleted file mode 100644 index bdf137f4a..000000000 --- a/docs/sphinx/connection.rst +++ /dev/null @@ -1,93 +0,0 @@ -.. _connection_api: - -Connection Layer API -==================== - -All of the classes responsible for handling the connection to the Elasticsearch -cluster. The default subclasses used can be overridden by passing parameters to the -:class:`~elasticsearch.Elasticsearch` class. All of the arguments to the client -will be passed on to :class:`~elasticsearch.Transport`, -:class:`~elasticsearch.ConnectionPool` and :class:`~elasticsearch.Connection`. - -For example if you wanted to use your own implementation of the -:class:`~elasticsearch.ConnectionSelector` class you can just pass in the -``selector_class`` parameter. - -.. note:: - - :class:`~elasticsearch.ConnectionPool` and related options (like - ``selector_class``) will only be used if more than one connection is defined. - Either directly or via the :ref:`sniffing` mechanism. - -.. note:: - - Known binary format mimetypes like ``application/mapbox-vector-tile`` will return - the response body as ``bytes`` instead of the usually UTF-8 encoded text. - -.. py:module:: elasticsearch - -Transport ---------- - -.. autoclass:: Transport(hosts, connection_class=Urllib3HttpConnection, connection_pool_class=ConnectionPool, host_info_callback=construct_hosts_list, sniff_on_start=False, sniffer_timeout=None, sniff_on_connection_fail=False, serializer=JSONSerializer(), max_retries=3, ** kwargs) - :members: - - -Connection Pool ---------------- - -.. autoclass:: ConnectionPool(connections, dead_timeout=60, selector_class=RoundRobinSelector, randomize_hosts=True, ** kwargs) - :members: - - -Connection Selector -------------------- - -.. 
autoclass:: ConnectionSelector(opts) - :members: - - -Urllib3HttpConnection (default connection_class) ------------------------------------------------- - -If you have complex SSL logic for connecting to Elasticsearch using an `SSLContext` object -might be more helpful. You can create one natively using the python SSL library with the -`create_default_context` (https://docs.python.org/3/library/ssl.html#ssl.create_default_context) method. - -To create an `SSLContext` object you only need to use one of cafile, capath or cadata: - -.. code-block:: python - - >>> from ssl import create_default_context - >>> context = create_default_context(cafile=None, capath=None, cadata=None) - -* `cafile` is the path to your CA File -* `capath` is the directory of a collection of CA's -* `cadata` is either an ASCII string of one or more PEM-encoded certificates or a bytes-like object of DER-encoded certificates. - -Please note that the use of SSLContext is only available for urllib3. - -.. autoclass:: Urllib3HttpConnection - :members: - - -API Compatibility HTTP Header ------------------------------ - -The Python client can be configured to emit an HTTP header -``Accept: application/vnd.elasticsearch+json; compatible-with=7`` -which signals to Elasticsearch that the client is requesting -``7.x`` version of request and response bodies. This allows for -upgrading from 7.x to 8.x version of Elasticsearch without upgrading -everything at once. Elasticsearch should be upgraded first after -the compatibility header is configured and clients should be upgraded -second. - - .. code-block:: python - - from elasticsearch import Elasticsearch - - client = Elasticsearch("http://...", headers={"accept": "application/vnd.elasticsearch+json; compatible-with=7"}) - -If you'd like to have the client emit the header without configuring ``headers`` you -can use the environment variable ``ELASTIC_CLIENT_APIVERSIONING=1``. 
diff --git a/docs/sphinx/exceptions.rst b/docs/sphinx/exceptions.rst index 2ec9c6ce0..ea1098b28 100644 --- a/docs/sphinx/exceptions.rst +++ b/docs/sphinx/exceptions.rst @@ -1,26 +1,25 @@ .. _exceptions: -Exceptions -========== +Exceptions & Warnings +===================== .. py:module:: elasticsearch -.. autoclass:: ImproperlyConfigured - .. autoclass:: ElasticsearchException - -.. autoclass:: SerializationError(ElasticsearchException) - -.. autoclass:: TransportError(ElasticsearchException) +.. autoclass:: SerializationError +.. autoclass:: ConnectionError +.. autoclass:: ConnectionTimeout +.. autoclass:: SSLError +.. autoclass:: ApiError :members: +.. autoclass:: NotFoundError +.. autoclass:: ConflictError +.. autoclass:: RequestError +.. autoclass:: AuthenticationException +.. autoclass:: AuthorizationException +.. autoclass:: UnsupportedProductError -.. autoclass:: ConnectionError(TransportError) -.. autoclass:: ConnectionTimeout(ConnectionError) -.. autoclass:: SSLError(ConnectionError) +Warnings +-------- -.. autoclass:: NotFoundError(TransportError) -.. autoclass:: ConflictError(TransportError) -.. autoclass:: RequestError(TransportError) -.. autoclass:: AuthenticationException(TransportError) -.. autoclass:: AuthorizationException(TransportError) -.. autoclass:: UnsupportedProductError +.. autoclass:: ElasticsearchWarning diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst index 9ea7bbebe..da6e4a976 100644 --- a/docs/sphinx/index.rst +++ b/docs/sphinx/index.rst @@ -77,191 +77,6 @@ a more convenient way of working with Elasticsearch. .. _elasticsearch-dsl: https://elasticsearch-dsl.readthedocs.io/ -Persistent Connections -~~~~~~~~~~~~~~~~~~~~~~ - -``elasticsearch-py`` uses persistent connections inside of individual connection -pools (one per each configured or sniffed node). Out of the box you can choose -between two ``http`` protocol implementations. See :ref:`transports` for more -information. 
- -The transport layer will create an instance of the selected connection class -per node and keep track of the health of individual nodes - if a node becomes -unresponsive (throwing exceptions while connecting to it) it's put on a timeout -by the :class:`~elasticsearch.ConnectionPool` class and only returned to the -circulation after the timeout is over (or when no live nodes are left). By -default nodes are randomized before being passed into the pool and round-robin -strategy is used for load balancing. - -You can customize this behavior by passing parameters to the -:ref:`connection_api` (all keyword arguments to the -:class:`~elasticsearch.Elasticsearch` class will be passed through). If what -you want to accomplish is not supported you should be able to create a subclass -of the relevant component and pass it in as a parameter to be used instead of -the default implementation. - - -Automatic Retries -~~~~~~~~~~~~~~~~~ - -If a connection to a node fails due to connection issues (raises -:class:`~elasticsearch.ConnectionError`) it is considered in faulty state. It -will be placed on hold for ``dead_timeout`` seconds and the request will be -retried on another node. If a connection fails multiple times in a row the -timeout will get progressively larger to avoid hitting a node that's, by all -indication, down. If no live connection is available, the connection that has -the smallest timeout will be used. - -By default retries are not triggered by a timeout -(:class:`~elasticsearch.ConnectionTimeout`), set ``retry_on_timeout`` to -``True`` to also retry on timeouts. - -.. _sniffing: - -Sniffing -~~~~~~~~ - -The client can be configured to inspect the cluster state to get a list of -nodes upon startup, periodically and/or on failure. See -:class:`~elasticsearch.Transport` parameters for details. - -Some example configurations: - - .. 
code-block:: python - - from elasticsearch import Elasticsearch - - # by default we don't sniff, ever - es = Elasticsearch() - - # you can specify to sniff on startup to inspect the cluster and load - # balance across all nodes - es = Elasticsearch(["seed1", "seed2"], sniff_on_start=True) - - # you can also sniff periodically and/or after failure: - es = Elasticsearch(["seed1", "seed2"], - sniff_on_start=True, - sniff_on_connection_fail=True, - sniffer_timeout=60) - -Thread safety -~~~~~~~~~~~~~ - -The client is thread safe and can be used in a multi threaded environment. Best -practice is to create a single global instance of the client and use it -throughout your application. If your application is long-running consider -turning on :ref:`sniffing` to make sure the client is up to date on the cluster -location. - -By default we allow ``urllib3`` to open up to 10 connections to each node, if -your application calls for more parallelism, use the ``maxsize`` parameter to -raise the limit: - -.. code-block:: python - - # allow up to 25 connections to each node - es = Elasticsearch(["host1", "host2"], maxsize=25) - -.. note:: - - Since we use persistent connections throughout the client it means that the - client doesn't tolerate ``fork`` very well. If your application calls for - multiple processes make sure you create a fresh client after call to - ``fork``. Note that Python's ``multiprocessing`` module uses ``fork`` to - create new processes on POSIX systems. - -TLS/SSL and Authentication -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can configure the client to use ``SSL`` for connecting to your -elasticsearch cluster, including certificate verification and HTTP auth: - -.. code-block:: python - - from elasticsearch import Elasticsearch - - # you can use RFC-1738 to specify the url - es = Elasticsearch(['https://user:secret@localhost:443']) - - # ... 
or specify common parameters as kwargs - - es = Elasticsearch( - ['localhost', 'otherhost'], - http_auth=('user', 'secret'), - scheme="https", - port=443, - ) - - # SSL client authentication using client_cert and client_key - - from ssl import create_default_context - - context = create_default_context(cafile="path/to/cert.pem") - es = Elasticsearch( - ['localhost', 'otherhost'], - http_auth=('user', 'secret'), - scheme="https", - port=443, - ssl_context=context, - ) - -.. warning:: - - ``elasticsearch-py`` doesn't ship with default set of root certificates. To - have working SSL certificate validation you need to either specify your own - as ``cafile`` or ``capath`` or ``cadata`` or install `certifi`_ which will - be picked up automatically. - - -See class :class:`~elasticsearch.Urllib3HttpConnection` for detailed -description of the options. - -.. _certifi: http://certifiio.readthedocs.io/en/latest/ - -Connecting via Cloud ID -~~~~~~~~~~~~~~~~~~~~~~~ - -Cloud ID is an easy way to configure your client to work -with your Elastic Cloud deployment. Combine the ``cloud_id`` -with either ``http_auth`` or ``api_key`` to authenticate -with your Elastic Cloud deployment. - -Using ``cloud_id`` enables TLS verification and HTTP compression by default -and sets the port to ``443`` unless otherwise overwritten via the ``port`` parameter -or the port value encoded within ``cloud_id``. Using Cloud ID also disables sniffing. - -.. code-block:: python - - from elasticsearch import Elasticsearch - - es = Elasticsearch( - cloud_id="cluster-1:dXMa5Fx...", - http_auth=("elastic", ""), - ) - -API Key Authentication -~~~~~~~~~~~~~~~~~~~~~~ - -You can configure the client to use Elasticsearch's `API Key`_ for connecting to your cluster. -Please note this authentication method has been introduced with release of Elasticsearch ``6.7.0``. - -.. 
code-block:: python - - from elasticsearch import Elasticsearch - - # you can use the api key tuple - es = Elasticsearch( - ['node-1', 'node-2', 'node-3'], - api_key=('id', 'api_key'), - ) - - # or you pass the base 64 encoded token - es = Elasticsearch( - ['node-1', 'node-2', 'node-3'], - api_key='base64encoded tuple', - ) - -.. _API Key: https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html Logging ~~~~~~~ @@ -278,101 +93,6 @@ already it is set to `propagate=False` so it needs to be activated separately. .. _logging library: http://docs.python.org/3/library/logging.html -Type Hints -~~~~~~~~~~ - -Starting in ``elasticsearch-py`` v7.10.0 the library now ships with `type hints`_ -and supports basic static type analysis with tools like `Mypy`_ and `Pyright`_. - -If we write a script that has a type error like using ``request_timeout`` with -a ``str`` argument instead of ``float`` and then run Mypy on the script: - -.. code-block:: python - - # script.py - from elasticsearch import Elasticsearch - - es = Elasticsearch(...) - es.search( - index="test-index", - request_timeout="5" # type error! - ) - - # $ mypy script.py - # script.py:5: error: Argument "request_timeout" to "search" of "Elasticsearch" has - # incompatible type "str"; expected "Union[int, float, None]" - # Found 1 error in 1 file (checked 1 source file) - -For now many parameter types for API methods aren't specific to -a type (ie they are of type ``typing.Any``) but in the future -they will be tightened for even better static type checking. - -Type hints also allow tools like your IDE to check types and provide better -auto-complete functionality. - -.. warning:: - - The type hints for API methods like ``search`` don't match the function signature - that can be found in the source code. Type hints represent optimal usage of the - API methods. Using keyword arguments is highly recommended so all optional parameters - and ``body`` are keyword-only in type hints. 
- - JetBrains PyCharm will use the warning ``Unexpected argument`` to denote that the - parameter may be keyword-only. - -.. _type hints: https://www.python.org/dev/peps/pep-0484 -.. _mypy: http://mypy-lang.org -.. _pyright: https://github.com/microsoft/pyright - - -Environment considerations --------------------------- - -When using the client there are several limitations of your environment that -could come into play. - -When using an HTTP load balancer you cannot use the :ref:`sniffing` -functionality - the cluster would supply the client with IP addresses to -directly connect to the cluster, circumventing the load balancer. Depending on -your configuration this might be something you don't want or break completely. - -Compression -~~~~~~~~~~~ - -When using capacity-constrained networks (low throughput), it may be handy to enable -compression. This is especially useful when doing bulk loads or inserting large -documents. This will configure compression. - -.. code-block:: python - - from elasticsearch import Elasticsearch - es = Elasticsearch(hosts, http_compress=True) - -Compression is enabled by default when connecting to Elastic Cloud via ``cloud_id``. - -Customization -------------- - -Custom serializers -~~~~~~~~~~~~~~~~~~ - -By default, ``JSONSerializer`` is used to encode all outgoing requests. -However, you can implement your own custom serializer - -.. code-block:: python - - from elasticsearch.serializer import JSONSerializer - - class SetEncoder(JSONSerializer): - def default(self, obj): - if isinstance(obj, set): - return list(obj) - if isinstance(obj, Something): - return 'CustomSomethingRepresentation' - return JSONSerializer.default(self, obj) - - es = Elasticsearch(serializer=SetEncoder()) - Elasticsearch-DSL ----------------- @@ -399,13 +119,11 @@ Contents -------- .. 
toctree:: - :maxdepth: 2 + :maxdepth: 3 api exceptions async - connection - transports helpers Release Notes diff --git a/docs/sphinx/transports.rst b/docs/sphinx/transports.rst deleted file mode 100644 index be8d2ebc5..000000000 --- a/docs/sphinx/transports.rst +++ /dev/null @@ -1,54 +0,0 @@ -.. _transports: - -Transport classes -================= - -List of transport classes that can be used, simply import your choice and pass -it to the constructor of :class:`~elasticsearch.Elasticsearch` as -`connection_class`. Note that the -:class:`~elasticsearch.connection.RequestsHttpConnection` requires ``requests`` -to be installed. - -For example to use the ``requests``-based connection just import it and use it: - -.. code-block:: python - - from elasticsearch import Elasticsearch, RequestsHttpConnection - es = Elasticsearch(connection_class=RequestsHttpConnection) - -The default connection class is based on ``urllib3`` which is more performant -and lightweight than the optional ``requests``-based class. Only use -``RequestsHttpConnection`` if you have need of any of ``requests`` advanced -features like custom auth plugins etc. - - -Product check and unsupported distributions -------------------------------------------- - -Starting in v7.14.0 the client performs a required product check before -the first API call is executed. This product check allows the client to -establish that it's communicating with a supported Elasticsearch cluster. - -For 8.x clients the product check will verify that the ``X-Elastic-Product: Elasticsearch`` -HTTP header is being sent with every response. If the client detects that it's not connected -to a supported distribution of Elasticsearch the ``UnsupportedProductError`` exception -will be raised. - -.. py:module:: elasticsearch.connection - -Connection ----------- - -.. autoclass:: Connection - -Urllib3HttpConnection ---------------------- - -.. autoclass:: Urllib3HttpConnection - - -RequestsHttpConnection ----------------------- - -.. 
autoclass:: RequestsHttpConnection - diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py index 6ec14bbc7..86952db9a 100644 --- a/elasticsearch/_async/client/__init__.py +++ b/elasticsearch/_async/client/__init__.py @@ -75,22 +75,40 @@ class AsyncElasticsearch(BaseClient): """ Elasticsearch low-level client. Provides a straightforward mapping from - Python to ES REST endpoints. - - The instance has attributes ``cat``, ``cluster``, ``indices``, ``ingest``, - ``nodes``, ``snapshot`` and ``tasks`` that provide access to instances of - :class:`~elasticsearch.client.CatClient`, - :class:`~elasticsearch.client.ClusterClient`, - :class:`~elasticsearch.client.IndicesClient`, - :class:`~elasticsearch.client.IngestClient`, - :class:`~elasticsearch.client.NodesClient`, - :class:`~elasticsearch.client.SnapshotClient` and - :class:`~elasticsearch.client.TasksClient` respectively. This is the - preferred (and only supported) way to get access to those classes and their - methods. + Python to Elasticsearch REST APIs. + + The client instance has additional attributes to update APIs in different + namespaces such as ``async_search``, ``indices``, ``security``, and more: + + .. code-block:: python + + client = Elasticsearch("http://localhost:9200") + + # Get Document API + client.get(index="*", id="1") + + # Get Index API + client.indices.get(index="*") + + Transport options can be set on the client constructor or using + the :meth:`~elasticsearch.Elasticsearch.options` method: + + .. code-block:: python + + # Set 'api_key' on the constructor + client = Elasticsearch( + "http://localhost:9200", + api_key=("id", "api_key") + ) + client.search(...) + + # Set 'api_key' per request + client.options(api_key=("id", "api_key")).search(...) If you want to turn on :ref:`sniffing` you have several options (described - in :class:`~elasticsearch.Transport`):: + in :class:`~elastic_transport.Transport`): + + .. 
code-block:: python # create connection that will automatically inspect the cluster to get # the list of active nodes. Start with nodes running on 'esnode1' and @@ -115,7 +133,7 @@ class AsyncElasticsearch(BaseClient): {'host': 'othernode', 'port': 443, 'url_prefix': 'es', 'use_ssl': True}, ]) - If using SSL, there are several parameters that control how we deal with + If using TLS/SSL, there are several parameters that control how we deal with certificates (see :class:`~elasticsearch.Urllib3HttpConnection` for detailed description of the options):: @@ -135,8 +153,6 @@ class AsyncElasticsearch(BaseClient): es = Elasticsearch( ['https://localhost:443', 'https://other_host:443'], - # turn on SSL - use_ssl=True, # no verify SSL certificates verify_certs=False, # don't show warnings about ssl certs verification @@ -149,8 +165,6 @@ class AsyncElasticsearch(BaseClient): es = Elasticsearch( ['https://localhost:443', 'https://other_host:443'], - # turn on SSL - use_ssl=True, # make sure we verify SSL certificates verify_certs=True, # provide a path to CA certs on disk diff --git a/elasticsearch/client.py b/elasticsearch/client.py index a54173df6..35bbc8213 100644 --- a/elasticsearch/client.py +++ b/elasticsearch/client.py @@ -17,7 +17,7 @@ import warnings -from ._sync.client import Elasticsearch # noqa: F401 +from ._sync.client import Elasticsearch as Elasticsearch # noqa: F401 from ._sync.client.async_search import ( # noqa: F401 AsyncSearchClient as AsyncSearchClient, ) diff --git a/noxfile.py b/noxfile.py index ad88e4993..3f93a32b2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -21,6 +21,7 @@ SOURCE_DIR = os.path.dirname(os.path.abspath(__file__)) SOURCE_FILES = ( + "docs/sphinx/conf.py", "setup.py", "noxfile.py", "elasticsearch/", @@ -94,10 +95,10 @@ def lint(session): @nox.session() def docs(session): - session.install(".") session.install( "-rdev-requirements.txt", "sphinx-rtd-theme", "sphinx-autodoc-typehints" ) + session.install(".") session.run("python", "-m", "pip", 
"install", "sphinx-autodoc-typehints") session.run("sphinx-build", "docs/sphinx/", "docs/sphinx/_build", "-b", "html")