From 5a05bd08282068c3de566f850a5a19f7be5bfa6c Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Tue, 15 Apr 2025 18:11:40 +0100 Subject: [PATCH] 8.18 client documentation updates --- docs/guide/async.asciidoc | 141 ++++++++++++++++++++++ docs/guide/examples.asciidoc | 176 ++++++++++++++++++++++++++++ docs/guide/getting-started.asciidoc | 58 ++++++++- 3 files changed, 370 insertions(+), 5 deletions(-) create mode 100644 docs/guide/async.asciidoc diff --git a/docs/guide/async.asciidoc b/docs/guide/async.asciidoc new file mode 100644 index 000000000..9f3c04acd --- /dev/null +++ b/docs/guide/async.asciidoc @@ -0,0 +1,141 @@ +[[async]] +== Using with asyncio + +The `elasticsearch` package supports async/await with +https://docs.python.org/3/library/asyncio.html[asyncio] and +https://docs.aiohttp.org[aiohttp]. You can either install `aiohttp` +directly or use the `[async]` extra: + +[source,bash] +---- +$ python -m pip install elasticsearch aiohttp + +# - OR - + +$ python -m pip install elasticsearch[async] +---- + +[discrete] +=== Getting Started with Async + +After installation all async API endpoints are available via +`~elasticsearch.AsyncElasticsearch` and are used in the same way as +other APIs, with an extra `await`: + +[source,python] +---- +import asyncio +from elasticsearch import AsyncElasticsearch + +client = AsyncElasticsearch() + +async def main(): + resp = await client.search( + index="documents", + body={"query": {"match_all": {}}}, + size=20, + ) + print(resp) + +loop = asyncio.get_event_loop() +loop.run_until_complete(main()) +---- + +All APIs that are available under the sync client are also available +under the async client. 
+ +https://elasticsearch-py.readthedocs.io/en/latest/async.html#api-reference[Reference documentation] + +[discrete] +=== ASGI Applications and Elastic APM + +https://asgi.readthedocs.io[ASGI] (Asynchronous Server Gateway +Interface) is a way to serve Python web applications making use of +async I/O to achieve better performance. Some examples of ASGI +frameworks include FastAPI, Django 3.0+, and Starlette. If you're +using one of these frameworks along with Elasticsearch then you should +be using `~elasticsearch.AsyncElasticsearch` to avoid blocking the event +loop with synchronous network calls for optimal performance. + +https://www.elastic.co/guide/en/apm/agent/python/current/index.html[Elastic +APM] also supports tracing of async Elasticsearch queries just the same +as synchronous queries. For an example on how to configure +`AsyncElasticsearch` with a popular ASGI framework +https://fastapi.tiangolo.com/[FastAPI] and APM tracing there is a +https://github.com/elastic/elasticsearch-py/tree/master/examples/fastapi-apm[pre-built +example] in the `examples/fastapi-apm` directory. + +See also the <<opentelemetry>> page. + +[discrete] +=== Frequently Asked Questions + +[discrete] +==== ValueError when initializing `AsyncElasticsearch`? + +If when trying to use `AsyncElasticsearch` you receive +`ValueError: You must have 'aiohttp' installed to use AiohttpHttpNode` +you should ensure that you have `aiohttp` installed in your environment +(check with `$ python -m pip freeze | grep aiohttp`). Otherwise, +async support won't be available. + +[discrete] +==== What about the `elasticsearch-async` package? + +Previously asyncio was supported separately via the +https://github.com/elastic/elasticsearch-py-async[elasticsearch-async] +package. The `elasticsearch-async` package has been deprecated in favor +of `AsyncElasticsearch` provided by the `elasticsearch` package in v7.8 +and onwards. + +[discrete] +==== Receiving 'Unclosed client session / connector' warning? 
+ +This warning is created by `aiohttp` when an open HTTP connection is +garbage collected. You'll typically run into this when closing your +application. To resolve the issue ensure that +`~elasticsearch.AsyncElasticsearch.close` is called before the +`~elasticsearch.AsyncElasticsearch` instance is garbage collected. + +For example if using FastAPI that might look like this: + +[source,python] +---- +import os +from contextlib import asynccontextmanager + +from fastapi import FastAPI +from elasticsearch import AsyncElasticsearch + +ELASTICSEARCH_URL = os.environ["ELASTICSEARCH_URL"] +client = None + +@asynccontextmanager +async def lifespan(app: FastAPI): + global client + client = AsyncElasticsearch(ELASTICSEARCH_URL) + yield + await client.close() + +app = FastAPI(lifespan=lifespan) + +@app.get("/") +async def main(): + return await client.info() +---- + +You can run this example by saving it to `main.py` and executing +`ELASTICSEARCH_URL=http://localhost:9200 uvicorn main:app`. + +[discrete] +=== Async Helpers + +Async variants of all helpers are available in `elasticsearch.helpers` +and are all prefixed with `async_*`. You'll notice that these APIs +are identical to the ones in the sync <<client-helpers>> documentation. + +All async helpers that accept an iterator or generator also accept async +iterators and async generators. 
+ +https://elasticsearch-py.readthedocs.io/en/latest/async.html#async-helpers[Reference documentation] + diff --git a/docs/guide/examples.asciidoc b/docs/guide/examples.asciidoc index b9a5650a6..575f43bbe 100644 --- a/docs/guide/examples.asciidoc +++ b/docs/guide/examples.asciidoc @@ -109,3 +109,179 @@ method: ---------------------------- client.delete(index="test-index", id=1) ---------------------------- + +[discrete] +[[ex-interactive]] +=== Interactive examples + +The https://github.com/elastic/elasticsearch-labs[elasticsearch-labs] +repo contains interactive and executable +https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[Python +notebooks], sample apps, and resources for testing out Elasticsearch, +using the Python client. These examples are mainly focused on vector +search, hybrid search and generative AI use cases, but you'll also find +examples of basic operations like creating index mappings and performing +lexical search. + +[discrete] +==== Search notebooks + +The +https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search[Search] +folder is a good place to start if you're new to Elasticsearch. This +folder contains a number of notebooks that demonstrate the fundamentals +of Elasticsearch, like indexing vectors, running lexical, semantic and +_hybrid_ searches, and more. 
+ +The following notebooks are available: + +[arabic, start=0] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/00-quick-start.ipynb[Quick +start] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/01-keyword-querying-filtering.ipynb[Keyword, +querying, filtering] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/02-hybrid-search.ipynb[Hybrid +search] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb[Semantic +search with ELSER] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/04-multilingual.ipynb[Multilingual +semantic search] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/05-query-rules.ipynb[Query +rules] +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/06-synonyms-api.ipynb[Synonyms +API quick start] + +Here's a brief overview of what you'll learn in each notebook. + +[discrete] +===== Quick start + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/00-quick-start.ipynb[00-quick-start.ipynb] +notebook you'll learn how to: + +* Use the Elasticsearch Python client for various operations. +* Create and define an index for a sample dataset with +`dense_vector` fields. +* Transform book titles into embeddings using +https://www.sbert.net[Sentence Transformers] and index them into +Elasticsearch. +* Perform k-nearest neighbors (knn) semantic searches. +* Integrate traditional text-based search with semantic search, for a +hybrid search system. +* Use reciprocal rank fusion (RRF) to intelligently combine search +results from different retrieval systems. 
+ +[discrete] +===== Keyword, querying, filtering + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/01-keyword-querying-filtering.ipynb[01-keyword-querying-filtering.ipynb] +notebook, you'll learn how to: + +* Use +https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html[query +and filter contexts] to search and filter documents in Elasticsearch. +* Execute full-text searches with `match` and `multi-match` queries. +* Query and filter documents based on `text`, `number`, `date`, or +`boolean` values. +* Run multi-field searches using the `multi-match` query. +* Prioritize specific fields in the `multi-match` query for tailored +results. + +[discrete] +===== Hybrid search + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/02-hybrid-search.ipynb[02-hybrid-search.ipynb] +notebook, you'll learn how to: + +* Combine results of traditional text-based search with semantic search, +for a hybrid search system. +* Transform fields in the sample dataset into embeddings using the +Sentence Transformer model and index them into Elasticsearch. +* Use the +https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html#rrf-api[RRF +API] to combine the results of a `match` query and a `kNN` semantic +search. +* Walk through a super simple toy example that demonstrates, step by +step, how RRF ranking works. + +[discrete] +===== Semantic search with ELSER + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb[03-ELSER.ipynb] +notebook, you'll learn how to: + +* Use the Elastic Learned Sparse Encoder (ELSER) for text +expansion-powered semantic search, out of the box — without training, +fine-tuning, or embeddings generation. +* Download and deploy the ELSER model in your Elastic environment. +* Create an Elasticsearch index named [.title-ref]#search-movies# with +specific mappings and index a dataset of movie descriptions. 
+* Create an ingest pipeline containing an inference processor for ELSER +model execution. +* Reindex the data from [.title-ref]#search-movies# into another index, +[.title-ref]#elser-movies#, using the ELSER pipeline for text expansion. +* Observe the results of running the documents through the model by +inspecting the additional terms it adds to documents, which enhance +searchability. +* Perform simple keyword searches on the [.title-ref]#elser-movies# +index to assess the impact of ELSER's text expansion. +* Execute ELSER-powered semantic searches using the `text_expansion` +query. + +[discrete] +===== Multilingual semantic search + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/04-multilingual.ipynb[04-multilingual.ipynb] +notebook, you'll learn how to: + +* Use a multilingual embedding model for semantic search across +languages. +* Transform fields in the sample dataset into embeddings using the +Sentence Transformer model and index them into Elasticsearch. +* Use filtering with a `kNN` semantic search. +* Walk through a super simple toy example that demonstrates, step by +step, how multilingual search works across languages, and within +non-English languages. + +[discrete] +===== Query rules + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/05-query-rules.ipynb[05-query-rules.ipynb] +notebook, you'll learn how to: + +* Use the query rules management APIs to create and edit promotional +rules based on contextual queries. +* Apply these query rules by using the `rule_query` in Query DSL. + +[discrete] +===== Synonyms API quick start + +In the +https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/06-synonyms-api.ipynb[06-synonyms-api.ipynb] +notebook, you'll learn how to: + +* Use the synonyms management API to create a synonyms set to enhance +your search recall. +* Configure an index to use search-time synonyms. +* Update synonyms in real time. 
+* Run queries that are enhanced by synonyms. + +[discrete] +==== Other notebooks + +* https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/generative-ai[Generative +AI]. Notebooks that demonstrate various use cases for Elasticsearch as +the retrieval engine and vector store for LLM-powered applications. +* https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations[Integrations]. +Notebooks that demonstrate how to integrate popular services and +projects with Elasticsearch, including OpenAI, Hugging Face, and +LlamaIndex. +* https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/langchain[LangChain]. +Notebooks that demonstrate how to integrate Elastic with LangChain, a +framework for developing applications powered by language models. diff --git a/docs/guide/getting-started.asciidoc b/docs/guide/getting-started.asciidoc index 1b964e50c..58b6f33a5 100644 --- a/docs/guide/getting-started.asciidoc +++ b/docs/guide/getting-started.asciidoc @@ -70,11 +70,33 @@ This is how you create the `my_index` index: client.indices.create(index="my_index") ---- +Optionally, you can first define the expected types of your fields with a +custom mapping. 
+ +[source,py] +---- +mappings = { + "properties": { + "foo": {"type": "text"}, + "bar": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256, + } + }, + }, + } +} + +client.indices.create(index="my_index", mappings=mappings) +---- [discrete] ==== Indexing documents -This is a simple way of indexing a document: +This indexes a document with the index API: [source,py] ---- @@ -88,6 +110,28 @@ client.index( ) ---- +You can also index multiple documents at once with the bulk helper function: + +[source,py] +---- +from elasticsearch import helpers + +def generate_docs(): + for i in range(10): + yield { + "_index": "my_index", + "foo": f"foo {i}", + "bar": "bar", + } + +helpers.bulk(client, generate_docs()) +---- + +These helpers are the recommended way to perform bulk ingestion. While it is +also possible to perform bulk ingestion using `client.bulk` directly, the +helpers handle retries, ingesting chunk by chunk and more. See the +<<client-helpers>> page for more details. + [discrete] ==== Getting documents @@ -122,10 +166,14 @@ This is how you can update a document, for example to add a new field: [source,py] ---- -client.update(index="my_index", id="my_document_id", doc={ - "foo": "bar", - "new_field": "new value", -}) +client.update( + index="my_index", + id="my_document_id", + doc={ + "foo": "bar", + "new_field": "new value", + } +) ----