diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py
index 1c966b828..25f832f5d 100644
--- a/elasticsearch/_async/client/__init__.py
+++ b/elasticsearch/_async/client/__init__.py
@@ -1455,74 +1455,146 @@ async def delete_by_query(
"""
.. raw:: html
-
Delete documents.
- Deletes documents that match the specified query.
+ Delete documents.
+ Deletes documents that match the specified query.
+ If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or alias:
+
+ read
+ delete
or write
+
+ You can specify the query criteria in the request URI or the request body using the same syntax as the search API.
+ When you submit a delete by query request, Elasticsearch gets a snapshot of the data stream or index when it begins processing the request and deletes matching documents using internal versioning.
+ If a document changes between the time that the snapshot is taken and the delete operation is processed, it results in a version conflict and the delete operation fails.
+ NOTE: Documents with a version equal to 0 cannot be deleted using delete by query because internal versioning does not support 0 as a valid version number.
+ While processing a delete by query request, Elasticsearch performs multiple search requests sequentially to find all of the matching documents to delete.
+ A bulk delete request is performed for each batch of matching documents.
+ If a search or bulk request is rejected, the requests are retried up to 10 times, with exponential back off.
+ If the maximum retry limit is reached, processing halts and all failed requests are returned in the response.
+ Any delete requests that completed successfully still stick; they are not rolled back.
+ You can opt to count version conflicts instead of halting and returning by setting conflicts
to proceed
.
+ Note that if you opt to count version conflicts the operation could attempt to delete more documents from the source than max_docs
until it has successfully deleted max_docs documents
, or it has gone through every document in the source query.
+ Throttling delete requests
+ To control the rate at which delete by query issues batches of delete operations, you can set requests_per_second
to any positive decimal number.
+ This pads each batch with a wait time to throttle the rate.
+ Set requests_per_second
to -1
to disable throttling.
+ Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account.
+ The padding time is the difference between the batch size divided by the requests_per_second
and the time spent writing.
+ By default the batch size is 1000
, so if requests_per_second
is set to 500
:
+ target_time = 1000 / 500 per second = 2 seconds
+ wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
+
+ Since the batch is issued as a single _bulk
request, large batch sizes cause Elasticsearch to create many requests and wait before starting the next set.
+ This is "bursty" instead of "smooth".
+ Slicing
+ Delete by query supports sliced scroll to parallelize the delete process.
+ This can improve efficiency and provide a convenient way to break the request down into smaller parts.
+ Setting slices
to auto
lets Elasticsearch choose the number of slices to use.
+ This setting will use one slice per shard, up to a certain limit.
+ If there are multiple source data streams or indices, it will choose the number of slices based on the index or backing index with the smallest number of shards.
+ Adding slices to the delete by query operation creates sub-requests which means it has some quirks:
+
+ - You can see these requests in the tasks APIs. These sub-requests are "child" tasks of the task for the request with slices.
+ - Fetching the status of the task for the request with slices only contains the status of completed slices.
+ - These sub-requests are individually addressable for things like cancellation and rethrottling.
+ - Rethrottling the request with
slices
will rethrottle the unfinished sub-request proportionally.
+ - Canceling the request with
slices
will cancel each sub-request.
+ - Due to the nature of
slices
each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.
+ - Parameters like
requests_per_second
and max_docs
on a request with slices
are distributed proportionally to each sub-request. Combine that with the earlier point about distribution being uneven and you should conclude that using max_docs
with slices
might not result in exactly max_docs
documents being deleted.
+ - Each sub-request gets a slightly different snapshot of the source data stream or index though these are all taken at approximately the same time.
+
+ If you're slicing manually or otherwise tuning automatic slicing, keep in mind that:
+
+ - Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many
slices
hurts performance. Setting slices
higher than the number of shards generally does not improve efficiency and adds overhead.
+ - Delete performance scales linearly across available resources with the number of slices.
+
+ Whether query or delete performance dominates the runtime depends on the documents being deleted and cluster resources.
+ Cancel a delete by query operation
+ Any delete by query can be canceled using the task cancel API. For example:
+ POST _tasks/r1A2WoRbTwKZ516z6NEs5A:36619/_cancel
+
+ The task ID can be found by using the get tasks API.
+ Cancellation should happen quickly but might take a few seconds.
+ The get task status API will continue to list the delete by query task until this task checks that it has been cancelled and terminates itself.
``_
- :param index: Comma-separated list of data streams, indices, and aliases to search.
- Supports wildcards (`*`). To search all data streams or indices, omit this
- parameter or use `*` or `_all`.
+ :param index: A comma-separated list of data streams, indices, and aliases to
+ search. It supports wildcards (`*`). To search all data streams or indices,
+ omit this parameter or use `*` or `_all`.
:param allow_no_indices: If `false`, the request returns an error if any wildcard
expression, index alias, or `_all` value targets only missing or closed indices.
This behavior applies even if the request targets other open indices. For
example, a request targeting `foo*,bar*` returns an error if an index starts
with `foo` but no index starts with `bar`.
:param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
- :param analyzer: Analyzer to use for the query string.
+ This parameter can be used only when the `q` query string parameter is specified.
+ :param analyzer: Analyzer to use for the query string. This parameter can be
+ used only when the `q` query string parameter is specified.
:param conflicts: What to do if delete by query hits version conflicts: `abort`
or `proceed`.
:param default_operator: The default operator for query string query: `AND` or
- `OR`.
- :param df: Field to use as default where no field prefix is given in the query
- string.
- :param expand_wildcards: Type of index that wildcard patterns can match. If the
- request can target data streams, this argument determines whether wildcard
- expressions match hidden data streams. Supports comma-separated values, such
- as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`.
+ `OR`. This parameter can be used only when the `q` query string parameter
+ is specified.
+ :param df: The field to use as default where no field prefix is given in the
+ query string. This parameter can be used only when the `q` query string parameter
+ is specified.
+ :param expand_wildcards: The type of index that wildcard patterns can match.
+ If the request can target data streams, this argument determines whether
+ wildcard expressions match hidden data streams. It supports comma-separated
+ values, such as `open,hidden`.
:param from_: Starting offset (default: 0)
:param ignore_unavailable: If `false`, the request returns an error if it targets
a missing or closed index.
:param lenient: If `true`, format-based query failures (such as providing text
- to a numeric field) in the query string will be ignored.
+ to a numeric field) in the query string will be ignored. This parameter can
+ be used only when the `q` query string parameter is specified.
:param max_docs: The maximum number of documents to delete.
- :param preference: Specifies the node or shard the operation should be performed
- on. Random by default.
- :param q: Query in the Lucene query string syntax.
- :param query: Specifies the documents to delete using the Query DSL.
+ :param preference: The node or shard the operation should be performed on. It
+ is random by default.
+ :param q: A query in the Lucene query string syntax.
+ :param query: The documents to delete specified with Query DSL.
:param refresh: If `true`, Elasticsearch refreshes all shards involved in the
- delete by query after the request completes.
+ delete by query after the request completes. This is different than the delete
+ API's `refresh` parameter, which causes just the shard that received the
+ delete request to be refreshed. Unlike the delete API, it does not support
+ `wait_for`.
:param request_cache: If `true`, the request cache is used for this request.
Defaults to the index-level setting.
:param requests_per_second: The throttle for this request in sub-requests per
second.
- :param routing: Custom value used to route operations to a specific shard.
- :param scroll: Period to retain the search context for scrolling.
- :param scroll_size: Size of the scroll request that powers the operation.
- :param search_timeout: Explicit timeout for each search request. Defaults to
- no timeout.
- :param search_type: The type of the search operation. Available options: `query_then_fetch`,
- `dfs_query_then_fetch`.
+ :param routing: A custom value used to route operations to a specific shard.
+ :param scroll: The period to retain the search context for scrolling.
+ :param scroll_size: The size of the scroll request that powers the operation.
+ :param search_timeout: The explicit timeout for each search request. It defaults
+ to no timeout.
+ :param search_type: The type of the search operation. Available options include
+ `query_then_fetch` and `dfs_query_then_fetch`.
:param slice: Slice the request manually using the provided slice ID and total
number of slices.
:param slices: The number of slices this task should be divided into.
- :param sort: A comma-separated list of : pairs.
- :param stats: Specific `tag` of the request for logging and statistical purposes.
- :param terminate_after: Maximum number of documents to collect for each shard.
+ :param sort: A comma-separated list of `<field>:<direction>` pairs.
+ :param stats: The specific `tag` of the request for logging and statistical purposes.
+ :param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
Elasticsearch collects documents before sorting. Use with caution. Elasticsearch
applies this parameter to each shard handling the request. When possible,
let Elasticsearch perform early termination automatically. Avoid specifying
this parameter for requests that target data streams with backing indices
across multiple data tiers.
- :param timeout: Period each deletion request waits for active shards.
+ :param timeout: The period each deletion request waits for active shards.
:param version: If `true`, returns the document version as part of a hit.
:param wait_for_active_shards: The number of shard copies that must be active
- before proceeding with the operation. Set to all or any positive integer
- up to the total number of shards in the index (`number_of_replicas+1`).
+ before proceeding with the operation. Set to `all` or any positive integer
+ up to the total number of shards in the index (`number_of_replicas+1`). The
+ `timeout` value controls how long each write request waits for unavailable
+ shards to become available.
:param wait_for_completion: If `true`, the request blocks until the operation
- is complete.
+ is complete. If `false`, Elasticsearch performs some preflight checks, launches
+ the request, and returns a task you can use to cancel or get the status of
+ the task. Elasticsearch creates a record of this task as a document at `.tasks/task/${taskId}`.
+ When you are done with a task, you should delete the task document so Elasticsearch
+ can reclaim the space.
"""
if index in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'index'")
@@ -1642,11 +1714,11 @@ async def delete_by_query_rethrottle(
Rethrottling that speeds up the query takes effect immediately but rethrottling that slows down the query takes effect after completing the current batch to prevent scroll timeouts.
- ``_
+ ``_
:param task_id: The ID for the task.
:param requests_per_second: The throttle for this request in sub-requests per
- second.
+ second. To disable throttling, set it to `-1`.
"""
if task_id in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'task_id'")
@@ -3597,30 +3669,50 @@ async def open_point_in_time(
search requests using the same point in time. For example, if refreshes happen between
search_after
requests, then the results of those requests might not be consistent as changes happening
between searches are only visible to the more recent point in time.
- A point in time must be opened explicitly before being used in search requests.
- The keep_alive
parameter tells Elasticsearch how long it should persist.
+ A point in time must be opened explicitly before being used in search requests.
+ A subsequent search request with the pit
parameter must not specify index
, routing
, or preference
values as these parameters are copied from the point in time.
+ Just like regular searches, you can use from
and size
to page through point in time search results, up to the first 10,000 hits.
+ If you want to retrieve more hits, use PIT with search_after
.
+ IMPORTANT: The open point in time request and each subsequent search request can return different identifiers; always use the most recently received ID for the next search request.
+ When a PIT that contains shard failures is used in a search request, the missing shards are always reported in the search response as a NoShardAvailableActionException
exception.
+ To get rid of these exceptions, a new PIT needs to be created so that shards missing from the previous PIT can be handled, assuming they become available in the meantime.
+ Keeping point in time alive
+ The keep_alive
parameter, which is passed to an open point in time request and search request, extends the time to live of the corresponding point in time.
+ The value does not need to be long enough to process all data — it just needs to be long enough for the next request.
+ Normally, the background merge process optimizes the index by merging together smaller segments to create new, bigger segments.
+ Once the smaller segments are no longer needed they are deleted.
+ However, open point-in-times prevent the old segments from being deleted since they are still in use.
+ TIP: Keeping older segments alive means that more disk space and file handles are needed.
+ Ensure that you have configured your nodes to have ample free file handles.
+ Additionally, if a segment contains deleted or updated documents then the point in time must keep track of whether each document in the segment was live at the time of the initial search request.
+ Ensure that your nodes have sufficient heap space if you have many open point-in-times on an index that is subject to ongoing deletes or updates.
+ Note that a point-in-time doesn't prevent its associated indices from being deleted.
+ You can check how many point-in-times (that is, search contexts) are open with the nodes stats API.
``_
:param index: A comma-separated list of index names to open point in time; use
`_all` or empty string to perform the operation on all indices
- :param keep_alive: Extends the time to live of the corresponding point in time.
- :param allow_partial_search_results: If `false`, creating a point in time request
- when a shard is missing or unavailable will throw an exception. If `true`,
- the point in time will contain all the shards that are available at the time
- of the request.
- :param expand_wildcards: Type of index that wildcard patterns can match. If the
- request can target data streams, this argument determines whether wildcard
- expressions match hidden data streams. Supports comma-separated values, such
- as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`.
+ :param keep_alive: Extend the length of time that the point in time persists.
+ :param allow_partial_search_results: Indicates whether the point in time tolerates
+ unavailable shards or shard failures when initially creating the PIT. If
+ `false`, creating a point in time request when a shard is missing or unavailable
+ will throw an exception. If `true`, the point in time will contain all the
+ shards that are available at the time of the request.
+ :param expand_wildcards: The type of index that wildcard patterns can match.
+ If the request can target data streams, this argument determines whether
+ wildcard expressions match hidden data streams. It supports comma-separated
+ values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`,
+ `hidden`, `none`.
:param ignore_unavailable: If `false`, the request returns an error if it targets
a missing or closed index.
- :param index_filter: Allows to filter indices if the provided query rewrites
- to `match_none` on every shard.
- :param preference: Specifies the node or shard the operation should be performed
- on. Random by default.
- :param routing: Custom value used to route operations to a specific shard.
+ :param index_filter: Filter indices if the provided query rewrites to `match_none`
+ on every shard.
+ :param preference: The node or shard the operation should be performed on. By
+ default, it is random.
+ :param routing: A custom value that is used to route operations to a specific
+ shard.
"""
if index in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'index'")
@@ -4530,13 +4622,23 @@ async def search(
Get search hits that match the query defined in the request.
You can provide search queries using the q
query string parameter or the request body.
If both are specified, only the query parameter is used.
+ If the Elasticsearch security features are enabled, you must have the read index privilege for the target data stream, index, or alias. For cross-cluster search, refer to the documentation about configuring CCS privileges.
+ To search a point in time (PIT) for an alias, you must have the read
index privilege for the alias's data streams or indices.
+ Search slicing
+ When paging through a large number of documents, it can be helpful to split the search into multiple slices to consume them independently with the slice
and pit
properties.
+ By default the splitting is done first on the shards, then locally on each shard.
+ The local splitting partitions the shard into contiguous ranges based on Lucene document IDs.
+ For instance if the number of shards is equal to 2 and you request 4 slices, the slices 0 and 2 are assigned to the first shard and the slices 1 and 3 are assigned to the second shard.
+ IMPORTANT: The same point-in-time ID should be used for all slices.
+ If different PIT IDs are used, slices can overlap and miss documents.
+ This situation can occur because the splitting criterion is based on Lucene document IDs, which are not stable across changes to the index.
``_
- :param index: Comma-separated list of data streams, indices, and aliases to search.
- Supports wildcards (`*`). To search all data streams and indices, omit this
- parameter or use `*` or `_all`.
+ :param index: A comma-separated list of data streams, indices, and aliases to
+ search. It supports wildcards (`*`). To search all data streams and indices,
+ omit this parameter or use `*` or `_all`.
:param aggregations: Defines the aggregations that are run as part of the search
request.
:param aggs: Defines the aggregations that are run as part of the search request.
@@ -4545,45 +4647,46 @@ async def search(
This behavior applies even if the request targets other open indices. For
example, a request targeting `foo*,bar*` returns an error if an index starts
with `foo` but no index starts with `bar`.
- :param allow_partial_search_results: If true, returns partial results if there
- are shard request timeouts or shard failures. If false, returns an error
- with no partial results.
- :param analyze_wildcard: If true, wildcard and prefix queries are analyzed. This
- parameter can only be used when the q query string parameter is specified.
- :param analyzer: Analyzer to use for the query string. This parameter can only
- be used when the q query string parameter is specified.
+ :param allow_partial_search_results: If `true` and there are shard request timeouts
+ or shard failures, the request returns partial results. If `false`, it returns
+ an error with no partial results. To override the default behavior, you can
+ set the `search.default_allow_partial_results` cluster setting to `false`.
+ :param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
+ This parameter can be used only when the `q` query string parameter is specified.
+ :param analyzer: The analyzer to use for the query string. This parameter can
+ be used only when the `q` query string parameter is specified.
:param batched_reduce_size: The number of shard results that should be reduced
- at once on the coordinating node. This value should be used as a protection
- mechanism to reduce the memory overhead per search request if the potential
- number of shards in the request can be large.
- :param ccs_minimize_roundtrips: If true, network round-trips between the coordinating
- node and the remote clusters are minimized when executing cross-cluster search
+ at once on the coordinating node. If the potential number of shards in the
+ request can be large, this value should be used as a protection mechanism
+ to reduce the memory overhead per search request.
+ :param ccs_minimize_roundtrips: If `true`, network round-trips between the coordinating
+ node and the remote clusters are minimized when running cross-cluster search
(CCS) requests.
:param collapse: Collapses search results by the values of the specified field.
- :param default_operator: The default operator for query string query: AND or
- OR. This parameter can only be used when the `q` query string parameter is
- specified.
- :param df: Field to use as default where no field prefix is given in the query
- string. This parameter can only be used when the q query string parameter
+ :param default_operator: The default operator for the query string query: `AND`
+ or `OR`. This parameter can be used only when the `q` query string parameter
is specified.
- :param docvalue_fields: Array of wildcard (`*`) patterns. The request returns
- doc values for field names matching these patterns in the `hits.fields` property
- of the response.
- :param expand_wildcards: Type of index that wildcard patterns can match. If the
- request can target data streams, this argument determines whether wildcard
- expressions match hidden data streams. Supports comma-separated values, such
- as `open,hidden`.
- :param explain: If true, returns detailed information about score computation
- as part of a hit.
+ :param df: The field to use as a default when no field prefix is given in the
+ query string. This parameter can be used only when the `q` query string parameter
+ is specified.
+ :param docvalue_fields: An array of wildcard (`*`) field patterns. The request
+ returns doc values for field names matching these patterns in the `hits.fields`
+ property of the response.
+ :param expand_wildcards: The type of index that wildcard patterns can match.
+ If the request can target data streams, this argument determines whether
+ wildcard expressions match hidden data streams. It supports comma-separated
+ values such as `open,hidden`.
+ :param explain: If `true`, the request returns detailed information about score
+ computation as part of a hit.
:param ext: Configuration of search extensions defined by Elasticsearch plugins.
- :param fields: Array of wildcard (`*`) patterns. The request returns values for
- field names matching these patterns in the `hits.fields` property of the
- response.
+ :param fields: An array of wildcard (`*`) field patterns. The request returns
+ values for field names matching these patterns in the `hits.fields` property
+ of the response.
:param force_synthetic_source: Should this request force synthetic _source? Use
this to test if the mapping supports synthetic _source and to get a sense
of the worst case performance. Fetches with this enabled will be slower than
enabling synthetic source natively in the index.
- :param from_: Starting document offset. Needs to be non-negative. By default,
+ :param from_: The starting document offset, which must be non-negative. By default,
you cannot page through more than 10,000 hits using the `from` and `size`
parameters. To page through more hits, use the `search_after` parameter.
:param highlight: Specifies the highlighter to use for retrieving highlighted
@@ -4592,95 +4695,101 @@ async def search(
be ignored when frozen.
:param ignore_unavailable: If `false`, the request returns an error if it targets
a missing or closed index.
- :param include_named_queries_score: Indicates whether hit.matched_queries should
- be rendered as a map that includes the name of the matched query associated
- with its score (true) or as an array containing the name of the matched queries
- (false) This functionality reruns each named query on every hit in a search
- response. Typically, this adds a small overhead to a request. However, using
- computationally expensive named queries on a large number of hits may add
- significant overhead.
- :param indices_boost: Boosts the _score of documents from specified indices.
- :param knn: Defines the approximate kNN search to run.
+ :param include_named_queries_score: If `true`, the response includes the score
+ contribution from any named queries. This functionality reruns each named
+ query on every hit in a search response. Typically, this adds a small overhead
+ to a request. However, using computationally expensive named queries on a
+ large number of hits may add significant overhead.
+ :param indices_boost: Boost the `_score` of documents from specified indices.
+ The boost value is the factor by which scores are multiplied. A boost value
+ greater than `1.0` increases the score. A boost value between `0` and `1.0`
+ decreases the score.
+ :param knn: The approximate kNN search to run.
:param lenient: If `true`, format-based query failures (such as providing text
to a numeric field) in the query string will be ignored. This parameter can
- only be used when the `q` query string parameter is specified.
- :param max_concurrent_shard_requests: Defines the number of concurrent shard
- requests per node this search executes concurrently. This value should be
- used to limit the impact of the search on the cluster in order to limit the
- number of concurrent shard requests.
+ be used only when the `q` query string parameter is specified.
+ :param max_concurrent_shard_requests: The number of concurrent shard requests
+ per node that the search runs concurrently. This value should be used to
+ limit the impact of the search on the cluster in order to limit the number
+ of concurrent shard requests.
:param min_compatible_shard_node: The minimum version of the node that can handle
the request. Any handling node with a lower version will fail the request.
- :param min_score: Minimum `_score` for matching documents. Documents with a lower
- `_score` are not included in the search results.
- :param pit: Limits the search to a point in time (PIT). If you provide a PIT,
+ :param min_score: The minimum `_score` for matching documents. Documents with
+ a lower `_score` are not included in the search results.
+ :param pit: Limit the search to a point in time (PIT). If you provide a PIT,
you cannot specify an `<index>` in the request path.
:param post_filter: Use the `post_filter` parameter to filter search results.
The search hits are filtered after the aggregations are calculated. A post
filter has no impact on the aggregation results.
- :param pre_filter_shard_size: Defines a threshold that enforces a pre-filter
- roundtrip to prefilter search shards based on query rewriting if the number
- of shards the search request expands to exceeds the threshold. This filter
- roundtrip can limit the number of shards significantly if for instance a
- shard can not match any documents based on its rewrite method (if date filters
- are mandatory to match but the shard bounds and the query are disjoint).
- When unspecified, the pre-filter phase is executed if any of these conditions
- is met: the request targets more than 128 shards; the request targets one
- or more read-only index; the primary sort of the query targets an indexed
+ :param pre_filter_shard_size: A threshold that enforces a pre-filter roundtrip
+ to prefilter search shards based on query rewriting if the number of shards
+ the search request expands to exceeds the threshold. This filter roundtrip
+ can limit the number of shards significantly if for instance a shard can
+ not match any documents based on its rewrite method (if date filters are
+ mandatory to match but the shard bounds and the query are disjoint). When
+ unspecified, the pre-filter phase is executed if any of these conditions
+ is met: * The request targets more than 128 shards. * The request targets
+ one or more read-only indices. * The primary sort of the query targets an indexed
field.
- :param preference: Nodes and shards used for the search. By default, Elasticsearch
+ :param preference: The nodes and shards used for the search. By default, Elasticsearch
selects from eligible nodes and shards using adaptive replica selection,
- accounting for allocation awareness. Valid values are: `_only_local` to run
- the search only on shards on the local node; `_local` to, if possible, run
- the search on shards on the local node, or if not, select shards using the
- default method; `_only_nodes:,` to run the search on only
- the specified nodes IDs, where, if suitable shards exist on more than one
- selected node, use shards on those nodes using the default method, or if
- none of the specified nodes are available, select shards from any available
- node using the default method; `_prefer_nodes:,` to if
+ accounting for allocation awareness. Valid values are: * `_only_local` to
+ run the search only on shards on the local node; * `_local` to, if possible,
+ run the search on shards on the local node, or if not, select shards using
+ the default method; * `_only_nodes:<node-id>,<node-id>` to run the search
+ on only the specified nodes IDs, where, if suitable shards exist on more
+ than one selected node, use shards on those nodes using the default method,
+ or if none of the specified nodes are available, select shards from any available
+ node using the default method; * `_prefer_nodes:<node-id>,<node-id>` to if
possible, run the search on the specified nodes IDs, or if not, select shards
- using the default method; `_shards:,` to run the search only
- on the specified shards; `` (any string that does not start
+ using the default method; * `_shards:<shard>,<shard>` to run the search only
+ on the specified shards; * `<custom-string>` (any string that does not start
with `_`) to route searches with the same `<custom-string>` to the same shards
in the same order.
:param profile: Set to `true` to return detailed timing information about the
execution of individual components in a search request. NOTE: This is a debugging
tool and adds significant overhead to search execution.
- :param q: Query in the Lucene query string syntax using query parameter search.
- Query parameter searches do not support the full Elasticsearch Query DSL
- but are handy for testing.
- :param query: Defines the search definition using the Query DSL.
- :param rank: Defines the Reciprocal Rank Fusion (RRF) to use.
+ :param q: A query in the Lucene query string syntax. Query parameter searches
+ do not support the full Elasticsearch Query DSL but are handy for testing.
+ IMPORTANT: This parameter overrides the query parameter in the request body.
+ If both parameters are specified, documents matching the query request body
+ parameter are not returned.
+ :param query: The search definition using the Query DSL.
+ :param rank: The Reciprocal Rank Fusion (RRF) to use.
:param request_cache: If `true`, the caching of search results is enabled for
- requests where `size` is `0`. Defaults to index level settings.
+ requests where `size` is `0`. It defaults to index level settings.
:param rescore: Can be used to improve precision by reordering just the top (for
example 100 - 500) documents returned by the `query` and `post_filter` phases.
:param rest_total_hits_as_int: Indicates whether `hits.total` should be rendered
as an integer or an object in the rest search response.
:param retriever: A retriever is a specification to describe top documents returned
from a search. A retriever replaces other elements of the search API that
- also return top documents such as query and knn.
- :param routing: Custom value used to route operations to a specific shard.
- :param runtime_mappings: Defines one or more runtime fields in the search request.
- These fields take precedence over mapped fields with the same name.
+ also return top documents such as `query` and `knn`.
+ :param routing: A custom value that is used to route operations to a specific
+ shard.
+ :param runtime_mappings: One or more runtime fields in the search request. These
+ fields take precedence over mapped fields with the same name.
:param script_fields: Retrieve a script evaluation (based on different fields)
for each hit.
- :param scroll: Period to retain the search context for scrolling. See Scroll
- search results. By default, this value cannot exceed `1d` (24 hours). You
- can change this limit using the `search.max_keep_alive` cluster-level setting.
+ :param scroll: The period to retain the search context for scrolling. By default,
+ this value cannot exceed `1d` (24 hours). You can change this limit by using
+ the `search.max_keep_alive` cluster-level setting.
:param search_after: Used to retrieve the next page of hits using a set of sort
values from the previous page.
- :param search_type: How distributed term frequencies are calculated for relevance
- scoring.
- :param seq_no_primary_term: If `true`, returns sequence number and primary term
- of the last modification of each hit.
- :param size: The number of hits to return. By default, you cannot page through
- more than 10,000 hits using the `from` and `size` parameters. To page through
- more hits, use the `search_after` parameter.
- :param slice: Can be used to split a scrolled search into multiple slices that
- can be consumed independently.
+ :param search_type: Indicates how distributed term frequencies are calculated
+ for relevance scoring.
+ :param seq_no_primary_term: If `true`, the request returns sequence number and
+ primary term of the last modification of each hit.
+ :param size: The number of hits to return, which must not be negative. By default,
+ you cannot page through more than 10,000 hits using the `from` and `size`
+ parameters. To page through more hits, use the `search_after` property.
+ :param slice: Split a scrolled search into multiple slices that can be consumed
+ independently.
:param sort: A comma-separated list of <field>:<direction> pairs.
- :param source: Indicates which source fields are returned for matching documents.
- These fields are returned in the hits._source property of the search response.
+ :param source: The source fields that are returned for matching documents. These
+ fields are returned in the `hits._source` property of the search response.
+ If the `stored_fields` property is specified, the `_source` property defaults
+ to `false`. Otherwise, it defaults to `true`.
:param source_excludes: A comma-separated list of source fields to exclude from
the response. You can also use this parameter to exclude fields from the
subset specified in `_source_includes` query parameter. If the `_source`
@@ -4690,45 +4799,46 @@ async def search(
returned. You can exclude fields from this subset using the `_source_excludes`
query parameter. If the `_source` parameter is `false`, this parameter is
ignored.
- :param stats: Stats groups to associate with the search. Each group maintains
+ :param stats: The stats groups to associate with the search. Each group maintains
a statistics aggregation for its associated searches. You can retrieve these
stats using the indices stats API.
- :param stored_fields: List of stored fields to return as part of a hit. If no
- fields are specified, no stored fields are included in the response. If this
- field is specified, the `_source` parameter defaults to `false`. You can
- pass `_source: true` to return both source fields and stored fields in the
- search response.
+ :param stored_fields: A comma-separated list of stored fields to return as part
+ of a hit. If no fields are specified, no stored fields are included in the
+ response. If this field is specified, the `_source` property defaults to
+ `false`. You can pass `_source: true` to return both source fields and stored
+ fields in the search response.
:param suggest: Defines a suggester that provides similar looking terms based
on a provided text.
- :param suggest_field: Specifies which field to use for suggestions.
- :param suggest_mode: Specifies the suggest mode. This parameter can only be used
- when the `suggest_field` and `suggest_text` query string parameters are specified.
- :param suggest_size: Number of suggestions to return. This parameter can only
- be used when the `suggest_field` and `suggest_text` query string parameters
+ :param suggest_field: The field to use for suggestions.
+ :param suggest_mode: The suggest mode. This parameter can be used only when the
+ `suggest_field` and `suggest_text` query string parameters are specified.
+ :param suggest_size: The number of suggestions to return. This parameter can
+ be used only when the `suggest_field` and `suggest_text` query string parameters
are specified.
:param suggest_text: The source text for which the suggestions should be returned.
- This parameter can only be used when the `suggest_field` and `suggest_text`
+ This parameter can be used only when the `suggest_field` and `suggest_text`
query string parameters are specified.
- :param terminate_after: Maximum number of documents to collect for each shard.
+ :param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
- Elasticsearch collects documents before sorting. Use with caution. Elasticsearch
- applies this parameter to each shard handling the request. When possible,
- let Elasticsearch perform early termination automatically. Avoid specifying
- this parameter for requests that target data streams with backing indices
- across multiple data tiers. If set to `0` (default), the query does not terminate
- early.
- :param timeout: Specifies the period of time to wait for a response from each
- shard. If no response is received before the timeout expires, the request
- fails and returns an error. Defaults to no timeout.
- :param track_scores: If true, calculate and return document scores, even if the
- scores are not used for sorting.
+ Elasticsearch collects documents before sorting. IMPORTANT: Use with caution.
+ Elasticsearch applies this property to each shard handling the request. When
+ possible, let Elasticsearch perform early termination automatically. Avoid
+ specifying this property for requests that target data streams with backing
+ indices across multiple data tiers. If set to `0` (default), the query does
+ not terminate early.
+ :param timeout: The period of time to wait for a response from each shard. If
+ no response is received before the timeout expires, the request fails and
+ returns an error. Defaults to no timeout.
+ :param track_scores: If `true`, calculate and return document scores, even if
+ the scores are not used for sorting.
:param track_total_hits: Number of hits matching the query to count accurately.
If `true`, the exact number of hits is returned at the cost of some performance.
If `false`, the response does not include the total number of hits matching
the query.
:param typed_keys: If `true`, aggregation and suggester names are be prefixed
by their respective types in the response.
- :param version: If true, returns document version as part of a hit.
+ :param version: If `true`, the request returns the document version as part of
+ a hit.
"""
__path_parts: t.Dict[str, str]
if index not in SKIP_IN_PATH:
diff --git a/elasticsearch/_async/client/indices.py b/elasticsearch/_async/client/indices.py
index 2edac7ded..02b315efb 100644
--- a/elasticsearch/_async/client/indices.py
+++ b/elasticsearch/_async/client/indices.py
@@ -230,6 +230,51 @@ async def analyze(
path_parts=__path_parts,
)
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def cancel_migrate_reindex(
+ self,
+ *,
+ index: t.Union[str, t.Sequence[str]],
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Cancel a migration reindex operation.
+ Cancel a migration reindex attempt for a data stream or index.
+
+
+ ``_
+
+ :param index: The index or data stream name.
+ """
+ if index in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'index'")
+ __path_parts: t.Dict[str, str] = {"index": _quote(index)}
+ __path = f'/_migration/reindex/{__path_parts["index"]}/_cancel'
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __headers = {"accept": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="indices.cancel_migrate_reindex",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
async def clear_cache(
self,
@@ -710,6 +755,71 @@ async def create_data_stream(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_name="create_from",
+ )
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def create_from(
+ self,
+ *,
+ source: str,
+ dest: str,
+ create_from: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an index from a source index.
+ Copy the mappings and settings from the source index to a destination index while allowing request settings and mappings to override the source values.
+
+
+ ``_
+
+ :param source: The source index or data stream name
+ :param dest: The destination index or data stream name
+ :param create_from: Request body with the settings and mappings that override the values copied from the source index.
+ """
+ if source in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'source'")
+ if dest in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'dest'")
+ if create_from is None and body is None:
+ raise ValueError(
+ "Empty value passed for parameters 'create_from' and 'body', one of them should be set."
+ )
+ elif create_from is not None and body is not None:
+ raise ValueError("Cannot set both 'create_from' and 'body'")
+ __path_parts: t.Dict[str, str] = {
+ "source": _quote(source),
+ "dest": _quote(dest),
+ }
+ __path = f'/_create_from/{__path_parts["source"]}/{__path_parts["dest"]}'
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __body = create_from if create_from is not None else body
+ __headers = {"accept": "application/json", "content-type": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="indices.create_from",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
async def data_streams_stats(
self,
@@ -2585,6 +2695,51 @@ async def get_mapping(
path_parts=__path_parts,
)
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def get_migrate_reindex_status(
+ self,
+ *,
+ index: t.Union[str, t.Sequence[str]],
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Get the migration reindexing status.
+ Get the status of a migration reindex attempt for a data stream or index.
+
+
+ ``_
+
+ :param index: The index or data stream name.
+ """
+ if index in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'index'")
+ __path_parts: t.Dict[str, str] = {"index": _quote(index)}
+ __path = f'/_migration/reindex/{__path_parts["index"]}/_status'
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __headers = {"accept": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "GET",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="indices.get_migrate_reindex_status",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
async def get_settings(
self,
@@ -2754,6 +2909,62 @@ async def get_template(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_name="reindex",
+ )
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def migrate_reindex(
+ self,
+ *,
+ reindex: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Reindex legacy backing indices.
+ Reindex all legacy backing indices for a data stream.
+ This operation occurs in a persistent task.
+ The persistent task ID is returned immediately and the reindexing work is completed in that task.
+
+
+ ``_
+
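+ :param reindex: Request body describing the reindex migration to start; it is sent unchanged as the body of the request.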
+ """
+ if reindex is None and body is None:
+ raise ValueError(
+ "Empty value passed for parameters 'reindex' and 'body', one of them should be set."
+ )
+ elif reindex is not None and body is not None:
+ raise ValueError("Cannot set both 'reindex' and 'body'")
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_migration/reindex"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __body = reindex if reindex is not None else body
+ __headers = {"accept": "application/json", "content-type": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="indices.migrate_reindex",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
async def migrate_to_data_stream(
self,
diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py
index 243849fb8..67187220a 100644
--- a/elasticsearch/_sync/client/__init__.py
+++ b/elasticsearch/_sync/client/__init__.py
@@ -1453,74 +1453,146 @@ def delete_by_query(
"""
.. raw:: html
- Delete documents.
- Deletes documents that match the specified query.
+ Delete documents.
+ Deletes documents that match the specified query.
+ If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or alias:
+
+ read
+ delete
or write
+
+ You can specify the query criteria in the request URI or the request body using the same syntax as the search API.
+ When you submit a delete by query request, Elasticsearch gets a snapshot of the data stream or index when it begins processing the request and deletes matching documents using internal versioning.
+ If a document changes between the time that the snapshot is taken and the delete operation is processed, it results in a version conflict and the delete operation fails.
+ NOTE: Documents with a version equal to 0 cannot be deleted using delete by query because internal versioning does not support 0 as a valid version number.
+ While processing a delete by query request, Elasticsearch performs multiple search requests sequentially to find all of the matching documents to delete.
+ A bulk delete request is performed for each batch of matching documents.
+ If a search or bulk request is rejected, the requests are retried up to 10 times, with exponential back off.
+ If the maximum retry limit is reached, processing halts and all failed requests are returned in the response.
+ Any delete requests that completed successfully still stick; they are not rolled back.
+ You can opt to count version conflicts instead of halting and returning by setting conflicts
to proceed
.
+ Note that if you opt to count version conflicts the operation could attempt to delete more documents from the source than max_docs
until it has successfully deleted max_docs documents
, or it has gone through every document in the source query.
+ Throttling delete requests
+ To control the rate at which delete by query issues batches of delete operations, you can set requests_per_second
to any positive decimal number.
+ This pads each batch with a wait time to throttle the rate.
+ Set requests_per_second
to -1
to disable throttling.
+ Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account.
+ The padding time is the difference between the batch size divided by the requests_per_second
and the time spent writing.
+ By default the batch size is 1000
, so if requests_per_second
is set to 500
:
+ target_time = 1000 / 500 per second = 2 seconds
+ wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
+
+ Since the batch is issued as a single _bulk
request, large batch sizes cause Elasticsearch to create many requests and wait before starting the next set.
+ This is "bursty" instead of "smooth".
+ Slicing
+ Delete by query supports sliced scroll to parallelize the delete process.
+ This can improve efficiency and provide a convenient way to break the request down into smaller parts.
+ Setting slices
to auto
lets Elasticsearch choose the number of slices to use.
+ This setting will use one slice per shard, up to a certain limit.
+ If there are multiple source data streams or indices, it will choose the number of slices based on the index or backing index with the smallest number of shards.
+ Adding slices to the delete by query operation creates sub-requests which means it has some quirks:
+
+ - You can see these requests in the tasks APIs. These sub-requests are "child" tasks of the task for the request with slices.
+ - Fetching the status of the task for the request with slices only contains the status of completed slices.
+ - These sub-requests are individually addressable for things like cancellation and rethrottling.
+ - Rethrottling the request with
slices
will rethrottle the unfinished sub-request proportionally.
+ - Canceling the request with
slices
will cancel each sub-request.
+ - Due to the nature of
slices
each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.
+ - Parameters like
requests_per_second
and max_docs
on a request with slices
are distributed proportionally to each sub-request. Combine that with the earlier point about distribution being uneven and you should conclude that using max_docs
with slices
might not result in exactly max_docs
documents being deleted.
+ - Each sub-request gets a slightly different snapshot of the source data stream or index though these are all taken at approximately the same time.
+
+ If you're slicing manually or otherwise tuning automatic slicing, keep in mind that:
+
+ - Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many
slices
hurts performance. Setting slices
higher than the number of shards generally does not improve efficiency and adds overhead.
+ - Delete performance scales linearly across available resources with the number of slices.
+
+ Whether query or delete performance dominates the runtime depends on the documents being deleted and cluster resources.
+ Cancel a delete by query operation
+ Any delete by query can be canceled using the task cancel API. For example:
+ POST _tasks/r1A2WoRbTwKZ516z6NEs5A:36619/_cancel
+
+ The task ID can be found by using the get tasks API.
+ Cancellation should happen quickly but might take a few seconds.
+ The get task status API will continue to list the delete by query task until this task checks that it has been cancelled and terminates itself.
``_
- :param index: Comma-separated list of data streams, indices, and aliases to search.
- Supports wildcards (`*`). To search all data streams or indices, omit this
- parameter or use `*` or `_all`.
+ :param index: A comma-separated list of data streams, indices, and aliases to
+ search. It supports wildcards (`*`). To search all data streams or indices,
+ omit this parameter or use `*` or `_all`.
:param allow_no_indices: If `false`, the request returns an error if any wildcard
expression, index alias, or `_all` value targets only missing or closed indices.
This behavior applies even if the request targets other open indices. For
example, a request targeting `foo*,bar*` returns an error if an index starts
with `foo` but no index starts with `bar`.
:param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
- :param analyzer: Analyzer to use for the query string.
+ This parameter can be used only when the `q` query string parameter is specified.
+ :param analyzer: Analyzer to use for the query string. This parameter can be
+ used only when the `q` query string parameter is specified.
:param conflicts: What to do if delete by query hits version conflicts: `abort`
or `proceed`.
:param default_operator: The default operator for query string query: `AND` or
- `OR`.
- :param df: Field to use as default where no field prefix is given in the query
- string.
- :param expand_wildcards: Type of index that wildcard patterns can match. If the
- request can target data streams, this argument determines whether wildcard
- expressions match hidden data streams. Supports comma-separated values, such
- as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`.
+ `OR`. This parameter can be used only when the `q` query string parameter
+ is specified.
+ :param df: The field to use as default where no field prefix is given in the
+ query string. This parameter can be used only when the `q` query string parameter
+ is specified.
+ :param expand_wildcards: The type of index that wildcard patterns can match.
+ If the request can target data streams, this argument determines whether
+ wildcard expressions match hidden data streams. It supports comma-separated
+ values, such as `open,hidden`.
:param from_: Starting offset (default: 0)
:param ignore_unavailable: If `false`, the request returns an error if it targets
a missing or closed index.
:param lenient: If `true`, format-based query failures (such as providing text
- to a numeric field) in the query string will be ignored.
+ to a numeric field) in the query string will be ignored. This parameter can
+ be used only when the `q` query string parameter is specified.
:param max_docs: The maximum number of documents to delete.
- :param preference: Specifies the node or shard the operation should be performed
- on. Random by default.
- :param q: Query in the Lucene query string syntax.
- :param query: Specifies the documents to delete using the Query DSL.
+ :param preference: The node or shard the operation should be performed on. It
+ is random by default.
+ :param q: A query in the Lucene query string syntax.
+ :param query: The documents to delete specified with Query DSL.
:param refresh: If `true`, Elasticsearch refreshes all shards involved in the
- delete by query after the request completes.
+ delete by query after the request completes. This is different than the delete
+ API's `refresh` parameter, which causes just the shard that received the
+ delete request to be refreshed. Unlike the delete API, it does not support
+ `wait_for`.
:param request_cache: If `true`, the request cache is used for this request.
Defaults to the index-level setting.
:param requests_per_second: The throttle for this request in sub-requests per
second.
- :param routing: Custom value used to route operations to a specific shard.
- :param scroll: Period to retain the search context for scrolling.
- :param scroll_size: Size of the scroll request that powers the operation.
- :param search_timeout: Explicit timeout for each search request. Defaults to
- no timeout.
- :param search_type: The type of the search operation. Available options: `query_then_fetch`,
- `dfs_query_then_fetch`.
+ :param routing: A custom value used to route operations to a specific shard.
+ :param scroll: The period to retain the search context for scrolling.
+ :param scroll_size: The size of the scroll request that powers the operation.
+ :param search_timeout: The explicit timeout for each search request. It defaults
+ to no timeout.
+ :param search_type: The type of the search operation. Available options include
+ `query_then_fetch` and `dfs_query_then_fetch`.
:param slice: Slice the request manually using the provided slice ID and total
number of slices.
:param slices: The number of slices this task should be divided into.
- :param sort: A comma-separated list of <field>:<direction> pairs.
- :param stats: Specific `tag` of the request for logging and statistical purposes.
- :param terminate_after: Maximum number of documents to collect for each shard.
+ :param sort: A comma-separated list of `<field>:<direction>` pairs.
+ :param stats: The specific `tag` of the request for logging and statistical purposes.
+ :param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
Elasticsearch collects documents before sorting. Use with caution. Elasticsearch
applies this parameter to each shard handling the request. When possible,
let Elasticsearch perform early termination automatically. Avoid specifying
this parameter for requests that target data streams with backing indices
across multiple data tiers.
- :param timeout: Period each deletion request waits for active shards.
+ :param timeout: The period each deletion request waits for active shards.
:param version: If `true`, returns the document version as part of a hit.
:param wait_for_active_shards: The number of shard copies that must be active
- before proceeding with the operation. Set to all or any positive integer
- up to the total number of shards in the index (`number_of_replicas+1`).
+ before proceeding with the operation. Set to `all` or any positive integer
+ up to the total number of shards in the index (`number_of_replicas+1`). The
+ `timeout` value controls how long each write request waits for unavailable
+ shards to become available.
:param wait_for_completion: If `true`, the request blocks until the operation
- is complete.
+ is complete. If `false`, Elasticsearch performs some preflight checks, launches
+ the request, and returns a task you can use to cancel or get the status of
+ the task. Elasticsearch creates a record of this task as a document at `.tasks/task/${taskId}`.
+ When you are done with a task, you should delete the task document so Elasticsearch
+ can reclaim the space.
"""
if index in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'index'")
@@ -1640,11 +1712,11 @@ def delete_by_query_rethrottle(
Rethrottling that speeds up the query takes effect immediately but rethrotting that slows down the query takes effect after completing the current batch to prevent scroll timeouts.
- ``_
+ ``_
:param task_id: The ID for the task.
:param requests_per_second: The throttle for this request in sub-requests per
- second.
+ second. To disable throttling, set it to `-1`.
"""
if task_id in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'task_id'")
@@ -3595,30 +3667,50 @@ def open_point_in_time(
search requests using the same point in time. For example, if refreshes happen between
search_after
requests, then the results of those requests might not be consistent as changes happening
between searches are only visible to the more recent point in time.
- A point in time must be opened explicitly before being used in search requests.
- The keep_alive
parameter tells Elasticsearch how long it should persist.
+ A point in time must be opened explicitly before being used in search requests.
+ A subsequent search request with the pit
parameter must not specify index
, routing
, or preference
values as these parameters are copied from the point in time.
+ Just like regular searches, you can use from
and size
to page through point in time search results, up to the first 10,000 hits.
+ If you want to retrieve more hits, use PIT with search_after
.
+ IMPORTANT: The open point in time request and each subsequent search request can return different identifiers; always use the most recently received ID for the next search request.
+ When a PIT that contains shard failures is used in a search request, the missing shards are always reported in the search response as a NoShardAvailableActionException
exception.
+ To get rid of these exceptions, a new PIT needs to be created so that shards missing from the previous PIT can be handled, assuming they become available in the meantime.
+ Keeping point in time alive
+ The keep_alive
 parameter, which is passed to an open point in time request and search request, extends the time to live of the corresponding point in time.
+ The value does not need to be long enough to process all data — it just needs to be long enough for the next request.
+ Normally, the background merge process optimizes the index by merging together smaller segments to create new, bigger segments.
+ Once the smaller segments are no longer needed they are deleted.
+ However, open point-in-times prevent the old segments from being deleted since they are still in use.
+ TIP: Keeping older segments alive means that more disk space and file handles are needed.
+ Ensure that you have configured your nodes to have ample free file handles.
+ Additionally, if a segment contains deleted or updated documents then the point in time must keep track of whether each document in the segment was live at the time of the initial search request.
+ Ensure that your nodes have sufficient heap space if you have many open point-in-times on an index that is subject to ongoing deletes or updates.
+ Note that a point-in-time doesn't prevent its associated indices from being deleted.
+ You can check how many point-in-times (that is, search contexts) are open with the nodes stats API.
``_
:param index: A comma-separated list of index names to open point in time; use
`_all` or empty string to perform the operation on all indices
- :param keep_alive: Extends the time to live of the corresponding point in time.
- :param allow_partial_search_results: If `false`, creating a point in time request
- when a shard is missing or unavailable will throw an exception. If `true`,
- the point in time will contain all the shards that are available at the time
- of the request.
- :param expand_wildcards: Type of index that wildcard patterns can match. If the
- request can target data streams, this argument determines whether wildcard
- expressions match hidden data streams. Supports comma-separated values, such
- as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`.
+ :param keep_alive: Extend the length of time that the point in time persists.
+ :param allow_partial_search_results: Indicates whether the point in time tolerates
+ unavailable shards or shard failures when initially creating the PIT. If
+ `false`, creating a point in time request when a shard is missing or unavailable
+ will throw an exception. If `true`, the point in time will contain all the
+ shards that are available at the time of the request.
+ :param expand_wildcards: The type of index that wildcard patterns can match.
+ If the request can target data streams, this argument determines whether
+ wildcard expressions match hidden data streams. It supports comma-separated
+ values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`,
+ `hidden`, `none`.
:param ignore_unavailable: If `false`, the request returns an error if it targets
a missing or closed index.
- :param index_filter: Allows to filter indices if the provided query rewrites
- to `match_none` on every shard.
- :param preference: Specifies the node or shard the operation should be performed
- on. Random by default.
- :param routing: Custom value used to route operations to a specific shard.
+ :param index_filter: Filter indices if the provided query rewrites to `match_none`
+ on every shard.
+ :param preference: The node or shard the operation should be performed on. By
+ default, it is random.
+ :param routing: A custom value that is used to route operations to a specific
+ shard.
"""
if index in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'index'")
@@ -4528,13 +4620,23 @@ def search(
Get search hits that match the query defined in the request.
You can provide search queries using the q
query string parameter or the request body.
If both are specified, only the query parameter is used.
+ If the Elasticsearch security features are enabled, you must have the read index privilege for the target data stream, index, or alias. For cross-cluster search, refer to the documentation about configuring CCS privileges.
+ To search a point in time (PIT) for an alias, you must have the read
index privilege for the alias's data streams or indices.
+ Search slicing
+ When paging through a large number of documents, it can be helpful to split the search into multiple slices to consume them independently with the slice
and pit
properties.
+ By default the splitting is done first on the shards, then locally on each shard.
+ The local splitting partitions the shard into contiguous ranges based on Lucene document IDs.
+ For instance if the number of shards is equal to 2 and you request 4 slices, the slices 0 and 2 are assigned to the first shard and the slices 1 and 3 are assigned to the second shard.
+ IMPORTANT: The same point-in-time ID should be used for all slices.
+ If different PIT IDs are used, slices can overlap and miss documents.
+ This situation can occur because the splitting criterion is based on Lucene document IDs, which are not stable across changes to the index.
``_
- :param index: Comma-separated list of data streams, indices, and aliases to search.
- Supports wildcards (`*`). To search all data streams and indices, omit this
- parameter or use `*` or `_all`.
+ :param index: A comma-separated list of data streams, indices, and aliases to
+ search. It supports wildcards (`*`). To search all data streams and indices,
+ omit this parameter or use `*` or `_all`.
:param aggregations: Defines the aggregations that are run as part of the search
request.
:param aggs: Defines the aggregations that are run as part of the search request.
@@ -4543,45 +4645,46 @@ def search(
This behavior applies even if the request targets other open indices. For
example, a request targeting `foo*,bar*` returns an error if an index starts
with `foo` but no index starts with `bar`.
- :param allow_partial_search_results: If true, returns partial results if there
- are shard request timeouts or shard failures. If false, returns an error
- with no partial results.
- :param analyze_wildcard: If true, wildcard and prefix queries are analyzed. This
- parameter can only be used when the q query string parameter is specified.
- :param analyzer: Analyzer to use for the query string. This parameter can only
- be used when the q query string parameter is specified.
+ :param allow_partial_search_results: If `true` and there are shard request timeouts
+ or shard failures, the request returns partial results. If `false`, it returns
+ an error with no partial results. To override the default behavior, you can
+ set the `search.default_allow_partial_results` cluster setting to `false`.
+ :param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
+ This parameter can be used only when the `q` query string parameter is specified.
+ :param analyzer: The analyzer to use for the query string. This parameter can
+ be used only when the `q` query string parameter is specified.
:param batched_reduce_size: The number of shard results that should be reduced
- at once on the coordinating node. This value should be used as a protection
- mechanism to reduce the memory overhead per search request if the potential
- number of shards in the request can be large.
- :param ccs_minimize_roundtrips: If true, network round-trips between the coordinating
- node and the remote clusters are minimized when executing cross-cluster search
+ at once on the coordinating node. If the potential number of shards in the
+ request can be large, this value should be used as a protection mechanism
+ to reduce the memory overhead per search request.
+ :param ccs_minimize_roundtrips: If `true`, network round-trips between the coordinating
+ node and the remote clusters are minimized when running cross-cluster search
(CCS) requests.
:param collapse: Collapses search results the values of the specified field.
- :param default_operator: The default operator for query string query: AND or
- OR. This parameter can only be used when the `q` query string parameter is
- specified.
- :param df: Field to use as default where no field prefix is given in the query
- string. This parameter can only be used when the q query string parameter
+ :param default_operator: The default operator for the query string query: `AND`
+ or `OR`. This parameter can be used only when the `q` query string parameter
is specified.
- :param docvalue_fields: Array of wildcard (`*`) patterns. The request returns
- doc values for field names matching these patterns in the `hits.fields` property
- of the response.
- :param expand_wildcards: Type of index that wildcard patterns can match. If the
- request can target data streams, this argument determines whether wildcard
- expressions match hidden data streams. Supports comma-separated values, such
- as `open,hidden`.
- :param explain: If true, returns detailed information about score computation
- as part of a hit.
+ :param df: The field to use as a default when no field prefix is given in the
+ query string. This parameter can be used only when the `q` query string parameter
+ is specified.
+ :param docvalue_fields: An array of wildcard (`*`) field patterns. The request
+ returns doc values for field names matching these patterns in the `hits.fields`
+ property of the response.
+ :param expand_wildcards: The type of index that wildcard patterns can match.
+ If the request can target data streams, this argument determines whether
+ wildcard expressions match hidden data streams. It supports comma-separated
+ values such as `open,hidden`.
+ :param explain: If `true`, the request returns detailed information about score
+ computation as part of a hit.
:param ext: Configuration of search extensions defined by Elasticsearch plugins.
- :param fields: Array of wildcard (`*`) patterns. The request returns values for
- field names matching these patterns in the `hits.fields` property of the
- response.
+ :param fields: An array of wildcard (`*`) field patterns. The request returns
+ values for field names matching these patterns in the `hits.fields` property
+ of the response.
:param force_synthetic_source: Should this request force synthetic _source? Use
this to test if the mapping supports synthetic _source and to get a sense
of the worst case performance. Fetches with this enabled will be slower the
enabling synthetic source natively in the index.
- :param from_: Starting document offset. Needs to be non-negative. By default,
+ :param from_: The starting document offset, which must be non-negative. By default,
you cannot page through more than 10,000 hits using the `from` and `size`
parameters. To page through more hits, use the `search_after` parameter.
:param highlight: Specifies the highlighter to use for retrieving highlighted
@@ -4590,95 +4693,101 @@ def search(
be ignored when frozen.
:param ignore_unavailable: If `false`, the request returns an error if it targets
a missing or closed index.
- :param include_named_queries_score: Indicates whether hit.matched_queries should
- be rendered as a map that includes the name of the matched query associated
- with its score (true) or as an array containing the name of the matched queries
- (false) This functionality reruns each named query on every hit in a search
- response. Typically, this adds a small overhead to a request. However, using
- computationally expensive named queries on a large number of hits may add
- significant overhead.
- :param indices_boost: Boosts the _score of documents from specified indices.
- :param knn: Defines the approximate kNN search to run.
+ :param include_named_queries_score: If `true`, the response includes the score
+ contribution from any named queries. This functionality reruns each named
+ query on every hit in a search response. Typically, this adds a small overhead
+ to a request. However, using computationally expensive named queries on a
+ large number of hits may add significant overhead.
+ :param indices_boost: Boost the `_score` of documents from specified indices.
+ The boost value is the factor by which scores are multiplied. A boost value
+ greater than `1.0` increases the score. A boost value between `0` and `1.0`
+ decreases the score.
+ :param knn: The approximate kNN search to run.
:param lenient: If `true`, format-based query failures (such as providing text
to a numeric field) in the query string will be ignored. This parameter can
- only be used when the `q` query string parameter is specified.
- :param max_concurrent_shard_requests: Defines the number of concurrent shard
- requests per node this search executes concurrently. This value should be
- used to limit the impact of the search on the cluster in order to limit the
- number of concurrent shard requests.
+ be used only when the `q` query string parameter is specified.
+ :param max_concurrent_shard_requests: The number of shard requests per node
+ that the search runs concurrently. Use this value to limit the impact of
+ the search on the cluster by capping the number of concurrent shard
+ requests.
:param min_compatible_shard_node: The minimum version of the node that can handle
the request Any handling node with a lower version will fail the request.
- :param min_score: Minimum `_score` for matching documents. Documents with a lower
- `_score` are not included in the search results.
- :param pit: Limits the search to a point in time (PIT). If you provide a PIT,
+ :param min_score: The minimum `_score` for matching documents. Documents with
+ a lower `_score` are not included in the search results.
+ :param pit: Limit the search to a point in time (PIT). If you provide a PIT,
you cannot specify an `<index>` in the request path.
:param post_filter: Use the `post_filter` parameter to filter search results.
The search hits are filtered after the aggregations are calculated. A post
filter has no impact on the aggregation results.
- :param pre_filter_shard_size: Defines a threshold that enforces a pre-filter
- roundtrip to prefilter search shards based on query rewriting if the number
- of shards the search request expands to exceeds the threshold. This filter
- roundtrip can limit the number of shards significantly if for instance a
- shard can not match any documents based on its rewrite method (if date filters
- are mandatory to match but the shard bounds and the query are disjoint).
- When unspecified, the pre-filter phase is executed if any of these conditions
- is met: the request targets more than 128 shards; the request targets one
- or more read-only index; the primary sort of the query targets an indexed
+ :param pre_filter_shard_size: A threshold that enforces a pre-filter roundtrip
+ to prefilter search shards based on query rewriting if the number of shards
+ the search request expands to exceeds the threshold. This filter roundtrip
+ can limit the number of shards significantly if, for instance, a shard
+ cannot match any documents based on its rewrite method (if date filters are
+ mandatory to match but the shard bounds and the query are disjoint). When
+ unspecified, the pre-filter phase is executed if any of these conditions
+ is met: * The request targets more than 128 shards. * The request targets
+ one or more read-only indices. * The primary sort of the query targets an indexed
field.
- :param preference: Nodes and shards used for the search. By default, Elasticsearch
+ :param preference: The nodes and shards used for the search. By default, Elasticsearch
selects from eligible nodes and shards using adaptive replica selection,
- accounting for allocation awareness. Valid values are: `_only_local` to run
- the search only on shards on the local node; `_local` to, if possible, run
- the search on shards on the local node, or if not, select shards using the
- default method; `_only_nodes:<node-id>,<node-id>` to run the search on only
- the specified nodes IDs, where, if suitable shards exist on more than one
- selected node, use shards on those nodes using the default method, or if
- none of the specified nodes are available, select shards from any available
- node using the default method; `_prefer_nodes:<node-id>,<node-id>` to if
+ accounting for allocation awareness. Valid values are: * `_only_local` to
+ run the search only on shards on the local node; * `_local` to, if possible,
+ run the search on shards on the local node, or if not, select shards using
+ the default method; * `_only_nodes:<node-id>,<node-id>` to run the search
+ on only the specified nodes IDs, where, if suitable shards exist on more
+ than one selected node, use shards on those nodes using the default method,
+ or if none of the specified nodes are available, select shards from any available
+ node using the default method; * `_prefer_nodes:<node-id>,<node-id>` to if
possible, run the search on the specified nodes IDs, or if not, select shards
- using the default method; `_shards:<shard>,<shard>` to run the search only
- on the specified shards; `<custom-string>` (any string that does not start
+ using the default method; * `_shards:<shard>,<shard>` to run the search only
+ on the specified shards; * `<custom-string>` (any string that does not start
with `_`) to route searches with the same `<custom-string>` to the same shards
in the same order.
:param profile: Set to `true` to return detailed timing information about the
execution of individual components in a search request. NOTE: This is a debugging
tool and adds significant overhead to search execution.
- :param q: Query in the Lucene query string syntax using query parameter search.
- Query parameter searches do not support the full Elasticsearch Query DSL
- but are handy for testing.
- :param query: Defines the search definition using the Query DSL.
- :param rank: Defines the Reciprocal Rank Fusion (RRF) to use.
+ :param q: A query in the Lucene query string syntax. Query parameter searches
+ do not support the full Elasticsearch Query DSL but are handy for testing.
+ IMPORTANT: This parameter overrides the query parameter in the request body.
+ If both parameters are specified, documents matching the query request body
+ parameter are not returned.
+ :param query: The search definition using the Query DSL.
+ :param rank: The Reciprocal Rank Fusion (RRF) to use.
:param request_cache: If `true`, the caching of search results is enabled for
- requests where `size` is `0`. Defaults to index level settings.
+ requests where `size` is `0`. It defaults to index level settings.
:param rescore: Can be used to improve precision by reordering just the top (for
example 100 - 500) documents returned by the `query` and `post_filter` phases.
:param rest_total_hits_as_int: Indicates whether `hits.total` should be rendered
as an integer or an object in the rest search response.
:param retriever: A retriever is a specification to describe top documents returned
from a search. A retriever replaces other elements of the search API that
- also return top documents such as query and knn.
- :param routing: Custom value used to route operations to a specific shard.
- :param runtime_mappings: Defines one or more runtime fields in the search request.
- These fields take precedence over mapped fields with the same name.
+ also return top documents such as `query` and `knn`.
+ :param routing: A custom value that is used to route operations to a specific
+ shard.
+ :param runtime_mappings: One or more runtime fields in the search request. These
+ fields take precedence over mapped fields with the same name.
:param script_fields: Retrieve a script evaluation (based on different fields)
for each hit.
- :param scroll: Period to retain the search context for scrolling. See Scroll
- search results. By default, this value cannot exceed `1d` (24 hours). You
- can change this limit using the `search.max_keep_alive` cluster-level setting.
+ :param scroll: The period to retain the search context for scrolling. By default,
+ this value cannot exceed `1d` (24 hours). You can change this limit by using
+ the `search.max_keep_alive` cluster-level setting.
:param search_after: Used to retrieve the next page of hits using a set of sort
values from the previous page.
- :param search_type: How distributed term frequencies are calculated for relevance
- scoring.
- :param seq_no_primary_term: If `true`, returns sequence number and primary term
- of the last modification of each hit.
- :param size: The number of hits to return. By default, you cannot page through
- more than 10,000 hits using the `from` and `size` parameters. To page through
- more hits, use the `search_after` parameter.
- :param slice: Can be used to split a scrolled search into multiple slices that
- can be consumed independently.
+ :param search_type: Indicates how distributed term frequencies are calculated
+ for relevance scoring.
+ :param seq_no_primary_term: If `true`, the request returns sequence number and
+ primary term of the last modification of each hit.
+ :param size: The number of hits to return, which must not be negative. By default,
+ you cannot page through more than 10,000 hits using the `from` and `size`
+ parameters. To page through more hits, use the `search_after` property.
+ :param slice: Split a scrolled search into multiple slices that can be consumed
+ independently.
:param sort: A comma-separated list of <field>:<direction> pairs.
- :param source: Indicates which source fields are returned for matching documents.
- These fields are returned in the hits._source property of the search response.
+ :param source: The source fields that are returned for matching documents. These
+ fields are returned in the `hits._source` property of the search response.
+ If the `stored_fields` property is specified, the `_source` property defaults
+ to `false`. Otherwise, it defaults to `true`.
:param source_excludes: A comma-separated list of source fields to exclude from
the response. You can also use this parameter to exclude fields from the
subset specified in `_source_includes` query parameter. If the `_source`
@@ -4688,45 +4797,46 @@ def search(
returned. You can exclude fields from this subset using the `_source_excludes`
query parameter. If the `_source` parameter is `false`, this parameter is
ignored.
- :param stats: Stats groups to associate with the search. Each group maintains
+ :param stats: The stats groups to associate with the search. Each group maintains
a statistics aggregation for its associated searches. You can retrieve these
stats using the indices stats API.
- :param stored_fields: List of stored fields to return as part of a hit. If no
- fields are specified, no stored fields are included in the response. If this
- field is specified, the `_source` parameter defaults to `false`. You can
- pass `_source: true` to return both source fields and stored fields in the
- search response.
+ :param stored_fields: A comma-separated list of stored fields to return as part
+ of a hit. If no fields are specified, no stored fields are included in the
+ response. If this field is specified, the `_source` property defaults to
+ `false`. You can pass `_source: true` to return both source fields and stored
+ fields in the search response.
:param suggest: Defines a suggester that provides similar looking terms based
on a provided text.
- :param suggest_field: Specifies which field to use for suggestions.
- :param suggest_mode: Specifies the suggest mode. This parameter can only be used
- when the `suggest_field` and `suggest_text` query string parameters are specified.
- :param suggest_size: Number of suggestions to return. This parameter can only
- be used when the `suggest_field` and `suggest_text` query string parameters
+ :param suggest_field: The field to use for suggestions.
+ :param suggest_mode: The suggest mode. This parameter can be used only when the
+ `suggest_field` and `suggest_text` query string parameters are specified.
+ :param suggest_size: The number of suggestions to return. This parameter can
+ be used only when the `suggest_field` and `suggest_text` query string parameters
are specified.
:param suggest_text: The source text for which the suggestions should be returned.
- This parameter can only be used when the `suggest_field` and `suggest_text`
+ This parameter can be used only when the `suggest_field` and `suggest_text`
query string parameters are specified.
- :param terminate_after: Maximum number of documents to collect for each shard.
+ :param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
- Elasticsearch collects documents before sorting. Use with caution. Elasticsearch
- applies this parameter to each shard handling the request. When possible,
- let Elasticsearch perform early termination automatically. Avoid specifying
- this parameter for requests that target data streams with backing indices
- across multiple data tiers. If set to `0` (default), the query does not terminate
- early.
- :param timeout: Specifies the period of time to wait for a response from each
- shard. If no response is received before the timeout expires, the request
- fails and returns an error. Defaults to no timeout.
- :param track_scores: If true, calculate and return document scores, even if the
- scores are not used for sorting.
+ Elasticsearch collects documents before sorting. IMPORTANT: Use with caution.
+ Elasticsearch applies this property to each shard handling the request. When
+ possible, let Elasticsearch perform early termination automatically. Avoid
+ specifying this property for requests that target data streams with backing
+ indices across multiple data tiers. If set to `0` (default), the query does
+ not terminate early.
+ :param timeout: The period of time to wait for a response from each shard. If
+ no response is received before the timeout expires, the request fails and
+ returns an error. Defaults to no timeout.
+ :param track_scores: If `true`, calculate and return document scores, even if
+ the scores are not used for sorting.
:param track_total_hits: Number of hits matching the query to count accurately.
If `true`, the exact number of hits is returned at the cost of some performance.
If `false`, the response does not include the total number of hits matching
the query.
:param typed_keys: If `true`, aggregation and suggester names are be prefixed
by their respective types in the response.
- :param version: If true, returns document version as part of a hit.
+ :param version: If `true`, the request returns the document version as part of
+ a hit.
"""
__path_parts: t.Dict[str, str]
if index not in SKIP_IN_PATH:
diff --git a/elasticsearch/_sync/client/indices.py b/elasticsearch/_sync/client/indices.py
index 39e513ea1..b4774cea3 100644
--- a/elasticsearch/_sync/client/indices.py
+++ b/elasticsearch/_sync/client/indices.py
@@ -230,6 +230,51 @@ def analyze(
path_parts=__path_parts,
)
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def cancel_migrate_reindex(
+ self,
+ *,
+ index: t.Union[str, t.Sequence[str]],
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Cancel a migration reindex operation.
+ Cancel a migration reindex attempt for a data stream or index.
+
+
+ ``_
+
+ :param index: The index or data stream name.
+ """
+ if index in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'index'")
+ __path_parts: t.Dict[str, str] = {"index": _quote(index)}
+ __path = f'/_migration/reindex/{__path_parts["index"]}/_cancel'
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __headers = {"accept": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="indices.cancel_migrate_reindex",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
def clear_cache(
self,
@@ -710,6 +755,71 @@ def create_data_stream(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_name="create_from",
+ )
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def create_from(
+ self,
+ *,
+ source: str,
+ dest: str,
+ create_from: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an index from a source index.
+ Copy the mappings and settings from the source index to a destination index while allowing request settings and mappings to override the source values.
+
+
+ ``_
+
+ :param source: The source index or data stream name.
+ :param dest: The destination index or data stream name.
+ :param create_from: The request body, sent as JSON. It can be passed as `body` instead.
+ """
+ if source in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'source'")
+ if dest in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'dest'")
+ if create_from is None and body is None:
+ raise ValueError(
+ "Empty value passed for parameters 'create_from' and 'body', one of them should be set."
+ )
+ elif create_from is not None and body is not None:
+ raise ValueError("Cannot set both 'create_from' and 'body'")
+ __path_parts: t.Dict[str, str] = {
+ "source": _quote(source),
+ "dest": _quote(dest),
+ }
+ __path = f'/_create_from/{__path_parts["source"]}/{__path_parts["dest"]}'
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __body = create_from if create_from is not None else body
+ __headers = {"accept": "application/json", "content-type": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="indices.create_from",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
def data_streams_stats(
self,
@@ -2585,6 +2695,51 @@ def get_mapping(
path_parts=__path_parts,
)
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def get_migrate_reindex_status(
+ self,
+ *,
+ index: t.Union[str, t.Sequence[str]],
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Get the migration reindexing status.
+ Get the status of a migration reindex attempt for a data stream or index.
+
+
+ ``_
+
+ :param index: The index or data stream name.
+ """
+ if index in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'index'")
+ __path_parts: t.Dict[str, str] = {"index": _quote(index)}
+ __path = f'/_migration/reindex/{__path_parts["index"]}/_status'
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __headers = {"accept": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "GET",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="indices.get_migrate_reindex_status",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
def get_settings(
self,
@@ -2754,6 +2909,62 @@ def get_template(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_name="reindex",
+ )
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def migrate_reindex(
+ self,
+ *,
+ reindex: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Reindex legacy backing indices.
+ Reindex all legacy backing indices for a data stream.
+ This operation occurs in a persistent task.
+ The persistent task ID is returned immediately and the reindexing work is completed in that task.
+
+
+ ``_
+
+ :param reindex: The request body, sent as JSON. It can be passed as `body` instead.
+ """
+ if reindex is None and body is None:
+ raise ValueError(
+ "Empty value passed for parameters 'reindex' and 'body', one of them should be set."
+ )
+ elif reindex is not None and body is not None:
+ raise ValueError("Cannot set both 'reindex' and 'body'")
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_migration/reindex"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __body = reindex if reindex is not None else body
+ __headers = {"accept": "application/json", "content-type": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="indices.migrate_reindex",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters()
def migrate_to_data_stream(
self,