diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py index f88bb0190..4a8ddc8fc 100644 --- a/elasticsearch/_async/client/__init__.py +++ b/elasticsearch/_async/client/__init__.py @@ -1468,7 +1468,7 @@ async def delete_by_query( If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param lenient: If `true`, format-based query failures (such as providing text @@ -3307,7 +3307,8 @@ async def msearch( computationally expensive named queries on a large number of hits may add significant overhead. :param max_concurrent_searches: Maximum number of concurrent searches the multi - search API can execute. + search API can execute. Defaults to `max(1, (# of data nodes * min(search + thread pool size, 10)))`. :param max_concurrent_shard_requests: Maximum number of concurrent shard requests that each sub-search request executes per node. :param pre_filter_shard_size: Defines a threshold that enforces a pre-filter @@ -3635,6 +3636,7 @@ async def open_point_in_time( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, index_filter: t.Optional[t.Mapping[str, t.Any]] = None, + max_concurrent_shard_requests: t.Optional[int] = None, preference: t.Optional[str] = None, pretty: t.Optional[bool] = None, routing: t.Optional[str] = None, @@ -3690,6 +3692,8 @@ async def open_point_in_time( a missing or closed index. :param index_filter: Filter indices if the provided query rewrites to `match_none` on every shard. + :param max_concurrent_shard_requests: Maximum number of concurrent shard requests + that each sub-search request executes per node. 
:param preference: The node or shard the operation should be performed on. By default, it is random. :param routing: A custom value that is used to route operations to a specific @@ -3717,6 +3721,8 @@ async def open_point_in_time( __query["human"] = human if ignore_unavailable is not None: __query["ignore_unavailable"] = ignore_unavailable + if max_concurrent_shard_requests is not None: + __query["max_concurrent_shard_requests"] = max_concurrent_shard_requests if preference is not None: __query["preference"] = preference if pretty is not None: @@ -4257,7 +4263,7 @@ async def render_search_template( human: t.Optional[bool] = None, params: t.Optional[t.Mapping[str, t.Any]] = None, pretty: t.Optional[bool] = None, - source: t.Optional[str] = None, + source: t.Optional[t.Union[str, t.Mapping[str, t.Any]]] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -5661,7 +5667,7 @@ async def search_template( search_type: t.Optional[ t.Union[str, t.Literal["dfs_query_then_fetch", "query_then_fetch"]] ] = None, - source: t.Optional[str] = None, + source: t.Optional[t.Union[str, t.Mapping[str, t.Any]]] = None, typed_keys: t.Optional[bool] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: @@ -6399,7 +6405,7 @@ async def update_by_query( wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. 
:param lenient: If `true`, format-based query failures (such as providing text diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py index 6c8469e72..133fdd1cd 100644 --- a/elasticsearch/_async/client/inference.py +++ b/elasticsearch/_async/client/inference.py @@ -234,6 +234,67 @@ async def get( path_parts=__path_parts, ) + @_rewrite_parameters( + body_name="chat_completion_request", + ) + async def post_eis_chat_completion( + self, + *, + eis_inference_id: str, + chat_completion_request: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Perform a chat completion task through the Elastic Inference Service (EIS).

+

Perform a chat completion inference task with the elastic service.

+ + + ``_ + + :param eis_inference_id: The unique identifier of the inference endpoint. + :param chat_completion_request: + """ + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if chat_completion_request is None and body is None: + raise ValueError( + "Empty value passed for parameters 'chat_completion_request' and 'body', one of them should be set." + ) + elif chat_completion_request is not None and body is not None: + raise ValueError("Cannot set both 'chat_completion_request' and 'body'") + __path_parts: t.Dict[str, str] = {"eis_inference_id": _quote(eis_inference_id)} + __path = ( + f'/_inference/chat_completion/{__path_parts["eis_inference_id"]}/_stream' + ) + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __body = ( + chat_completion_request if chat_completion_request is not None else body + ) + __headers = {"accept": "application/json", "content-type": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "POST", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.post_eis_chat_completion", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_name="inference_config", ) @@ -322,52 +383,69 @@ async def put( ) @_rewrite_parameters( - body_fields=("service", "service_settings"), + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), ) - async def put_eis( + async def put_alibabacloud( self, *, - task_type: t.Union[str, t.Literal["chat_completion"]], - eis_inference_id: str, - service: t.Optional[t.Union[str, t.Literal["elastic"]]] = None, + task_type: t.Union[ + str, t.Literal["completion", "rerank", "space_embedding", "text_embedding"] + ], + 
alibabacloud_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["alibabacloud-ai-search"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html -

Create an Elastic Inference Service (EIS) inference endpoint.

-

Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).

+

Create an AlibabaCloud AI Search inference endpoint.

+

Create an inference endpoint to perform an inference task with the alibabacloud-ai-search service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

- ``_ + ``_ :param task_type: The type of the inference task that the model will perform. - NOTE: The `chat_completion` task type only supports streaming and only through - the _stream API. - :param eis_inference_id: The unique identifier of the inference endpoint. + :param alibabacloud_inference_id: The unique identifier of the inference endpoint. :param service: The type of service supported for the specified task type. In - this case, `elastic`. + this case, `alibabacloud-ai-search`. :param service_settings: Settings used to install the inference model. These - settings are specific to the `elastic` service. + settings are specific to the `alibabacloud-ai-search` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. """ if task_type in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'task_type'") - if eis_inference_id in SKIP_IN_PATH: - raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if alibabacloud_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'alibabacloud_inference_id'" + ) if service is None and body is None: raise ValueError("Empty value passed for parameter 'service'") if service_settings is None and body is None: raise ValueError("Empty value passed for parameter 'service_settings'") __path_parts: t.Dict[str, str] = { "task_type": _quote(task_type), - "eis_inference_id": _quote(eis_inference_id), + "alibabacloud_inference_id": _quote(alibabacloud_inference_id), } - __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["eis_inference_id"]}' + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["alibabacloud_inference_id"]}' __query: t.Dict[str, t.Any] = {} __body: t.Dict[str, t.Any] = body if body is not None else {} if error_trace is not None: @@ -383,6 +461,10 @@ async def put_eis( __body["service"] = 
service if service_settings is not None: __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings if not __body: __body = None # type: ignore[assignment] __headers = {"accept": "application/json"} @@ -394,7 +476,1237 @@ async def put_eis( params=__query, headers=__headers, body=__body, - endpoint_id="inference.put_eis", + endpoint_id="inference.put_alibabacloud", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_amazonbedrock( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + amazonbedrock_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["amazonbedrock"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Amazon Bedrock inference endpoint.

+

Creates an inference endpoint to perform an inference task with the amazonbedrock service.

+
+

info + You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.

+
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param amazonbedrock_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `amazonbedrock`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `amazonbedrock` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if amazonbedrock_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'amazonbedrock_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "amazonbedrock_inference_id": _quote(amazonbedrock_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonbedrock_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not 
__body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_amazonbedrock", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_anthropic( + self, + *, + task_type: t.Union[str, t.Literal["completion"]], + anthropic_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["anthropic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Anthropic inference endpoint.

+

Create an inference endpoint to perform an inference task with the anthropic service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `completion`. + :param anthropic_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `anthropic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `watsonxai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if anthropic_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'anthropic_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "anthropic_inference_id": _quote(anthropic_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["anthropic_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + 
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_anthropic", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_azureaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param azureaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `openai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureaistudio_inference_id": _quote(azureaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + 
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_azureopenai( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureopenai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureopenai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure OpenAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureopenai service.

+

The list of chat completion models that you can choose from in your Azure OpenAI deployment includes:

+ +

The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param azureopenai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureopenai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `azureopenai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureopenai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureopenai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureopenai_inference_id": _quote(azureopenai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureopenai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if 
task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureopenai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_cohere( + self, + *, + task_type: t.Union[str, t.Literal["completion", "rerank", "text_embedding"]], + cohere_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["cohere"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Cohere inference endpoint.

+

Create an inference endpoint to perform an inference task with the cohere service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param cohere_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `cohere`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `cohere` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if cohere_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'cohere_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "cohere_inference_id": _quote(cohere_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["cohere_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_cohere", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings"), + ) + async def put_eis( + self, + *, + task_type: t.Union[str, t.Literal["chat_completion"]], + eis_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elastic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elastic Inference Service (EIS) inference endpoint.

+

Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param eis_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elastic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elastic` service. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "eis_inference_id": _quote(eis_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["eis_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + 
endpoint_id="inference.put_eis", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_elasticsearch( + self, + *, + task_type: t.Union[ + str, t.Literal["rerank", "sparse_embedding", "text_embedding"] + ], + elasticsearch_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elasticsearch"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elasticsearch inference endpoint.

+

Create an inference endpoint to perform an inference task with the elasticsearch service.

+
+

info + Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the endpoints using the API if you want to customize the settings.

+
+

If you use the ELSER or the E5 model through the elasticsearch service, the API request will automatically download and deploy the model if it isn't downloaded yet.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elasticsearch_inference_id: The unique identifier of the inference endpoint. + The must not match the `model_id`. + :param service: The type of service supported for the specified task type. In + this case, `elasticsearch`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elasticsearch` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elasticsearch_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'elasticsearch_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elasticsearch_inference_id": _quote(elasticsearch_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elasticsearch_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + 
__body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_elasticsearch", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_elser( + self, + *, + task_type: t.Union[str, t.Literal["sparse_embedding"]], + elser_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elser"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an ELSER inference endpoint.

+

Create an inference endpoint to perform an inference task with the elser service. + You can also deploy ELSER by using the Elasticsearch inference integration.

+
+

info + Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the endpoint using the API if you want to customize the settings.

+
+

The API request will automatically download and deploy the ELSER model if it isn't already downloaded.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elser_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elser`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elser` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elser_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'elser_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elser_inference_id": _quote(elser_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elser_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + 
headers=__headers, + body=__body, + endpoint_id="inference.put_elser", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_googleaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + googleaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googleaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the googleaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googleaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googleaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googleaistudio` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googleaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googleaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googleaistudio_inference_id": _quote(googleaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googleaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await 
self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googleaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_googlevertexai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + googlevertexai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google Vertex AI inference endpoint.

+

Create an inference endpoint to perform an inference task with the googlevertexai service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googlevertexai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googlevertexai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googlevertexai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googlevertexai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googlevertexai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googlevertexai_inference_id": _quote(googlevertexai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googlevertexai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if 
not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googlevertexai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_hugging_face( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + huggingface_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Hugging Face inference endpoint.

+

Create an inference endpoint to perform an inference task with the hugging_face service.

+

You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. + Select the model you want to use on the new endpoint creation page (for example intfloat/e5-small-v2), then select the sentence embeddings task under the advanced configuration section. + Create the endpoint and copy the URL after the endpoint initialization has been finished.

+

The following models are recommended for the Hugging Face service:

+
    +
  • all-MiniLM-L6-v2
  • +
  • all-MiniLM-L12-v2
  • +
  • all-mpnet-base-v2
  • +
  • e5-base-v2
  • +
  • e5-small-v2
  • +
  • multilingual-e5-base
  • +
  • multilingual-e5-small
  • +
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param huggingface_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `hugging_face`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `hugging_face` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if huggingface_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'huggingface_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "huggingface_inference_id": _quote(huggingface_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["huggingface_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: 
ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_hugging_face", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_jinaai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + jinaai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["jinaai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a JinaAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the jinaai service.

+

To review the available rerank models, refer to https://jina.ai/reranker. + To review the available text_embedding models, refer to https://jina.ai/embeddings/.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param jinaai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `jinaai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `jinaai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if jinaai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'jinaai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "jinaai_inference_id": _quote(jinaai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["jinaai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_jinaai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_mistral( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + mistral_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Mistral inference endpoint.

+

Creates an inference endpoint to perform an inference task with the mistral service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `text_embedding`. + :param mistral_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `mistral`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `mistral` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if mistral_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'mistral_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "mistral_inference_id": _quote(mistral_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["mistral_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: 
ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_mistral", path_parts=__path_parts, ) @@ -427,7 +1739,7 @@ async def put_openai( .. raw:: html

Create an OpenAI inference endpoint.

-

Create an inference endpoint to perform an inference task with the openai service.

+

Create an inference endpoint to perform an inference task with the openai service or openai compatible APIs.

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. @@ -435,7 +1747,7 @@ async def put_openai( Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

- ``_ + ``_ :param task_type: The type of the inference task that the model will perform. NOTE: The `chat_completion` task type only supports streaming and only through diff --git a/elasticsearch/_async/client/watcher.py b/elasticsearch/_async/client/watcher.py index d51776c45..30f69d0e7 100644 --- a/elasticsearch/_async/client/watcher.py +++ b/elasticsearch/_async/client/watcher.py @@ -845,7 +845,10 @@ async def update_settings(

Update Watcher index settings. Update settings for the Watcher internal index (.watches). Only a subset of settings can be modified. - This includes index.auto_expand_replicas and index.number_of_replicas.

+ This includes index.auto_expand_replicas, index.number_of_replicas, index.routing.allocation.exclude.*, + index.routing.allocation.include.* and index.routing.allocation.require.*. + Modification of index.routing.allocation.include._tier_preference is an exception and is not allowed as the + Watcher shards must always be in the data_content tier.

``_ diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py index b39cbae26..667be8cad 100644 --- a/elasticsearch/_sync/client/__init__.py +++ b/elasticsearch/_sync/client/__init__.py @@ -1466,7 +1466,7 @@ def delete_by_query( If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param lenient: If `true`, format-based query failures (such as providing text @@ -3305,7 +3305,8 @@ def msearch( computationally expensive named queries on a large number of hits may add significant overhead. :param max_concurrent_searches: Maximum number of concurrent searches the multi - search API can execute. + search API can execute. Defaults to `max(1, (# of data nodes * min(search + thread pool size, 10)))`. :param max_concurrent_shard_requests: Maximum number of concurrent shard requests that each sub-search request executes per node. :param pre_filter_shard_size: Defines a threshold that enforces a pre-filter @@ -3633,6 +3634,7 @@ def open_point_in_time( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, index_filter: t.Optional[t.Mapping[str, t.Any]] = None, + max_concurrent_shard_requests: t.Optional[int] = None, preference: t.Optional[str] = None, pretty: t.Optional[bool] = None, routing: t.Optional[str] = None, @@ -3688,6 +3690,8 @@ def open_point_in_time( a missing or closed index. :param index_filter: Filter indices if the provided query rewrites to `match_none` on every shard. + :param max_concurrent_shard_requests: Maximum number of concurrent shard requests + that each sub-search request executes per node. 
:param preference: The node or shard the operation should be performed on. By default, it is random. :param routing: A custom value that is used to route operations to a specific @@ -3715,6 +3719,8 @@ def open_point_in_time( __query["human"] = human if ignore_unavailable is not None: __query["ignore_unavailable"] = ignore_unavailable + if max_concurrent_shard_requests is not None: + __query["max_concurrent_shard_requests"] = max_concurrent_shard_requests if preference is not None: __query["preference"] = preference if pretty is not None: @@ -4255,7 +4261,7 @@ def render_search_template( human: t.Optional[bool] = None, params: t.Optional[t.Mapping[str, t.Any]] = None, pretty: t.Optional[bool] = None, - source: t.Optional[str] = None, + source: t.Optional[t.Union[str, t.Mapping[str, t.Any]]] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -5659,7 +5665,7 @@ def search_template( search_type: t.Optional[ t.Union[str, t.Literal["dfs_query_then_fetch", "query_then_fetch"]] ] = None, - source: t.Optional[str] = None, + source: t.Optional[t.Union[str, t.Mapping[str, t.Any]]] = None, typed_keys: t.Optional[bool] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: @@ -6397,7 +6403,7 @@ def update_by_query( wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. 
:param lenient: If `true`, format-based query failures (such as providing text diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py index cbd62993d..8321dcc7d 100644 --- a/elasticsearch/_sync/client/inference.py +++ b/elasticsearch/_sync/client/inference.py @@ -234,6 +234,67 @@ def get( path_parts=__path_parts, ) + @_rewrite_parameters( + body_name="chat_completion_request", + ) + def post_eis_chat_completion( + self, + *, + eis_inference_id: str, + chat_completion_request: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Perform a chat completion task through the Elastic Inference Service (EIS).

+

Perform a chat completion inference task with the elastic service.

+ + + ``_ + + :param eis_inference_id: The unique identifier of the inference endpoint. + :param chat_completion_request: + """ + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if chat_completion_request is None and body is None: + raise ValueError( + "Empty value passed for parameters 'chat_completion_request' and 'body', one of them should be set." + ) + elif chat_completion_request is not None and body is not None: + raise ValueError("Cannot set both 'chat_completion_request' and 'body'") + __path_parts: t.Dict[str, str] = {"eis_inference_id": _quote(eis_inference_id)} + __path = ( + f'/_inference/chat_completion/{__path_parts["eis_inference_id"]}/_stream' + ) + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __body = ( + chat_completion_request if chat_completion_request is not None else body + ) + __headers = {"accept": "application/json", "content-type": "application/json"} + return self.perform_request( # type: ignore[return-value] + "POST", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.post_eis_chat_completion", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_name="inference_config", ) @@ -322,52 +383,69 @@ def put( ) @_rewrite_parameters( - body_fields=("service", "service_settings"), + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), ) - def put_eis( + def put_alibabacloud( self, *, - task_type: t.Union[str, t.Literal["chat_completion"]], - eis_inference_id: str, - service: t.Optional[t.Union[str, t.Literal["elastic"]]] = None, + task_type: t.Union[ + str, t.Literal["completion", "rerank", "space_embedding", "text_embedding"] + ], + alibabacloud_inference_id: str, 
+ service: t.Optional[t.Union[str, t.Literal["alibabacloud-ai-search"]]] = None, service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, body: t.Optional[t.Dict[str, t.Any]] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html -

Create an Elastic Inference Service (EIS) inference endpoint.

-

Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).

+

Create an AlibabaCloud AI Search inference endpoint.

+

Create an inference endpoint to perform an inference task with the alibabacloud-ai-search service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

- ``_ + ``_ :param task_type: The type of the inference task that the model will perform. - NOTE: The `chat_completion` task type only supports streaming and only through - the _stream API. - :param eis_inference_id: The unique identifier of the inference endpoint. + :param alibabacloud_inference_id: The unique identifier of the inference endpoint. :param service: The type of service supported for the specified task type. In - this case, `elastic`. + this case, `alibabacloud-ai-search`. :param service_settings: Settings used to install the inference model. These - settings are specific to the `elastic` service. + settings are specific to the `alibabacloud-ai-search` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. """ if task_type in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'task_type'") - if eis_inference_id in SKIP_IN_PATH: - raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if alibabacloud_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'alibabacloud_inference_id'" + ) if service is None and body is None: raise ValueError("Empty value passed for parameter 'service'") if service_settings is None and body is None: raise ValueError("Empty value passed for parameter 'service_settings'") __path_parts: t.Dict[str, str] = { "task_type": _quote(task_type), - "eis_inference_id": _quote(eis_inference_id), + "alibabacloud_inference_id": _quote(alibabacloud_inference_id), } - __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["eis_inference_id"]}' + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["alibabacloud_inference_id"]}' __query: t.Dict[str, t.Any] = {} __body: t.Dict[str, t.Any] = body if body is not None else {} if error_trace is not None: @@ -383,6 +461,10 @@ def put_eis( __body["service"] = service if 
service_settings is not None: __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings if not __body: __body = None # type: ignore[assignment] __headers = {"accept": "application/json"} @@ -394,7 +476,1237 @@ def put_eis( params=__query, headers=__headers, body=__body, - endpoint_id="inference.put_eis", + endpoint_id="inference.put_alibabacloud", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_amazonbedrock( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + amazonbedrock_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["amazonbedrock"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Amazon Bedrock inference endpoint.

+

Creates an inference endpoint to perform an inference task with the amazonbedrock service.

+
+

info + You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.

+
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param amazonbedrock_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `amazonbedrock`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `amazonbedrock` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if amazonbedrock_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'amazonbedrock_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "amazonbedrock_inference_id": _quote(amazonbedrock_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonbedrock_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not 
__body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_amazonbedrock", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_anthropic( + self, + *, + task_type: t.Union[str, t.Literal["completion"]], + anthropic_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["anthropic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Anthropic inference endpoint.

+

Create an inference endpoint to perform an inference task with the anthropic service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `completion`. + :param anthropic_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `anthropic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `watsonxai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if anthropic_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'anthropic_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "anthropic_inference_id": _quote(anthropic_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["anthropic_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + 
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_anthropic", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_azureaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param azureaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `openai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureaistudio_inference_id": _quote(azureaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + 
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_azureopenai( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureopenai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureopenai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure OpenAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureopenai service.

+

The list of chat completion models that you can choose from in your Azure OpenAI deployment includes:

+ +

The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param azureopenai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureopenai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `azureopenai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureopenai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureopenai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureopenai_inference_id": _quote(azureopenai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureopenai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if 
task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureopenai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_cohere( + self, + *, + task_type: t.Union[str, t.Literal["completion", "rerank", "text_embedding"]], + cohere_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["cohere"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Cohere inference endpoint.

+

Create an inference endpoint to perform an inference task with the cohere service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param cohere_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `cohere`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `cohere` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if cohere_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'cohere_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "cohere_inference_id": _quote(cohere_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["cohere_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_cohere", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings"), + ) + def put_eis( + self, + *, + task_type: t.Union[str, t.Literal["chat_completion"]], + eis_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elastic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elastic Inference Service (EIS) inference endpoint.

+

Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param eis_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elastic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elastic` service. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "eis_inference_id": _quote(eis_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["eis_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + 
endpoint_id="inference.put_eis", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_elasticsearch( + self, + *, + task_type: t.Union[ + str, t.Literal["rerank", "sparse_embedding", "text_embedding"] + ], + elasticsearch_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elasticsearch"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elasticsearch inference endpoint.

+

Create an inference endpoint to perform an inference task with the elasticsearch service.

+
+

info + Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints; you only need to create the endpoints using the API if you want to customize the settings.

+
+

If you use the ELSER or the E5 model through the elasticsearch service, the API request will automatically download and deploy the model if it isn't downloaded yet.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elasticsearch_inference_id: The unique identifier of the inference endpoint. + The must not match the `model_id`. + :param service: The type of service supported for the specified task type. In + this case, `elasticsearch`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elasticsearch` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elasticsearch_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'elasticsearch_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elasticsearch_inference_id": _quote(elasticsearch_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elasticsearch_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + 
__body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_elasticsearch", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_elser( + self, + *, + task_type: t.Union[str, t.Literal["sparse_embedding"]], + elser_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elser"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an ELSER inference endpoint.

+

Create an inference endpoint to perform an inference task with the elser service. + You can also deploy ELSER by using the Elasticsearch inference integration.

+
+

info + Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.

+
+

The API request will automatically download and deploy the ELSER model if it isn't already downloaded.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elser_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elser`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elser` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elser_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'elser_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elser_inference_id": _quote(elser_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elser_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + 
headers=__headers, + body=__body, + endpoint_id="inference.put_elser", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_googleaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + googleaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googleaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the googleaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googleaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googleaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googleaistudio` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googleaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googleaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googleaistudio_inference_id": _quote(googleaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googleaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( 
# type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googleaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_googlevertexai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + googlevertexai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google Vertex AI inference endpoint.

+

Create an inference endpoint to perform an inference task with the googlevertexai service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googlevertexai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googlevertexai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googlevertexai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googlevertexai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googlevertexai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googlevertexai_inference_id": _quote(googlevertexai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googlevertexai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if 
not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googlevertexai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_hugging_face( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + huggingface_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Hugging Face inference endpoint.

+

Create an inference endpoint to perform an inference task with the hugging_face service.

+

You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. + Select the model you want to use on the new endpoint creation page (for example intfloat/e5-small-v2), then select the sentence embeddings task under the advanced configuration section. + Create the endpoint and copy the URL after the endpoint initialization has finished.

+

The following models are recommended for the Hugging Face service:

+
    +
  • all-MiniLM-L6-v2
  • +
  • all-MiniLM-L12-v2
  • +
  • all-mpnet-base-v2
  • +
  • e5-base-v2
  • +
  • e5-small-v2
  • +
  • multilingual-e5-base
  • +
  • multilingual-e5-small
  • +
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param huggingface_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `hugging_face`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `hugging_face` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if huggingface_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'huggingface_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "huggingface_inference_id": _quote(huggingface_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["huggingface_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: 
ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_hugging_face", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_jinaai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + jinaai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["jinaai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a JinaAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the jinaai service.

+

To review the available rerank models, refer to https://jina.ai/reranker. + To review the available text_embedding models, refer to https://jina.ai/embeddings/.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param jinaai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `jinaai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `jinaai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if jinaai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'jinaai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "jinaai_inference_id": _quote(jinaai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["jinaai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_jinaai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_mistral( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + mistral_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Mistral inference endpoint.

+

Create an inference endpoint to perform an inference task with the mistral service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `text_embedding`. + :param mistral_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `mistral`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `mistral` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if mistral_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'mistral_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "mistral_inference_id": _quote(mistral_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["mistral_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + 
"PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_mistral", path_parts=__path_parts, ) @@ -427,7 +1739,7 @@ def put_openai( .. raw:: html

Create an OpenAI inference endpoint.

-

Create an inference endpoint to perform an inference task with the openai service.

+

Create an inference endpoint to perform an inference task with the openai service or openai compatible APIs.

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. @@ -435,7 +1747,7 @@ def put_openai( Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

- ``_ + ``_ :param task_type: The type of the inference task that the model will perform. NOTE: The `chat_completion` task type only supports streaming and only through diff --git a/elasticsearch/_sync/client/watcher.py b/elasticsearch/_sync/client/watcher.py index a266f54c4..92c70da27 100644 --- a/elasticsearch/_sync/client/watcher.py +++ b/elasticsearch/_sync/client/watcher.py @@ -845,7 +845,10 @@ def update_settings(

Update Watcher index settings. Update settings for the Watcher internal index (.watches). Only a subset of settings can be modified. - This includes index.auto_expand_replicas and index.number_of_replicas.

+ This includes index.auto_expand_replicas, index.number_of_replicas, index.routing.allocation.exclude.*, + index.routing.allocation.include.* and index.routing.allocation.require.*. + Modification of index.routing.allocation.include._tier_preference is an exception and is not allowed as the + Watcher shards must always be in the data_content tier.

``_