
Commit 25d3705

[8.0] Migrate helpers to use .options(...)
Co-authored-by: Seth Michael Larson <seth.larson@elastic.co>
1 parent bc65091 commit 25d3705
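
For orientation, a rough sketch of the pattern this commit moves the helpers toward: settings that used to travel as per-call keyword arguments (request_timeout, ignore, auth, headers) are applied with .options(...), which returns a configured copy of the client. This is a hedged example, not part of the commit; the host and index name are placeholders:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")  # placeholder host

    # Before: per-call transport kwargs, e.g.
    #   client.search(index="my-index", body={...}, request_timeout=30, ignore=404)

    # After: derive a configured client once, then call the API on it
    scoped = client.options(request_timeout=30, ignore_status=404)
    resp = scoped.search(index="my-index", query={"match_all": {}})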

11 files changed, +473 -255 lines changed

elasticsearch/_async/client/__init__.py

Lines changed: 14 additions & 0 deletions
@@ -168,6 +168,7 @@ def __init__(
         sniffer_timeout=DEFAULT,
         sniff_on_connection_fail=DEFAULT,
         http_auth=DEFAULT,
+        maxsize=DEFAULT,
         # Internal use only
         _transport: Optional[AsyncTransport] = None,
     ) -> None:
@@ -226,6 +227,19 @@ def __init__(
             )
             sniff_on_node_failure = sniff_on_connection_fail

+        if maxsize is not DEFAULT:
+            if connections_per_node is not DEFAULT:
+                raise ValueError(
+                    "Can't specify both 'maxsize' and 'connections_per_node', "
+                    "instead only specify 'connections_per_node'"
+                )
+            warnings.warn(
+                "The 'maxsize' parameter is deprecated in favor of 'connections_per_node'",
+                category=DeprecationWarning,
+                stacklevel=2,
+            )
+            connections_per_node = maxsize
+
         # Setting min_delay_between_sniffing=True implies sniff_before_requests=True
         if min_delay_between_sniffing is not DEFAULT:
             sniff_before_requests = True
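
What the shim above means for callers, as a hedged sketch (host is a placeholder): 'maxsize' still works but emits a DeprecationWarning and is forwarded to 'connections_per_node'; passing both raises ValueError.

    import warnings
    from elasticsearch import AsyncElasticsearch

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        client = AsyncElasticsearch("http://localhost:9200", maxsize=25)  # deprecated spelling
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Preferred spelling going forward:
    client = AsyncElasticsearch("http://localhost:9200", connections_per_node=25)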

elasticsearch/_async/client/_base.py

Lines changed: 4 additions & 1 deletion
@@ -233,6 +233,7 @@ def _default_sniffed_node_callback(
 class BaseClient:
     def __init__(self, _transport: AsyncTransport) -> None:
         self._transport = _transport
+        self._client_meta: Union[DefaultType, Tuple[Tuple[str, str], ...]] = DEFAULT
         self._headers = HttpHeaders({"content-type": "application/json"})
         self._request_timeout: Union[DefaultType, Optional[float]] = DEFAULT
         self._ignore_status: Union[DefaultType, Collection[int]] = DEFAULT
@@ -274,6 +275,7 @@ async def _perform_request(
             max_retries=self._max_retries,
             retry_on_status=self._retry_on_status,
             retry_on_timeout=self._retry_on_timeout,
+            client_meta=self._client_meta,
         )

         # HEAD with a 404 is returned as a normal response
@@ -320,11 +322,12 @@ async def _perform_request(
             warning_messages: Iterable[str] = _WARNING_RE.findall(warning_header) or (
                 warning_header,
             )
+            stacklevel = warn_stacklevel()
             for warning_message in warning_messages:
                 warnings.warn(
                     warning_message,
                     category=ElasticsearchWarning,
-                    stacklevel=warn_stacklevel(),
+                    stacklevel=stacklevel,
                 )

         if method == "HEAD":
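
The warn_stacklevel() hoist is behavior-preserving: the call depth does not change between loop iterations, so the value can be computed once and reused. A standalone sketch of that pattern (the names here are illustrative, not from the client):

    import warnings

    def warn_all(messages):
        # compute the stacklevel once; every warning then points at warn_all's caller
        stacklevel = 2  # 1 = this frame, 2 = the caller
        for message in messages:
            warnings.warn(message, category=UserWarning, stacklevel=stacklevel)

    warn_all(["this API is deprecated", "that parameter is deprecated"])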

elasticsearch/_async/helpers.py

Lines changed: 47 additions & 32 deletions
@@ -66,7 +66,7 @@ async def _process_bulk_chunk(

     try:
         # send the actual request
-        resp = await client.bulk("\n".join(bulk_actions) + "\n", *args, **kwargs)
+        resp = await client.bulk(*args, body=bulk_actions, **kwargs)
     except TransportError as e:
         gen = _process_bulk_chunk_error(
             error=e,
@@ -163,6 +163,9 @@ async def async_streaming_bulk(
     :arg ignore_status: list of HTTP status code that you want to ignore
     """

+    client = client.options()
+    client._client_meta = (("h", "bp"),)
+
     async def map_actions():
         async for item in aiter(actions):
             yield expand_action_callback(item)
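
Because the helper now starts from client.options() (a copy) before tagging _client_meta, the caller's client object is left untouched. A hedged usage sketch; the host, index, and documents are placeholders:

    from elasticsearch import AsyncElasticsearch
    from elasticsearch.helpers import async_streaming_bulk

    async def index_docs(docs):
        client = AsyncElasticsearch("http://localhost:9200")  # placeholder
        # each doc becomes one bulk action; results are yielded per item
        async for ok, item in async_streaming_bulk(client, docs, index="my-index"):
            if not ok:
                print("failed:", item)
        await client.close()
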
@@ -333,35 +336,52 @@ async def async_scan(
         query = query.copy() if query else {}
         query["sort"] = "_doc"

-    # Grab options that should be propagated to every
-    # API call within this helper instead of just 'search()'
-    transport_kwargs = {}
-    for key in ("headers", "api_key", "http_auth"):
-        if key in kwargs:
-            transport_kwargs[key] = kwargs[key]
-
-    # If the user is using 'scroll_kwargs' we want
-    # to propagate there too, but to not break backwards
-    # compatibility we'll not override anything already given.
-    if scroll_kwargs is not None and transport_kwargs:
-        for key, val in transport_kwargs.items():
-            scroll_kwargs.setdefault(key, val)
+    def pop_transport_kwargs(kw):
+        # Grab options that should be propagated to every
+        # API call within this helper instead of just 'search()'
+        transport_kwargs = {}
+        for key in ("headers", "api_key", "http_auth", "basic_auth", "bearer_auth"):
+            try:
+                value = kw.pop(key)
+                if key == "http_auth":
+                    key = "basic_auth"
+                transport_kwargs[key] = value
+            except KeyError:
+                pass
+        return transport_kwargs
+
+    client = client.options(
+        request_timeout=request_timeout, **pop_transport_kwargs(kwargs)
+    )
+    client._client_meta = (("h", "s"),)

     # initial search
-    resp = await client.search(
-        body=query, scroll=scroll, size=size, request_timeout=request_timeout, **kwargs
-    )
-    scroll_id = resp.get("_scroll_id")
+    search_kwargs = query.copy() if query else {}
+    search_kwargs.update(kwargs)
+    search_kwargs["scroll"] = scroll
+    search_kwargs["size"] = size

     try:
-        while scroll_id and resp["hits"]["hits"]:
-            for hit in resp["hits"]["hits"]:
+        resp = await client.search(**search_kwargs)
+    except TypeError:
+        resp = await client.search(body=query, scroll=scroll, size=size, **kwargs)
+
+    scroll_id = resp.raw.get("_scroll_id")
+    scroll_transport_kwargs = pop_transport_kwargs(scroll_kwargs)
+    if scroll_transport_kwargs:
+        scroll_client = client.options(**scroll_transport_kwargs)
+    else:
+        scroll_client = client
+
+    try:
+        while scroll_id and resp.raw["hits"]["hits"]:
+            for hit in resp.raw["hits"]["hits"]:
                 yield hit

             # Default to 0 if the value isn't included in the response
-            shards_successful = resp["_shards"].get("successful", 0)
-            shards_skipped = resp["_shards"].get("skipped", 0)
-            shards_total = resp["_shards"].get("total", 0)
+            shards_successful = resp.raw["_shards"].get("successful", 0)
+            shards_skipped = resp.raw["_shards"].get("skipped", 0)
+            shards_total = resp.raw["_shards"].get("total", 0)

             # check if we have any errors
             if (shards_successful + shards_skipped) < shards_total:
@@ -382,19 +402,14 @@ async def async_scan(
                         shards_total,
                     ),
                 )
-            resp = await client.scroll(
-                body={"scroll_id": scroll_id, "scroll": scroll}, **scroll_kwargs
+            resp = await scroll_client.scroll(
+                scroll_id=scroll_id, scroll=scroll, **scroll_kwargs
             )
-            scroll_id = resp.get("_scroll_id")
+            scroll_id = resp.raw.get("_scroll_id")

     finally:
         if scroll_id and clear_scroll:
-            await client.clear_scroll(
-                body={"scroll_id": [scroll_id]},
-                **transport_kwargs,
-                ignore=(404,),
-                params={"__elastic_client_meta": (("h", "s"),)},
-            )
+            await client.options(ignore_status=404).clear_scroll(scroll_id=scroll_id)


 async def async_reindex(
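
One user-visible effect of pop_transport_kwargs above: auth passed to the helper is lifted onto a derived client via .options(), and the legacy 'http_auth' spelling is remapped to 'basic_auth'. A hedged sketch; host, credentials, and index are placeholders:

    from elasticsearch import AsyncElasticsearch
    from elasticsearch.helpers import async_scan

    async def dump_ids():
        client = AsyncElasticsearch("http://localhost:9200")  # placeholder
        # 'http_auth' is popped by the helper and applied as basic_auth on a
        # derived client rather than being forwarded to search() itself
        async for hit in async_scan(
            client,
            index="my-index",
            query={"query": {"match_all": {}}},
            http_auth=("user", "password"),
        ):
            print(hit["_id"])
        await client.close()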

elasticsearch/_sync/client/__init__.py

Lines changed: 14 additions & 0 deletions
@@ -168,6 +168,7 @@ def __init__(
         sniffer_timeout=DEFAULT,
         sniff_on_connection_fail=DEFAULT,
         http_auth=DEFAULT,
+        maxsize=DEFAULT,
         # Internal use only
         _transport: Optional[Transport] = None,
     ) -> None:
@@ -226,6 +227,19 @@ def __init__(
             )
             sniff_on_node_failure = sniff_on_connection_fail

+        if maxsize is not DEFAULT:
+            if connections_per_node is not DEFAULT:
+                raise ValueError(
+                    "Can't specify both 'maxsize' and 'connections_per_node', "
+                    "instead only specify 'connections_per_node'"
+                )
+            warnings.warn(
+                "The 'maxsize' parameter is deprecated in favor of 'connections_per_node'",
+                category=DeprecationWarning,
+                stacklevel=2,
+            )
+            connections_per_node = maxsize
+
         # Setting min_delay_between_sniffing=True implies sniff_before_requests=True
         if min_delay_between_sniffing is not DEFAULT:
             sniff_before_requests = True

elasticsearch/_sync/client/_base.py

Lines changed: 4 additions & 1 deletion
@@ -233,6 +233,7 @@ def _default_sniffed_node_callback(
 class BaseClient:
     def __init__(self, _transport: Transport) -> None:
         self._transport = _transport
+        self._client_meta: Union[DefaultType, Tuple[Tuple[str, str], ...]] = DEFAULT
         self._headers = HttpHeaders({"content-type": "application/json"})
         self._request_timeout: Union[DefaultType, Optional[float]] = DEFAULT
         self._ignore_status: Union[DefaultType, Collection[int]] = DEFAULT
@@ -274,6 +275,7 @@ def _perform_request(
             max_retries=self._max_retries,
             retry_on_status=self._retry_on_status,
             retry_on_timeout=self._retry_on_timeout,
+            client_meta=self._client_meta,
         )

         # HEAD with a 404 is returned as a normal response
@@ -320,11 +322,12 @@ def _perform_request(
             warning_messages: Iterable[str] = _WARNING_RE.findall(warning_header) or (
                 warning_header,
             )
+            stacklevel = warn_stacklevel()
             for warning_message in warning_messages:
                 warnings.warn(
                     warning_message,
                     category=ElasticsearchWarning,
-                    stacklevel=warn_stacklevel(),
+                    stacklevel=stacklevel,
                 )

         if method == "HEAD":

elasticsearch/_sync/client/utils.py

Lines changed: 0 additions & 1 deletion
@@ -181,7 +181,6 @@ def cloud_id_to_node_configs(cloud_id: str) -> List[NodeConfig]:
             host=host,
             port=port,
             http_compress=True,
-            # TODO: Set TLSv1.2+
         )
     ]

elasticsearch/helpers/actions.py

Lines changed: 46 additions & 37 deletions
@@ -169,7 +169,7 @@ def _process_bulk_chunk_success(resp, bulk_data, ignore_status, raise_on_error=T

     # go through request-response pairs and detect failures
     for data, (op_type, item) in zip(
-        bulk_data, map(methodcaller("popitem"), resp["items"])
+        bulk_data, map(methodcaller("popitem"), resp.raw["items"])
     ):
         status_code = item.get("status", 500)

@@ -232,14 +232,12 @@ def _process_bulk_chunk(
     """
     Send a bulk request to elasticsearch and process the output.
     """
-    kwargs = _add_helper_meta_to_kwargs(kwargs, "bp")
-
     if not isinstance(ignore_status, (list, tuple)):
         ignore_status = (ignore_status,)

     try:
         # send the actual request
-        resp = client.bulk("\n".join(bulk_actions) + "\n", *args, **kwargs)
+        resp = client.bulk(*args, body=bulk_actions, **kwargs)
     except TransportError as e:
         gen = _process_bulk_chunk_error(
             error=e,
@@ -315,6 +313,9 @@ def streaming_bulk(
     :arg yield_ok: if set to False will skip successful documents in the output
     :arg ignore_status: list of HTTP status code that you want to ignore
     """
+    client = client.options()
+    client._client_meta = (("h", "bp"),)
+
     actions = map(expand_action_callback, actions)
     serializer = client.transport.serializers.get_serializer("application/json")

@@ -542,40 +543,53 @@ def scan(

     """
     scroll_kwargs = scroll_kwargs or {}
-    _add_helper_meta_to_kwargs(scroll_kwargs, "s")
-
     if not preserve_order:
         query = query.copy() if query else {}
         query["sort"] = "_doc"

-    # Grab options that should be propagated to every
-    # API call within this helper instead of just 'search()'
-    transport_kwargs = {}
-    for key in ("headers", "api_key", "http_auth"):
-        if key in kwargs:
-            transport_kwargs[key] = kwargs[key]
-
-    # If the user is using 'scroll_kwargs' we want
-    # to propagate there too, but to not break backwards
-    # compatibility we'll not override anything already given.
-    if scroll_kwargs is not None and transport_kwargs:
-        for key, val in transport_kwargs.items():
-            scroll_kwargs.setdefault(key, val)
+    def pop_transport_kwargs(kw):
+        # Grab options that should be propagated to every
+        # API call within this helper instead of just 'search()'
+        transport_kwargs = {}
+        for key in ("headers", "api_key", "http_auth", "basic_auth", "bearer_auth"):
+            try:
+                value = kw.pop(key)
+                if key == "http_auth":
+                    key = "basic_auth"
+                transport_kwargs[key] = value
+            except KeyError:
+                pass
+        return transport_kwargs
+
+    client = client.options(
+        request_timeout=request_timeout, **pop_transport_kwargs(kwargs)
+    )
+    client._client_meta = (("h", "s"),)

     # initial search
-    resp = client.search(
-        body=query, scroll=scroll, size=size, request_timeout=request_timeout, **kwargs
-    )
-    scroll_id = resp.get("_scroll_id")
+    search_kwargs = query.copy() if query else {}
+    search_kwargs.update(kwargs)
+    search_kwargs["scroll"] = scroll
+    search_kwargs["size"] = size
+    try:
+        resp = client.search(**search_kwargs)
+    except TypeError:
+        resp = client.search(body=query, scroll=scroll, size=size, **kwargs)
+    scroll_id = resp.raw.get("_scroll_id")
+    scroll_transport_kwargs = pop_transport_kwargs(scroll_kwargs)
+    if scroll_transport_kwargs:
+        scroll_client = client.options(**scroll_transport_kwargs)
+    else:
+        scroll_client = client

     try:
-        while scroll_id and resp["hits"]["hits"]:
-            yield from resp["hits"]["hits"]
+        while scroll_id and resp.raw["hits"]["hits"]:
+            yield from resp.raw["hits"]["hits"]

             # Default to 0 if the value isn't included in the response
-            shards_successful = resp["_shards"].get("successful", 0)
-            shards_skipped = resp["_shards"].get("skipped", 0)
-            shards_total = resp["_shards"].get("total", 0)
+            shards_successful = resp.raw["_shards"].get("successful", 0)
+            shards_skipped = resp.raw["_shards"].get("skipped", 0)
+            shards_total = resp.raw["_shards"].get("total", 0)

             # check if we have any errors
             if (shards_successful + shards_skipped) < shards_total:
@@ -596,19 +610,14 @@ def scan(
                         shards_total,
                     ),
                 )
-            resp = client.scroll(
-                body={"scroll_id": scroll_id, "scroll": scroll}, **scroll_kwargs
+            resp = scroll_client.scroll(
+                scroll_id=scroll_id, scroll=scroll, **scroll_kwargs
             )
-            scroll_id = resp.get("_scroll_id")
+            scroll_id = resp.raw.get("_scroll_id")

     finally:
         if scroll_id and clear_scroll:
-            client.clear_scroll(
-                body={"scroll_id": [scroll_id]},
-                ignore=(404,),
-                params={"__elastic_client_meta": (("h", "s"),)},
-                **transport_kwargs,
-            )
+            client.options(ignore_status=404).clear_scroll(scroll_id=scroll_id)


 def reindex(
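
The synchronous scan() follows the same mechanics: options placed in scroll_kwargs are popped and applied through a derived scroll_client instead of being forwarded as raw keyword arguments. A hedged sketch; host, header value, and index are placeholders:

    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import scan

    client = Elasticsearch("http://localhost:9200")  # placeholder

    hits = scan(
        client,
        index="my-index",
        query={"query": {"term": {"status": "published"}}},
        # applied to every scroll() call via scroll_client = client.options(...)
        scroll_kwargs={"headers": {"x-opaque-id": "nightly-export"}},
    )
    for hit in hits:
        print(hit["_source"])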

elasticsearch/helpers/errors.py

Lines changed: 2 additions & 5 deletions
@@ -15,16 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.

-from typing import Any, List
+from typing import Any

 from ..exceptions import ElasticsearchException


 class BulkIndexError(ElasticsearchException):
-    @property
-    def errors(self) -> List[Any]:  # type: ignore
-        """List of errors from execution of the last chunk."""
-        return self.args[1]  # type: ignore
+    pass


 class ScanError(ElasticsearchException):
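
With the 'errors' property removed, the per-item failures remain available as the exception's second positional argument, which is exactly what the property used to return. A hedged sketch; host, index, and document are placeholders:

    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import BulkIndexError, bulk

    client = Elasticsearch("http://localhost:9200")  # placeholder

    try:
        bulk(client, [{"_index": "my-index", "field": "value"}])
    except BulkIndexError as exc:
        # exc.args[1] holds the list of failed items (what the removed property returned)
        for failure in exc.args[1]:
            print(failure)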
