Commit 0a9a085

Merge branch 'main' into geo_window_part
2 parents 9401370 + 2818ab9 commit 0a9a085

15 files changed: +451 additions, −91 deletions

bigframes/clients.py

Lines changed: 16 additions & 8 deletions
@@ -94,16 +94,24 @@ def create_bq_connection(
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#grant_permission_on_function
         self._ensure_iam_binding(project_id, service_account_id, iam_role)
 
-    # Introduce retries to accommodate transient errors like etag mismatch,
-    # which can be caused by concurrent operation on the same resource, and
-    # manifests with message like:
-    # google.api_core.exceptions.Aborted: 409 There were concurrent policy
-    # changes. Please retry the whole read-modify-write with exponential
-    # backoff. The request's ETag '\007\006\003,\264\304\337\272' did not match
-    # the current policy's ETag '\007\006\003,\3750&\363'.
+    # Introduce retries to accommodate transient errors like:
+    # (1) Etag mismatch,
+    # which can be caused by concurrent operation on the same resource, and
+    # manifests with message like:
+    # google.api_core.exceptions.Aborted: 409 There were concurrent policy
+    # changes. Please retry the whole read-modify-write with exponential
+    # backoff. The request's ETag '\007\006\003,\264\304\337\272' did not
+    # match the current policy's ETag '\007\006\003,\3750&\363'.
+    # (2) Connection creation,
+    # for which sometimes it takes a bit for its service account to reflect
+    # across APIs (e.g. b/397662004, b/386838767), before which, an attempt
+    # to set an IAM policy for the service account may throw an error like:
+    # google.api_core.exceptions.InvalidArgument: 400 Service account
+    # bqcx-*@gcp-sa-bigquery-condel.iam.gserviceaccount.com does not exist.
     @google.api_core.retry.Retry(
         predicate=google.api_core.retry.if_exception_type(
-            google.api_core.exceptions.Aborted
+            google.api_core.exceptions.Aborted,
+            google.api_core.exceptions.InvalidArgument,
         ),
         initial=10,
         maximum=20,
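For context, a minimal self-contained sketch (not bigframes code) of the retry pattern this change extends: google.api_core.retry.Retry wraps a callable and re-invokes it with exponential backoff whenever the predicate matches the raised exception. The flaky_set_iam_policy function and the small backoff values below are illustrative only; the actual change uses initial=10 and maximum=20 seconds.

import google.api_core.exceptions
import google.api_core.retry

attempts = {"count": 0}

@google.api_core.retry.Retry(
    predicate=google.api_core.retry.if_exception_type(
        google.api_core.exceptions.Aborted,          # e.g. 409 concurrent policy changes
        google.api_core.exceptions.InvalidArgument,  # e.g. 400 service account not yet visible
    ),
    initial=0.1,  # illustrative; the real change waits 10s initially
    maximum=1.0,  # illustrative; the real change caps the wait at 20s
)
def flaky_set_iam_policy():
    # Hypothetical stand-in for the IAM read-modify-write that can hit the
    # transient 409/400 errors described in the diff comments above.
    attempts["count"] += 1
    if attempts["count"] < 3:
        raise google.api_core.exceptions.Aborted("There were concurrent policy changes.")
    return "policy updated"

print(flaky_set_iam_policy())  # retries twice, then prints "policy updated"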

bigframes/core/blocks.py

Lines changed: 101 additions & 19 deletions
@@ -22,6 +22,7 @@
 from __future__ import annotations
 
 import ast
+import copy
 import dataclasses
 import datetime
 import functools
@@ -30,6 +31,7 @@
 import textwrap
 import typing
 from typing import (
+    Any,
     Iterable,
     List,
     Literal,
@@ -49,7 +51,7 @@
 import pyarrow as pa
 
 from bigframes import session
-import bigframes._config.sampling_options as sampling_options
+from bigframes._config import sampling_options
 import bigframes.constants
 import bigframes.core as core
 import bigframes.core.compile.googlesql as googlesql
@@ -535,19 +537,9 @@ def to_pandas(
         Returns:
             pandas.DataFrame, QueryJob
         """
-        if (sampling_method is not None) and (sampling_method not in _SAMPLING_METHODS):
-            raise NotImplementedError(
-                f"The downsampling method {sampling_method} is not implemented, "
-                f"please choose from {','.join(_SAMPLING_METHODS)}."
-            )
-
-        sampling = bigframes.options.sampling.with_max_download_size(max_download_size)
-        if sampling_method is not None:
-            sampling = sampling.with_method(sampling_method).with_random_state(  # type: ignore
-                random_state
-            )
-        else:
-            sampling = sampling.with_disabled()
+        sampling = self._get_sampling_option(
+            max_download_size, sampling_method, random_state
+        )
 
         df, query_job = self._materialize_local(
             materialize_options=MaterializationOptions(
@@ -559,6 +551,27 @@ def to_pandas(
         df.set_axis(self.column_labels, axis=1, copy=False)
         return df, query_job
 
+    def _get_sampling_option(
+        self,
+        max_download_size: Optional[int] = None,
+        sampling_method: Optional[str] = None,
+        random_state: Optional[int] = None,
+    ) -> sampling_options.SamplingOptions:
+
+        if (sampling_method is not None) and (sampling_method not in _SAMPLING_METHODS):
+            raise NotImplementedError(
+                f"The downsampling method {sampling_method} is not implemented, "
+                f"please choose from {','.join(_SAMPLING_METHODS)}."
+            )
+
+        sampling = bigframes.options.sampling.with_max_download_size(max_download_size)
+        if sampling_method is None:
+            return sampling.with_disabled()
+
+        return sampling.with_method(sampling_method).with_random_state(  # type: ignore
+            random_state
+        )
+
     def try_peek(
         self, n: int = 20, force: bool = False, allow_large_results=None
     ) -> typing.Optional[pd.DataFrame]:
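For illustration, a self-contained sketch (toy names, not the bigframes SamplingOptions class itself) of the immutable builder pattern the new _get_sampling_option helper relies on: each with_* call returns a new options object, so the helper either disables sampling or configures a method and random state without mutating the global defaults.

from dataclasses import dataclass, replace
from typing import Optional

@dataclass(frozen=True)
class ToySamplingOptions:
    # Hypothetical stand-in for bigframes' sampling options object.
    max_download_size: Optional[int] = None
    enable_downsampling: bool = False
    sampling_method: Optional[str] = None
    random_state: Optional[int] = None

    def with_max_download_size(self, size: Optional[int]) -> "ToySamplingOptions":
        return replace(self, max_download_size=size)

    def with_method(self, method: str) -> "ToySamplingOptions":
        return replace(self, enable_downsampling=True, sampling_method=method)

    def with_random_state(self, state: Optional[int]) -> "ToySamplingOptions":
        return replace(self, random_state=state)

    def with_disabled(self) -> "ToySamplingOptions":
        return replace(self, enable_downsampling=False)

def get_sampling_option(
    defaults: ToySamplingOptions,
    max_download_size: Optional[int] = None,
    sampling_method: Optional[str] = None,
    random_state: Optional[int] = None,
) -> ToySamplingOptions:
    # Mirrors the control flow of the extracted helper: no method means
    # sampling stays disabled; otherwise configure method and random state.
    sampling = defaults.with_max_download_size(max_download_size)
    if sampling_method is None:
        return sampling.with_disabled()
    return sampling.with_method(sampling_method).with_random_state(random_state)

print(get_sampling_option(ToySamplingOptions(), 500, "uniform", 42))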
@@ -798,11 +811,73 @@ def split(
         return [sliced_block.drop_columns(drop_cols) for sliced_block in sliced_blocks]
 
     def _compute_dry_run(
-        self, value_keys: Optional[Iterable[str]] = None
-    ) -> bigquery.QueryJob:
+        self,
+        value_keys: Optional[Iterable[str]] = None,
+        *,
+        ordered: bool = True,
+        max_download_size: Optional[int] = None,
+        sampling_method: Optional[str] = None,
+        random_state: Optional[int] = None,
+    ) -> typing.Tuple[pd.Series, bigquery.QueryJob]:
+        sampling = self._get_sampling_option(
+            max_download_size, sampling_method, random_state
+        )
+        if sampling.enable_downsampling:
+            raise NotImplementedError("Dry run with sampling is not supported")
+
+        index: List[Any] = []
+        values: List[Any] = []
+
+        index.append("columnCount")
+        values.append(len(self.value_columns))
+        index.append("columnDtypes")
+        values.append(
+            {
+                col: self.expr.get_column_type(self.resolve_label_exact_or_error(col))
+                for col in self.column_labels
+            }
+        )
+
+        index.append("indexLevel")
+        values.append(self.index.nlevels)
+        index.append("indexDtypes")
+        values.append(self.index.dtypes)
+
         expr = self._apply_value_keys_to_expr(value_keys=value_keys)
-        query_job = self.session._executor.dry_run(expr)
-        return query_job
+        query_job = self.session._executor.dry_run(expr, ordered)
+        job_api_repr = copy.deepcopy(query_job._properties)
+
+        job_ref = job_api_repr["jobReference"]
+        for key, val in job_ref.items():
+            index.append(key)
+            values.append(val)
+
+        index.append("jobType")
+        values.append(job_api_repr["configuration"]["jobType"])
+
+        query_config = job_api_repr["configuration"]["query"]
+        for key in ("destinationTable", "useLegacySql"):
+            index.append(key)
+            values.append(query_config.get(key))
+
+        query_stats = job_api_repr["statistics"]["query"]
+        for key in (
+            "referencedTables",
+            "totalBytesProcessed",
+            "cacheHit",
+            "statementType",
+        ):
+            index.append(key)
+            values.append(query_stats.get(key))
+
+        index.append("creationTime")
+        values.append(
+            pd.Timestamp(
+                job_api_repr["statistics"]["creationTime"], unit="ms", tz="UTC"
+            )
+        )
+
+        return pd.Series(values, index=index), query_job
 
     def _apply_value_keys_to_expr(self, value_keys: Optional[Iterable[str]] = None):
         expr = self._expr
@@ -2703,11 +2778,18 @@ def to_pandas(
                 "Cannot materialize index, as this object does not have an index. Set index column(s) using set_index."
             )
         ordered = ordered if ordered is not None else True
+
         df, query_job = self._block.select_columns([]).to_pandas(
-            ordered=ordered, allow_large_results=allow_large_results
+            ordered=ordered,
+            allow_large_results=allow_large_results,
         )
         return df.index, query_job
 
+    def _compute_dry_run(
+        self, *, ordered: bool = True
+    ) -> Tuple[pd.Series, bigquery.QueryJob]:
+        return self._block.select_columns([])._compute_dry_run(ordered=ordered)
+
     def resolve_level(self, level: LevelsType) -> typing.Sequence[str]:
         if utils.is_list_like(level):
             levels = list(level)
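As a standalone illustration of how the expanded _compute_dry_run assembles its result, the sketch below collects dry-run statistics as parallel label/value lists and returns them as a labeled pandas Series. The job_api_repr dict is a hand-written stand-in for a real QueryJob._properties payload; its field names mirror the BigQuery job resource keys read above, and the values are made up.

import pandas as pd

job_api_repr = {
    "jobReference": {"projectId": "my-project", "jobId": "job_123", "location": "US"},
    "configuration": {"jobType": "QUERY", "query": {"useLegacySql": False}},
    "statistics": {
        "creationTime": 1735689600000,  # milliseconds since the epoch
        "query": {
            "totalBytesProcessed": "1048576",
            "cacheHit": False,
            "statementType": "SELECT",
        },
    },
}

index, values = [], []

# Job identity fields become one Series entry each.
for key, val in job_api_repr["jobReference"].items():
    index.append(key)
    values.append(val)

index.append("jobType")
values.append(job_api_repr["configuration"]["jobType"])

# Dry-run statistics of interest, missing keys simply become None.
query_stats = job_api_repr["statistics"]["query"]
for key in ("totalBytesProcessed", "cacheHit", "statementType"):
    index.append(key)
    values.append(query_stats.get(key))

index.append("creationTime")
values.append(pd.Timestamp(job_api_repr["statistics"]["creationTime"], unit="ms", tz="UTC"))

print(pd.Series(values, index=index))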

bigframes/core/indexers.py

Lines changed: 35 additions & 1 deletion
@@ -27,6 +27,7 @@
 import bigframes.core.guid as guid
 import bigframes.core.indexes as indexes
 import bigframes.core.scalar
+import bigframes.core.window_spec as windows
 import bigframes.dataframe
 import bigframes.dtypes
 import bigframes.exceptions as bfe
@@ -477,6 +478,19 @@ def _iloc_getitem_series_or_dataframe(
             Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
             series_or_dataframe.iloc[0:0],
         )
+
+    # Check if both positive index and negative index are necessary
+    if isinstance(key, (bigframes.series.Series, indexes.Index)):
+        # Avoid data download
+        is_key_unisigned = False
+    else:
+        first_sign = key[0] >= 0
+        is_key_unisigned = True
+        for k in key:
+            if (k >= 0) != first_sign:
+                is_key_unisigned = False
+                break
+
     if isinstance(series_or_dataframe, bigframes.series.Series):
         original_series_name = series_or_dataframe.name
         series_name = (
@@ -497,7 +511,27 @@ def _iloc_getitem_series_or_dataframe(
         block = df._block
         # explicitly set index to offsets, reset_index may not generate offsets in some modes
         block, offsets_id = block.promote_offsets("temp_iloc_offsets_")
-        block = block.set_index([offsets_id])
+        pos_block = block.set_index([offsets_id])
+
+        if not is_key_unisigned or key[0] < 0:
+            neg_block, size_col_id = block.apply_window_op(
+                offsets_id,
+                ops.aggregations.SizeUnaryOp(),
+                window_spec=windows.rows(),
+            )
+            neg_block, neg_index_id = neg_block.apply_binary_op(
+                offsets_id, size_col_id, ops.SubOp()
+            )
+
+            neg_block = neg_block.set_index([neg_index_id]).drop_columns(
+                [size_col_id, offsets_id]
+            )
+
+        if is_key_unisigned:
+            block = pos_block if key[0] >= 0 else neg_block
+        else:
+            block = pos_block.concat([neg_block], how="inner")
+
         df = bigframes.dataframe.DataFrame(block)
 
         result = df.loc[key]
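A minimal pandas sketch of the idea behind this change: every row gets a zero-based offset, and a second "negative" index is derived as offset minus the total row count, so that .loc can serve mixed positive/negative iloc-style keys against the union of both views. This is an illustration only; the real implementation builds the size column with a window op (SizeUnaryOp) over BigQuery blocks rather than local data.

import pandas as pd

df = pd.DataFrame({"value": ["a", "b", "c", "d", "e"]})

offsets = pd.RangeIndex(len(df))        # analogous to promote_offsets
size = len(df)                          # analogous to the SizeUnaryOp window result

pos_view = df.set_axis(offsets)         # index: 0, 1, 2, 3, 4
neg_view = df.set_axis(offsets - size)  # index: -5, -4, -3, -2, -1

key = [0, -1, 2]                        # a mixed-sign key needs both views
combined = pd.concat([pos_view, neg_view])
print(combined.loc[key])                # rows "a", "e", "c"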

bigframes/core/indexes/base.py

Lines changed: 37 additions & 7 deletions
@@ -17,7 +17,7 @@
 from __future__ import annotations
 
 import typing
-from typing import Hashable, Literal, Optional, Sequence, Union
+from typing import Hashable, Literal, Optional, overload, Sequence, Union
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.indexes.base as vendored_pandas_index
@@ -228,15 +228,16 @@ def T(self) -> Index:
         return self.transpose()
 
     @property
-    def query_job(self) -> Optional[bigquery.QueryJob]:
+    def query_job(self) -> bigquery.QueryJob:
         """BigQuery job metadata for the most recent query.
 
         Returns:
             The most recent `QueryJob
             <https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob>`_.
         """
         if self._query_job is None:
-            self._query_job = self._block._compute_dry_run()
+            _, query_job = self._block._compute_dry_run()
+            self._query_job = query_job
         return self._query_job
 
     def __repr__(self) -> str:
@@ -252,7 +253,8 @@ def __repr__(self) -> str:
         opts = bigframes.options.display
         max_results = opts.max_rows
         if opts.repr_mode == "deferred":
-            return formatter.repr_query_job(self._block._compute_dry_run())
+            _, dry_run_query_job = self._block._compute_dry_run()
+            return formatter.repr_query_job(dry_run_query_job)
 
         pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results)
         self._query_job = query_job
@@ -490,18 +492,46 @@ def __getitem__(self, key: int) -> typing.Any:
         else:
             raise NotImplementedError(f"Index key not supported {key}")
 
-    def to_pandas(self, *, allow_large_results: Optional[bool] = None) -> pandas.Index:
+    @overload
+    def to_pandas(  # type: ignore[overload-overlap]
+        self,
+        *,
+        allow_large_results: Optional[bool] = ...,
+        dry_run: Literal[False] = ...,
+    ) -> pandas.Index:
+        ...
+
+    @overload
+    def to_pandas(
+        self, *, allow_large_results: Optional[bool] = ..., dry_run: Literal[True] = ...
+    ) -> pandas.Series:
+        ...
+
+    def to_pandas(
+        self, *, allow_large_results: Optional[bool] = None, dry_run: bool = False
+    ) -> pandas.Index | pandas.Series:
         """Gets the Index as a pandas Index.
 
         Args:
             allow_large_results (bool, default None):
                 If not None, overrides the global setting to allow or disallow large query results
                 over the default size limit of 10 GB.
+            dry_run (bool, default False):
+                If this argument is true, this method will not process the data. Instead, it returns
+                a Pandas series containing dtype and the amount of bytes to be processed.
 
         Returns:
-            pandas.Index:
-                A pandas Index with all of the labels from this Index.
+            pandas.Index | pandas.Series:
+                A pandas Index with all of the labels from this Index. If dry run is set to True,
+                returns a Series containing dry run statistics.
         """
+        if dry_run:
+            dry_run_stats, dry_run_job = self._block.index._compute_dry_run(
+                ordered=True
+            )
+            self._query_job = dry_run_job
+            return dry_run_stats
+
         df, query_job = self._block.index.to_pandas(
             ordered=True, allow_large_results=allow_large_results
        )
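A short usage sketch of the dry_run flag added to Index.to_pandas above. It assumes a configured bigframes session; the public table name is a placeholder.

import bigframes.pandas as bpd

df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

# dry_run=True: nothing is materialized; a pandas Series of dry-run statistics
# (column/index dtypes, totalBytesProcessed, cacheHit, ...) comes back instead.
stats = df.index.to_pandas(dry_run=True)
print(stats["totalBytesProcessed"])

# Default (dry_run=False): the index is materialized as a pandas Index.
pandas_index = df.index.to_pandas()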
