diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py
index d1a0c42e97..6258eb00d5 100644
--- a/bigframes/core/indexers.py
+++ b/bigframes/core/indexers.py
@@ -27,6 +27,7 @@
 import bigframes.core.guid as guid
 import bigframes.core.indexes as indexes
 import bigframes.core.scalar
+import bigframes.core.window_spec as windows
 import bigframes.dataframe
 import bigframes.dtypes
 import bigframes.exceptions as bfe
@@ -477,6 +478,18 @@ def _iloc_getitem_series_or_dataframe(
                 Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
                 series_or_dataframe.iloc[0:0],
             )
+
+        # Work out which offset indexes the key needs: a key with only
+        # non-negative or only negative positions needs one, a mixed key both.
+        if isinstance(key, (bigframes.series.Series, indexes.Index)):
+            # Avoid data download: assume a remote key may mix both signs.
+            needs_pos_index = needs_neg_index = True
+        else:
+            # Iterate instead of reading key[0]: some list-likes (e.g. a pandas
+            # Series with a non-default index) resolve [0] by label.
+            needs_neg_index = any(k < 0 for k in key)
+            needs_pos_index = any(k >= 0 for k in key)
+
         if isinstance(series_or_dataframe, bigframes.series.Series):
             original_series_name = series_or_dataframe.name
             series_name = (
@@ -497,7 +510,30 @@ def _iloc_getitem_series_or_dataframe(
         block = df._block
         # explicitly set index to offsets, reset_index may not generate offsets in some modes
         block, offsets_id = block.promote_offsets("temp_iloc_offsets_")
-        block = block.set_index([offsets_id])
+        pos_block = block.set_index([offsets_id])
+
+        if needs_neg_index:
+            # Negative labels are built as (offset - size); presumably size is
+            # the total row count, giving -size..-1 -- TODO confirm.
+            neg_block, size_col_id = block.apply_window_op(
+                offsets_id,
+                ops.aggregations.SizeUnaryOp(),
+                window_spec=windows.rows(),
+            )
+            neg_block, neg_index_id = neg_block.apply_binary_op(
+                offsets_id, size_col_id, ops.SubOp()
+            )
+
+            neg_block = neg_block.set_index([neg_index_id]).drop_columns(
+                [size_col_id, offsets_id]
+            )
+
+        if needs_pos_index and needs_neg_index:
+            block = pos_block.concat([neg_block], how="inner")
+        elif needs_neg_index:
+            block = neg_block
+        else:
+            block = pos_block
+
         df = bigframes.dataframe.DataFrame(block)
 
         result = df.loc[key]
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index b97846d992..c2e4a1c8ad 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -4400,9 +4400,15 @@ def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
     )
 
 
-def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
-    index_list = [0, 0, 0, 5, 4, 7]
-
+@pytest.mark.parametrize(
+    "index_list",
+    [
+        [0, 1, 2, 3, 4, 4],
+        [0, 0, 0, 5, 4, 7, -2, -5, 3],
+        [-1, -2, -3, -4, -5, -5],
+    ],
+)
+def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list):
     bf_result = scalars_df_index.iloc[index_list]
     pd_result = scalars_pandas_df_index.iloc[index_list]
 
@@ -4412,11 +4418,17 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
     )
 
 
+@pytest.mark.parametrize(
+    "index_list",
+    [
+        [0, 1, 2, 3, 4, 4],
+        [0, 0, 0, 5, 4, 7, -2, -5, 3],
+        [-1, -2, -3, -4, -5, -5],
+    ],
+)
 def test_iloc_list_partial_ordering(
-    scalars_df_partial_ordering, scalars_pandas_df_index
+    scalars_df_partial_ordering, scalars_pandas_df_index, index_list
 ):
-    index_list = [0, 0, 0, 5, 4, 7]
-
     bf_result = scalars_df_partial_ordering.iloc[index_list]
     pd_result = scalars_pandas_df_index.iloc[index_list]
 