Skip to content

BUG: Index with duplicate labels raises ValueError in DataFrame.query #52224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
19 changes: 19 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
labeling information
"""
from __future__ import annotations
from pandas.core.indexes.range import RangeIndex

import collections
from collections import abc
Expand Down Expand Up @@ -4339,6 +4340,24 @@ def query(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> DataFr
# Typing stub (body is ``...``): with ``inplace`` typed as the literal ``True``
# the declared return type is ``None``.
def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None:
...

def query_with_duplicate_index(
    self, expr: str, engine: str = "numexpr"
) -> DataFrame:
    """
    Query the columns of a frame whose index may contain duplicate labels.

    The expression is evaluated against a temporary frame whose index has
    been replaced by the unique positions ``0 .. len(self) - 1``.  The rows
    that pass the filter then get their original index labels back.

    Parameters
    ----------
    expr : str
        Query string, forwarded unchanged to ``DataFrame.query``.
    engine : str, default "numexpr"
        Evaluation engine, forwarded unchanged to ``DataFrame.query``.

    Returns
    -------
    DataFrame
        A new frame holding the selected rows, indexed by their original
        labels (duplicates included).  The index name(s) and dtype of the
        original index are preserved.  ``self`` is not modified.

    Notes
    -----
    The expression is evaluated while the frame carries a positional index,
    so a reference to the index inside ``expr`` resolves to row positions
    rather than to the original labels.
    """
    # Swap in a unique positional index so the query never has to reindex
    # on duplicate labels.  ``reset_index`` returns a new object, so the
    # caller's frame is left untouched.
    positional = self.reset_index(drop=True)
    filtered = positional.query(expr, engine=engine)

    # The labels that survive the filter are row positions in the original
    # frame.  Taking them directly from the original index keeps its name(s),
    # dtype and MultiIndex levels, which a dict-based ``Index.map`` loses.
    filtered.index = self.index.take(filtered.index.to_numpy())
    return filtered


# Typing overload for ``inplace`` given as a plain ``bool``: the declared
# return type is ``DataFrame | None``.
@overload
def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None:
...
Expand Down