Skip to content

BUG: Index with duplicate labels raises ValueError in DataFrame.query #52224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
23 changes: 21 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
labeling information
"""
from __future__ import annotations
from pandas.core.indexes.range import RangeIndex

import collections
from collections import abc
Expand Down Expand Up @@ -4330,7 +4331,7 @@ def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None:
def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None:
...

def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | None:
def query(self,expr: str, *,inplace: bool = False, **kwargs) -> DataFrame | None:
"""
Query the columns of a DataFrame with a boolean expression.

Expand Down Expand Up @@ -4467,16 +4468,34 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No
A B C C
0 1 10 10
"""

inplace = validate_bool_kwarg(inplace, "inplace")
if not isinstance(expr, str):
msg = f"expr must be a string to be evaluated, {type(expr)} given"
raise ValueError(msg)
kwargs["level"] = kwargs.pop("level", 0) + 1
kwargs["target"] = None
res = self.eval(expr, **kwargs)

if self.index.duplicated().any():
engine='numexpr'
# Create a copy of the dataframe with a unique index to avoid reindexing errors
unique_index = RangeIndex(len(self.index))
df_copy = self.copy()
df_copy.index = unique_index

# Filter the copied dataframe
filtered_df = df_copy.query(expr, engine=engine)

# Map the filtered index back to the original index labels
index_mapping = dict(zip(unique_index, self.index))
filtered_df.index = filtered_df.index.map(index_mapping)

return filtered_df

res = self.eval(expr, **kwargs)
try:
result = self.loc[res]

except ValueError:
# when res is multi-dimensional loc raises, but this is sometimes a
# valid query
Expand Down