diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c2017c0acc55e..6751227fcdddc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9,6 +9,7 @@ labeling information """ from __future__ import annotations +from pandas.core.indexes.range import RangeIndex import collections from collections import abc @@ -4330,7 +4331,7 @@ def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: ... - def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | None: + def query(self,expr: str, *,inplace: bool = False, **kwargs) -> DataFrame | None: """ Query the columns of a DataFrame with a boolean expression. @@ -4467,16 +4468,34 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No A B C C 0 1 10 10 """ + inplace = validate_bool_kwarg(inplace, "inplace") if not isinstance(expr, str): msg = f"expr must be a string to be evaluated, {type(expr)} given" raise ValueError(msg) kwargs["level"] = kwargs.pop("level", 0) + 1 kwargs["target"] = None - res = self.eval(expr, **kwargs) + if self.index.duplicated().any(): + engine='numexpr' + # Create a copy of the dataframe with a unique index to avoid reindexing errors + unique_index = RangeIndex(len(self.index)) + df_copy = self.copy() + df_copy.index = unique_index + + # Filter the copied dataframe + filtered_df = df_copy.query(expr, engine=engine) + + # Map the filtered index back to the original index labels + index_mapping = dict(zip(unique_index, self.index)) + filtered_df.index = filtered_df.index.map(index_mapping) + + return filtered_df + + res = self.eval(expr, **kwargs) try: result = self.loc[res] + except ValueError: # when res is multi-dimensional loc raises, but this is sometimes a # valid query