From 5904aded364b15e293ceb3f495b229d8dddbc04f Mon Sep 17 00:00:00 2001 From: Stelios Petrakis <92467926+steliospetrakis02@users.noreply.github.com> Date: Sun, 26 Mar 2023 18:43:27 +0300 Subject: [PATCH 1/6] Update frame.py --- pandas/core/frame.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bef7022a7d10f..1534b82d192e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9,6 +9,7 @@ labeling information """ from __future__ import annotations +from pandas.core.indexes.range import RangeIndex import collections from collections import abc @@ -494,6 +495,7 @@ class DataFrame(NDFrame, OpsMixin): + """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. @@ -4339,6 +4341,24 @@ def query(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> DataFr def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: ... + def query_with_duplicate_index(self, expr, engine='numexpr'): + + # Create a copy of the dataframe with a unique index to avoid reindexing errors + unique_index = RangeIndex(len(self.index)) + df_copy = self.copy() + df_copy.index = unique_index + + # Filter the copied dataframe + filtered_df = df_copy.query(expr, engine=engine) + + # Map the filtered index back to the original index labels + index_mapping = dict(zip(unique_index, self.index)) + filtered_df.index = filtered_df.index.map(index_mapping) + + # Return the filtered dataframe + return filtered_df + + @overload def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: ... 
From 7e74945a1d1fa3114ecd68bb95827cf6d3f291da Mon Sep 17 00:00:00 2001 From: Stelios Petrakis <92467926+steliospetrakis02@users.noreply.github.com> Date: Sun, 26 Mar 2023 18:49:09 +0300 Subject: [PATCH 2/6] Update frame.py --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1534b82d192e3..25f439c48243f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -495,7 +495,6 @@ class DataFrame(NDFrame, OpsMixin): - """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. From 2d6b1101243238f4b92bdf0b61b358aee784e703 Mon Sep 17 00:00:00 2001 From: Stelios Petrakis <92467926+steliospetrakis02@users.noreply.github.com> Date: Tue, 28 Mar 2023 18:46:35 +0300 Subject: [PATCH 3/6] Update frame.py --- pandas/core/frame.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 25f439c48243f..bff4e5eddde44 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4340,29 +4340,13 @@ def query(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> DataFr def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: ... - def query_with_duplicate_index(self, expr, engine='numexpr'): - # Create a copy of the dataframe with a unique index to avoid reindexing errors - unique_index = RangeIndex(len(self.index)) - df_copy = self.copy() - df_copy.index = unique_index - - # Filter the copied dataframe - filtered_df = df_copy.query(expr, engine=engine) - - # Map the filtered index back to the original index labels - index_mapping = dict(zip(unique_index, self.index)) - filtered_df.index = filtered_df.index.map(index_mapping) - - # Return the filtered dataframe - return filtered_df @overload def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: ... 
- def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | None: """ Query the columns of a DataFrame with a boolean expression. @@ -4505,8 +4489,26 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No raise ValueError(msg) kwargs["level"] = kwargs.pop("level", 0) + 1 kwargs["target"] = None - res = self.eval(expr, **kwargs) + + + try: + res = self.eval(expr, **kwargs) + except ValueError: + + # Create a copy of the dataframe with a unique index to avoid reindexing errors + unique_index = RangeIndex(len(self.index)) + df_copy = self.copy() + df_copy.index = unique_index + + # Filter the copied dataframe + filtered_df = df_copy.query(expr, engine=engine) + + # Map the filtered index back to the original index labels + index_mapping = dict(zip(unique_index, self.index)) + filtered_df.index = filtered_df.index.map(index_mapping) + return filtered_df + try: result = self.loc[res] except ValueError: From de419f205b6179e2538ca56c8d5f8387ed6d4520 Mon Sep 17 00:00:00 2001 From: Stelios Petrakis <92467926+steliospetrakis02@users.noreply.github.com> Date: Tue, 28 Mar 2023 18:48:16 +0300 Subject: [PATCH 4/6] Update frame.py --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bff4e5eddde44..ef7a74eb8f4cd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4347,6 +4347,7 @@ def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: ... + def query(self,expr: str, engine='numexpr', *, inplace: bool = False, **kwargs) -> DataFrame | None: """ Query the columns of a DataFrame with a boolean expression. 
From dc1f7c6502c8cf7fe61133391773f3f25f39efdc Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Wed, 29 Mar 2023 18:07:50 +0300 Subject: [PATCH 5/6] Keep signature as it was --- pandas/core/frame.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6818e531dae9d..56474c41c3323 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4332,14 +4332,11 @@ def query(self, expr: str, *, inplace: Literal[False] = ..., **kwargs) -> DataFr def query(self, expr: str, *, inplace: Literal[True], **kwargs) -> None: ... - - - @overload def query(self, expr: str, *, inplace: bool = ..., **kwargs) -> DataFrame | None: ... - def query(self,expr: str, engine='numexpr', *, inplace: bool = False, **kwargs) -> DataFrame | None: + def query(self,expr: str, *,inplace: bool = False, **kwargs) -> DataFrame | None: """ Query the columns of a DataFrame with a boolean expression. @@ -4476,6 +4473,7 @@ def query(self,expr: str, engine='numexpr', *, inplace: bool = False, **kwargs) A B C C 0 1 10 10 """ + engine='numexpr' inplace = validate_bool_kwarg(inplace, "inplace") if not isinstance(expr, str): msg = f"expr must be a string to be evaluated, {type(expr)} given" From 765ad51faa8dc7298ce5b1834e269de0185019c9 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Wed, 29 Mar 2023 19:41:52 +0300 Subject: [PATCH 6/6] Added if statement in order to check when index has duplicate labels --- pandas/core/frame.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 56474c41c3323..796e462b92250 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4473,19 +4473,16 @@ def query(self,expr: str, *,inplace: bool = False, **kwargs) -> DataFrame | None A B C C 0 1 10 10 """ - engine='numexpr' + inplace = validate_bool_kwarg(inplace, "inplace") if not isinstance(expr, str): msg = f"expr must be a string to be evaluated, {type(expr)} 
given" raise ValueError(msg) kwargs["level"] = kwargs.pop("level", 0) + 1 kwargs["target"] = None - - - try: - res = self.eval(expr, **kwargs) - except ValueError: - + + if self.index.duplicated().any(): + engine='numexpr' # Create a copy of the dataframe with a unique index to avoid reindexing errors unique_index = RangeIndex(len(self.index)) df_copy = self.copy() @@ -4497,11 +4494,13 @@ def query(self,expr: str, *,inplace: bool = False, **kwargs) -> DataFrame | None # Map the filtered index back to the original index labels index_mapping = dict(zip(unique_index, self.index)) filtered_df.index = filtered_df.index.map(index_mapping) - + return filtered_df - + + res = self.eval(expr, **kwargs) try: result = self.loc[res] + except ValueError: # when res is multi-dimensional loc raises, but this is sometimes a # valid query