From b69bc8fb2748c02d7a593a1df94582bcaa70eead Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Mon, 13 Feb 2023 18:42:09 +0800 Subject: [PATCH 01/13] Fix init --- pandas/core/groupby/generic.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 19fba398feb08..652499bf24113 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2017,10 +2017,7 @@ def func(df): return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only) func.__name__ = "idxmax" - result = self._python_apply_general( - func, self._obj_with_exclusions, not_indexed_same=True - ) - return result + return self._idxmin_idxmax(func) def idxmin( self, @@ -2103,13 +2100,28 @@ def func(df): return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) func.__name__ = "idxmin" - result = self._python_apply_general( - func, self._obj_with_exclusions, not_indexed_same=True - ) - return result + return self._idxmin_idxmax(func) boxplot = boxplot_frame_groupby + def _idxmin_idxmax(self, func: Callable) -> DataFrame: + result, mutated = self.grouper.apply(func, self._obj_with_exclusions, self.axis) + + if len(result) == 0: + return DataFrame( + [], + columns=self._obj_with_exclusions.columns, + index=self.grouper.result_index, + ).astype( + self.grouper.result_index.dtypes[0] + if isinstance(self.grouper.result_index, MultiIndex) + else self.grouper.result_index.dtype + ) + else: + return self._wrap_applied_output( + self._obj_with_exclusions, result, not_indexed_same=True + ) + def value_counts( self, subset: Sequence[Hashable] | None = None, From 72f2b2756e99ac293575372c898337dd950ba345 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Thu, 16 Feb 2023 10:50:15 +0800 Subject: [PATCH 02/13] Fix Series idxmin/max --- pandas/core/groupby/generic.py | 38 +++++++++++++++++++++------- pandas/tests/groupby/test_groupby.py | 2 ++ 2 files 
changed, 31 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 652499bf24113..a79d942a3103d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1080,13 +1080,37 @@ def nsmallest( @doc(Series.idxmin.__doc__) def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: - result = self._op_via_apply("idxmin", axis=axis, skipna=skipna) - return result + axis = self.axis + + def func(df): + return df.idxmin(axis=axis, skipna=skipna) + + func.__name__ = "idxmin" + return self._idxmin_idxmax(func) @doc(Series.idxmax.__doc__) def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: - result = self._op_via_apply("idxmax", axis=axis, skipna=skipna) - return result + axis = self.axis + + def func(df): + return df.idxmax(axis=axis, skipna=skipna) + + func.__name__ = "idxmax" + return self._idxmin_idxmax(func) + + def _idxmin_idxmax(self, func: Callable) -> DataFrame: + result, mutated = self.grouper.apply(func, self._obj_with_exclusions, self.axis) + + if len(result) == 0: + return Series( + [], + name=self._obj_with_exclusions.name, + index=self.grouper.result_index, + ).astype(self._obj_with_exclusions.index.dtype) + else: + return self._wrap_applied_output( + self._obj_with_exclusions, result, not_indexed_same=True + ) @doc(Series.corr.__doc__) def corr( @@ -2112,11 +2136,7 @@ def _idxmin_idxmax(self, func: Callable) -> DataFrame: [], columns=self._obj_with_exclusions.columns, index=self.grouper.result_index, - ).astype( - self.grouper.result_index.dtypes[0] - if isinstance(self.grouper.result_index, MultiIndex) - else self.grouper.result_index.dtype - ) + ).astype(self._obj_with_exclusions.index.dtype) else: return self._wrap_applied_output( self._obj_with_exclusions, result, not_indexed_same=True diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d969ce4a2bb71..540cb7ba6645d 100644 --- a/pandas/tests/groupby/test_groupby.py 
+++ b/pandas/tests/groupby/test_groupby.py @@ -1984,6 +1984,8 @@ def get_categorical_invalid_expected(): result = get_result() expected = df.set_index(keys)[columns] + if op in ["idxmax", "idxmin"]: + expected = expected.astype(df.index.dtype) if override_dtype is not None: expected = expected.astype(override_dtype) if len(keys) == 1: From 7eec8024cb9006699b958750b7c956ee4fcf2ffc Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Thu, 16 Feb 2023 10:55:22 +0800 Subject: [PATCH 03/13] Typo --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a79d942a3103d..88a6229631a56 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1098,8 +1098,8 @@ def func(df): func.__name__ = "idxmax" return self._idxmin_idxmax(func) - def _idxmin_idxmax(self, func: Callable) -> DataFrame: - result, mutated = self.grouper.apply(func, self._obj_with_exclusions, self.axis) + def _idxmin_idxmax(self, func: Callable) -> Series: + result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) if len(result) == 0: return Series( @@ -2129,7 +2129,7 @@ def func(df): boxplot = boxplot_frame_groupby def _idxmin_idxmax(self, func: Callable) -> DataFrame: - result, mutated = self.grouper.apply(func, self._obj_with_exclusions, self.axis) + result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) if len(result) == 0: return DataFrame( From e200f0e7691836a822d661fb61218f4b99ea90b3 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 17 Feb 2023 12:35:50 +0800 Subject: [PATCH 04/13] Use self.obj_.constructor --- pandas/core/groupby/generic.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 88a6229631a56..30cbcd2979147 100644 --- a/pandas/core/groupby/generic.py +++ 
b/pandas/core/groupby/generic.py @@ -1080,8 +1080,6 @@ def nsmallest( @doc(Series.idxmin.__doc__) def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: - axis = self.axis - def func(df): return df.idxmin(axis=axis, skipna=skipna) @@ -1090,8 +1088,6 @@ def func(df): @doc(Series.idxmax.__doc__) def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: - axis = self.axis - def func(df): return df.idxmax(axis=axis, skipna=skipna) @@ -1102,11 +1098,17 @@ def _idxmin_idxmax(self, func: Callable) -> Series: result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) if len(result) == 0: - return Series( - [], - name=self._obj_with_exclusions.name, + return self.obj._constructor( index=self.grouper.result_index, - ).astype(self._obj_with_exclusions.index.dtype) + name=self._obj_with_exclusions.name, + dtype=self._obj_with_exclusions.index.dtype, + ) + # return Series( + # [], + # name=self._obj_with_exclusions.name, + # index=self.grouper.result_index, + # dtype=self._obj_with_exclusions.index.dtype, + # ) else: return self._wrap_applied_output( self._obj_with_exclusions, result, not_indexed_same=True @@ -2132,11 +2134,11 @@ def _idxmin_idxmax(self, func: Callable) -> DataFrame: result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) if len(result) == 0: - return DataFrame( - [], - columns=self._obj_with_exclusions.columns, + return self.obj._constructor( index=self.grouper.result_index, - ).astype(self._obj_with_exclusions.index.dtype) + columns=self._obj_with_exclusions.columns, + dtype=self._obj_with_exclusions.index.dtype, + ) else: return self._wrap_applied_output( self._obj_with_exclusions, result, not_indexed_same=True From a9443b7d893e60565666db97ad674ba1f18f7476 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 17 Feb 2023 12:39:02 +0800 Subject: [PATCH 05/13] Fix mypy incompatible return value type --- pandas/core/groupby/generic.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 30cbcd2979147..b227b1ef8cfe7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1094,7 +1094,7 @@ def func(df): func.__name__ = "idxmax" return self._idxmin_idxmax(func) - def _idxmin_idxmax(self, func: Callable) -> Series: + def _idxmin_idxmax(self, func: Callable) -> DataFrame | Series: result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) if len(result) == 0: @@ -1103,12 +1103,7 @@ def _idxmin_idxmax(self, func: Callable) -> Series: name=self._obj_with_exclusions.name, dtype=self._obj_with_exclusions.index.dtype, ) - # return Series( - # [], - # name=self._obj_with_exclusions.name, - # index=self.grouper.result_index, - # dtype=self._obj_with_exclusions.index.dtype, - # ) + else: return self._wrap_applied_output( self._obj_with_exclusions, result, not_indexed_same=True From ee4b751469df3ed5fed10f4d8f28eec7962dd7c8 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Fri, 17 Feb 2023 14:05:02 +0800 Subject: [PATCH 06/13] Fix return type annotation --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b227b1ef8cfe7..7df50eeb3d1ba 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1079,7 +1079,7 @@ def nsmallest( return result @doc(Series.idxmin.__doc__) - def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: + def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series | DataFrame: def func(df): return df.idxmin(axis=axis, skipna=skipna) @@ -1087,7 +1087,7 @@ def func(df): return self._idxmin_idxmax(func) @doc(Series.idxmax.__doc__) - def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: + def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series | DataFrame: def func(df): return df.idxmax(axis=axis, skipna=skipna) From 
498917e7b5d5bee393c7adbcdccaab392906b5fb Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sat, 18 Feb 2023 11:41:30 +0800 Subject: [PATCH 07/13] Less code way --- pandas/core/groupby/generic.py | 55 ++++++++-------------------------- 1 file changed, 12 insertions(+), 43 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7df50eeb3d1ba..61a4cb6a1b2d4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1080,34 +1080,13 @@ def nsmallest( @doc(Series.idxmin.__doc__) def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series | DataFrame: - def func(df): - return df.idxmin(axis=axis, skipna=skipna) - - func.__name__ = "idxmin" - return self._idxmin_idxmax(func) + result = self._op_via_apply("idxmax", axis=axis, skipna=skipna) + return result.astype(self._obj_with_exclusions.index.dtype) @doc(Series.idxmax.__doc__) def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series | DataFrame: - def func(df): - return df.idxmax(axis=axis, skipna=skipna) - - func.__name__ = "idxmax" - return self._idxmin_idxmax(func) - - def _idxmin_idxmax(self, func: Callable) -> DataFrame | Series: - result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) - - if len(result) == 0: - return self.obj._constructor( - index=self.grouper.result_index, - name=self._obj_with_exclusions.name, - dtype=self._obj_with_exclusions.index.dtype, - ) - - else: - return self._wrap_applied_output( - self._obj_with_exclusions, result, not_indexed_same=True - ) + result = self._op_via_apply("idxmin", axis=axis, skipna=skipna) + return result.astype(self._obj_with_exclusions.index.dtype) @doc(Series.corr.__doc__) def corr( @@ -2037,8 +2016,10 @@ def idxmax( def func(df): return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only) - func.__name__ = "idxmax" - return self._idxmin_idxmax(func) + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) + 
return result.astype(self._obj_with_exclusions.index.dtype, copy=False) def idxmin( self, @@ -2120,25 +2101,13 @@ def idxmin( def func(df): return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) - func.__name__ = "idxmin" - return self._idxmin_idxmax(func) + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) + return result.astype(self._obj_with_exclusions.index.dtype, copy=False) boxplot = boxplot_frame_groupby - def _idxmin_idxmax(self, func: Callable) -> DataFrame: - result, _ = self.grouper.apply(func, self._obj_with_exclusions, self.axis) - - if len(result) == 0: - return self.obj._constructor( - index=self.grouper.result_index, - columns=self._obj_with_exclusions.columns, - dtype=self._obj_with_exclusions.index.dtype, - ) - else: - return self._wrap_applied_output( - self._obj_with_exclusions, result, not_indexed_same=True - ) - def value_counts( self, subset: Sequence[Hashable] | None = None, From ae26c2e8a4c343327af56e8c21c81ce0e3480a25 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sat, 18 Feb 2023 14:49:05 +0800 Subject: [PATCH 08/13] Fix typo --- pandas/core/groupby/generic.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 61a4cb6a1b2d4..17f4af6a27bba 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1079,13 +1079,13 @@ def nsmallest( return result @doc(Series.idxmin.__doc__) - def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series | DataFrame: - result = self._op_via_apply("idxmax", axis=axis, skipna=skipna) + def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: + result = self._op_via_apply("idxmin", axis=axis, skipna=skipna) return result.astype(self._obj_with_exclusions.index.dtype) @doc(Series.idxmax.__doc__) - def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series | DataFrame: - result = 
self._op_via_apply("idxmin", axis=axis, skipna=skipna) + def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: + result = self._op_via_apply("idxmax", axis=axis, skipna=skipna) return result.astype(self._obj_with_exclusions.index.dtype) @doc(Series.corr.__doc__) @@ -2016,6 +2016,7 @@ def idxmax( def func(df): return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only) + func.__name__ = "idxmax" result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) @@ -2101,6 +2102,7 @@ def idxmin( def func(df): return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) + func.__name__ = "idxmax" result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) From 8d6a1ee8c14d0bd57864d6779e8b004a8d166491 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sat, 18 Feb 2023 15:12:02 +0800 Subject: [PATCH 09/13] Fix typo --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 17f4af6a27bba..f3ae42959adf1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2102,7 +2102,7 @@ def idxmin( def func(df): return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) - func.__name__ = "idxmax" + func.__name__ = "idxmin" result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) From db4bc289d8ed8307b19c9838368039e2f52bfb59 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Sat, 18 Feb 2023 23:18:50 +0800 Subject: [PATCH 10/13] Fix test --- pandas/core/groupby/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f3ae42959adf1..803374b364209 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1081,12 +1081,12 @@ def nsmallest( @doc(Series.idxmin.__doc__) 
def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: result = self._op_via_apply("idxmin", axis=axis, skipna=skipna) - return result.astype(self._obj_with_exclusions.index.dtype) + return result.astype(self.obj.index.dtype) if result.empty else result @doc(Series.idxmax.__doc__) def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: result = self._op_via_apply("idxmax", axis=axis, skipna=skipna) - return result.astype(self._obj_with_exclusions.index.dtype) + return result.astype(self.obj.index.dtype) if result.empty else result @doc(Series.corr.__doc__) def corr( @@ -2020,7 +2020,7 @@ def func(df): result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) - return result.astype(self._obj_with_exclusions.index.dtype, copy=False) + return result.astype(self.obj.index.dtype) if result.empty else result def idxmin( self, @@ -2106,7 +2106,7 @@ def func(df): result = self._python_apply_general( func, self._obj_with_exclusions, not_indexed_same=True ) - return result.astype(self._obj_with_exclusions.index.dtype, copy=False) + return result.astype(self.obj.index.dtype) if result.empty else result boxplot = boxplot_frame_groupby From 21e0a85aaf023fd5bd111aa7991b6c2421684e87 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Mon, 20 Feb 2023 10:20:35 +0800 Subject: [PATCH 11/13] what's new 2.0 --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index df958a63c4528..e6b42c1b1edab 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1376,6 +1376,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) - Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Bug in 
:meth:`DataFrameGroupBy.ohlc` ignoring ``as_index=False`` (:issue:`51413`) +- Bug in :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmax` return wrong dtype when used on empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`) - Reshaping From ea2abf3b1a066a113ee0d64252caadcddc33f2c5 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 22 Feb 2023 10:24:27 +0800 Subject: [PATCH 12/13] Remove unclean whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e6b42c1b1edab..df958a63c4528 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1376,7 +1376,6 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) - Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Bug in :meth:`DataFrameGroupBy.ohlc` ignoring ``as_index=False`` (:issue:`51413`) -- Bug in :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmax` return wrong dtype when used on empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`) - Reshaping From 0ec9d6ff7c694ff7f879c1c720066d8a19e83d5a Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 22 Feb 2023 10:28:05 +0800 Subject: [PATCH 13/13] what's new 2.1.0 --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b83f317814ad9..00fe5a062ea81 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -181,7 +181,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :meth:`DataFrameGroupBy.idxmin`, 
:meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, and :meth:`SeriesGroupBy.idxmax` returning the wrong dtype when used on an empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`) - Reshaping