From 9751f6d5fd8784a96701d900c52e6aefc4e7d951 Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Sun, 18 Dec 2022 03:03:42 +0530 Subject: [PATCH 1/8] Fix axis=1 behaviour for str reducer idxmin --- pandas/core/groupby/generic.py | 7 ++----- pandas/tests/groupby/transform/test_transform.py | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 955f65585963d..c32e9ec4e8184 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1884,12 +1884,9 @@ def func(df): numeric_only_default="False", ) def idxmin( - self, - axis: Axis = 0, - skipna: bool = True, - numeric_only: bool = False, + self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False, **kwargs ) -> DataFrame: - axis = DataFrame._get_axis_number(axis) + axis = kwargs.setdefault("axis", self.axis) def func(df): res = df._reduce( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index d0c8b53f13399..de379c4add554 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -186,7 +186,6 @@ def test_transform_axis_1_reducer(request, reduction_func): if reduction_func in ( "corrwith", "idxmax", - "idxmin", "ngroup", "nth", ): From 915058120e32c7eb40b4a4fc9fa28686d174aa6f Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Sun, 18 Dec 2022 03:23:55 +0530 Subject: [PATCH 2/8] Add DataFrame._get_axis_number --- pandas/core/groupby/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c32e9ec4e8184..422d1bc1dd41b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1887,6 +1887,7 @@ def idxmin( self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False, **kwargs ) -> DataFrame: axis = kwargs.setdefault("axis", self.axis) + axis = DataFrame._get_axis_number(axis) def func(df): res = df._reduce( From 78c097747a75b7246ce90651c348ab53452bc5fc Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Fri, 23 Dec 2022 01:09:17 +0530 Subject: [PATCH 3/8] Ignore axis argument for groupby idxmin --- pandas/core/groupby/generic.py | 80 +++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 422d1bc1dd41b..fff502909a485 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1879,14 +1879,82 @@ def func(df): ) return result - @doc( - _shared_docs["idxmin"], - numeric_only_default="False", - ) def idxmin( - self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False, **kwargs + self, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool = False, ) -> DataFrame: - axis = kwargs.setdefault("axis", self.axis) + """ + Return index of first occurrence of minimum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + The axis argument is ignored, instead we use the grouper's axis. + + .. versionchanged:: 1.5.3 + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series + Indexes of minima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmin : Return index of the minimum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmin``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the minimum value in each column. + + >>> df.idxmin() + consumption Pork + co2_emissions Wheat Products + dtype: object + + To return the index for the minimum value in each row, use ``axis="columns"``. + + >>> df.idxmin(axis="columns") + Pork consumption + Wheat Products co2_emissions + Beef consumption + dtype: object + """ + + axis = self.axis axis = DataFrame._get_axis_number(axis) def func(df): From 63bcc5f9dea99a531798aa1ea9aa0253169e80e6 Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Fri, 23 Dec 2022 12:13:44 +0530 Subject: [PATCH 4/8] Fix doctest --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fff502909a485..c8487621ae55f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1928,8 +1928,8 @@ def idxmin( -------- Consider a dataset containing food consumption in Argentina. - >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48], - ... 'co2_emissions': [37.2, 19.66, 1712]}}, + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, ... index=['Pork', 'Wheat Products', 'Beef']) >>> df From b2e6567b4f4b7726aec8ee251a6c0932b6a960da Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Thu, 29 Dec 2022 22:48:24 +0530 Subject: [PATCH 5/8] Fix idxmax axis=1 failure --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/groupby/generic.py | 79 ++++++++++++++++--- .../tests/groupby/transform/test_transform.py | 1 - 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 8b91507e84a56..ff7da9b9ee3d6 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -903,6 +903,7 @@ Groupby/resample/rolling - Bug in :meth:`.SeriesGroupBy.describe` with ``as_index=False`` would have the incorrect shape (:issue:`49256`) - Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` with ``dropna=False`` would drop NA values when the grouper was categorical (:issue:`36327`) - Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`). The method now ignores the ``axis`` argument, instead we use the grouper's ``axis``. Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c8487621ae55f..f72ddfd672ea7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -95,7 +95,6 @@ ) from pandas.core.indexes.category import CategoricalIndex from pandas.core.series import Series -from pandas.core.shared_docs import _shared_docs from pandas.core.util.numba_ import maybe_use_numba from pandas.plotting import boxplot_frame_groupby @@ -1848,17 +1847,81 @@ def nunique(self, dropna: bool = True) -> DataFrame: return results - @doc( - _shared_docs["idxmax"], - numeric_only_default="False", - ) def idxmax( self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: - axis = DataFrame._get_axis_number(axis) + """ + Return index of first occurrence of maximum over requested axis. + + NA/null values are excluded. + + Parameters + ---------- + axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. + The axis argument is ignored, instead we use the grouper's axis. + + .. versionchanged:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + Returns + ------- + Series + Indexes of maxima along the specified axis. + + Raises + ------ + ValueError + * If the row/column is empty + + See Also + -------- + Series.idxmax : Return index of the maximum element. + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmax``. + + Examples + -------- + Consider a dataset containing food consumption in Argentina. + + >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], + ... 'co2_emissions': [37.2, 19.66, 1712]}, + ... index=['Pork', 'Wheat Products', 'Beef']) + + >>> df + consumption co2_emissions + Pork 10.51 37.20 + Wheat Products 103.11 19.66 + Beef 55.48 1712.00 + + By default, it returns the index for the maximum value in each column. + + >>> df.idxmax() + consumption Wheat Products + co2_emissions Beef + dtype: object + + To return the index for the maximum value in each row, use ``axis="columns"``. + + >>> df.idxmax(axis="columns") + Pork co2_emissions + Wheat Products consumption + Beef co2_emissions + dtype: object + """ + axis = self.axis def func(df): res = df._reduce( @@ -1896,7 +1959,7 @@ def idxmin( The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. The axis argument is ignored, instead we use the grouper's axis. - .. versionchanged:: 1.5.3 + .. versionchanged:: 2.0.0 skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result @@ -1953,9 +2016,7 @@ def idxmin( Beef consumption dtype: object """ - axis = self.axis - axis = DataFrame._get_axis_number(axis) def func(df): res = df._reduce( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index de379c4add554..3d38639904dc2 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -185,7 +185,6 @@ def test_transform_axis_1_reducer(request, reduction_func): # GH#45715 if reduction_func in ( "corrwith", - "idxmax", "ngroup", "nth", ): From 564c6fc69fd5890b5f56ebb992dc12e46f344554 Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Sat, 31 Dec 2022 01:04:48 +0530 Subject: [PATCH 6/8] Modify whatsnew entry --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f952dd1a83438..4817fb6829888 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -944,7 +944,7 @@ Groupby/resample/rolling - Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`) - Bug in :meth:`.SeriesGroupBy.nth` would raise when grouper contained NA values after subsetting from a :class:`DataFrameGroupBy` (:issue:`26454`) - Bug in :meth:`DataFrame.groupby` would not include a :class:`.Grouper` specified by ``key`` in the result when ``as_index=False`` (:issue:`50413`) -- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`). The method now ignores the ``axis`` argument, instead we use the grouper's ``axis``. +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`) Reshaping ^^^^^^^^^ From 41bcc9a2f68c52c340f94cb477fcdfff9de95a87 Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Thu, 5 Jan 2023 01:01:18 +0530 Subject: [PATCH 7/8] Default axis to None --- pandas/core/groupby/generic.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f72ddfd672ea7..4c4a344d62140 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1849,7 +1849,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: def idxmax( self, - axis: Axis = 0, + axis: Axis = None, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: @@ -1860,9 +1860,9 @@ def idxmax( Parameters ---------- - axis : {{0 or 'index', 1 or 'columns'}}, default 0 + axis : {{0 or 'index', 1 or 'columns'}}, default None The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - The axis argument is ignored, instead we use the grouper's axis. + If axis is not provided, grouper's axis is used. .. versionchanged:: 2.0.0 @@ -1921,7 +1921,8 @@ def idxmax( Beef co2_emissions dtype: object """ - axis = self.axis + if axis is None: + axis = self.axis def func(df): res = df._reduce( @@ -1944,7 +1945,7 @@ def func(df): def idxmin( self, - axis: Axis = 0, + axis: Axis = None, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: @@ -1955,9 +1956,9 @@ def idxmin( Parameters ---------- - axis : {{0 or 'index', 1 or 'columns'}}, default 0 + axis : {{0 or 'index', 1 or 'columns'}}, default None The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - The axis argument is ignored, instead we use the grouper's axis. + If axis is not provided, grouper's axis is used. .. versionchanged:: 2.0.0 @@ -2016,7 +2017,8 @@ def idxmin( Beef consumption dtype: object """ - axis = self.axis + if axis is None: + axis = self.axis def func(df): res = df._reduce( From 62f2a600de1748eb00fa4d4d48a9ac569bfb62a1 Mon Sep 17 00:00:00 2001 From: iofall <50991099+iofall@users.noreply.github.com> Date: Thu, 5 Jan 2023 11:35:29 +0530 Subject: [PATCH 8/8] Change axis typehint for idxmax and idxmin --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4c4a344d62140..8cd1a2543e23a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1849,7 +1849,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: def idxmax( self, - axis: Axis = None, + axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: @@ -1945,7 +1945,7 @@ def func(df): def idxmin( self, - axis: Axis = None, + axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: