REF: implement Groupby idxmin, idxmax without fallback (#38264)

jbrockmendel · web-flow · commit ca3e35111deb · 2020-12-04T18:41:41.000-05:00
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -50,14 +50,14 @@
 )
 from pandas.core.dtypes.missing import isna, notna
 
+from pandas.core import algorithms, nanops
 from pandas.core.aggregation import (
     agg_list_like,
     aggregate,
     maybe_mangle_lambdas,
     reconstruct_func,
     validate_func_kwargs,
 )
-import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
@@ -1826,4 +1826,46 @@ def nunique(self, dropna: bool = True) -> DataFrame:
             self._insert_inaxis_grouper_inplace(results)
         return results
 
+    @Appender(DataFrame.idxmax.__doc__)
+    def idxmax(self, axis=0, skipna: bool = True):
+        axis = DataFrame._get_axis_number(axis)
+        numeric_only = None if axis == 0 else False
+
+        def func(df):
+            # NB: for axis=0 numeric_only is None; in DataFrame it is False GH#38217
+            res = df._reduce(
+                nanops.nanargmax,
+                "argmax",
+                axis=axis,
+                skipna=skipna,
+                numeric_only=numeric_only,
+            )
+            indices = res._values  # positions; negatives become NaN below
+            index = df._get_axis(axis)  # looked up by position below
+            result = [index[i] if i >= 0 else np.nan for i in indices]
+            return df._constructor_sliced(result, index=res.index)
+
+        return self._python_apply_general(func, self._obj_with_exclusions)
+
+    @Appender(DataFrame.idxmin.__doc__)
+    def idxmin(self, axis=0, skipna: bool = True):
+        axis = DataFrame._get_axis_number(axis)
+        numeric_only = None if axis == 0 else False
+
+        def func(df):
+            # NB: for axis=0 numeric_only is None; in DataFrame it is False GH#38217
+            res = df._reduce(
+                nanops.nanargmin,
+                "argmin",
+                axis=axis,
+                skipna=skipna,
+                numeric_only=numeric_only,
+            )
+            indices = res._values  # positions; negatives become NaN below
+            index = df._get_axis(axis)  # looked up by position below
+            result = [index[i] if i >= 0 else np.nan for i in indices]
+            return df._constructor_sliced(result, index=res.index)
+
+        return self._python_apply_general(func, self._obj_with_exclusions)
+
     boxplot = boxplot_frame_groupby
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -11,7 +11,6 @@ class providing the base-class of operations.
 import datetime
 from functools import partial, wraps
 import inspect
-import re
 import types
 from typing import (
     Callable,
@@ -797,23 +796,7 @@ def curried(x):
             if name in base.plotting_methods:
                 return self.apply(curried)
 
-            try:
-                return self._python_apply_general(curried, self._obj_with_exclusions)
-            except TypeError as err:
-                if not re.search(
-                    "reduction operation '.*' not allowed for this dtype", str(err)
-                ):
-                    # We don't have a cython implementation
-                    # TODO: is the above comment accurate?
-                    raise
-
-            if self.obj.ndim == 1:
-                # this can be called recursively, so need to raise ValueError
-                raise ValueError
-
-            # GH#3688 try to operate item-by-item
-            result = self._aggregate_item_by_item(name, *args, **kwargs)
-            return result
+            return self._python_apply_general(curried, self._obj_with_exclusions)
 
         wrapper.__name__ = name
         return wrapper
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -539,6 +539,27 @@ def test_idxmin_idxmax_returns_int_types(func, values):
     tm.assert_frame_equal(result, expected)
 
 
+def test_idxmin_idxmax_axis1():
+    df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
+    df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4]  # overwrite "A" with group keys
+
+    gb = df.groupby("A")
+
+    res = gb.idxmax(axis=1)
+
+    alt = df.iloc[:, 1:].idxmax(axis=1)  # ungrouped result without "A"
+    indexer = res.index.get_level_values(1)  # used to reorder alt like res
+
+    tm.assert_series_equal(alt[indexer], res.droplevel("A"))
+
+    df["E"] = pd.date_range("2016-01-01", periods=10)  # add a datetime column
+    gb2 = df.groupby("A")
+
+    msg = "reduction operation 'argmax' not allowed for this dtype"
+    with pytest.raises(TypeError, match=msg):  # expected with "E" present
+        gb2.idxmax(axis=1)
+
+
 def test_groupby_cumprod():
     # GH 4095
     df = DataFrame({"key": ["b"] * 10, "value": 2})