From f29fff2a84972ad6ff51a8a63c3560811843f4da Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 29 Nov 2020 20:03:38 -0800 Subject: [PATCH 1/3] REF: do idxmax, idxmin block-wise --- pandas/core/frame.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9cc9c9ef200cd..7414491a8d2b0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9001,7 +9001,18 @@ def idxmin(self, axis=0, skipna=True) -> Series: dtype: object """ axis = self._get_axis_number(axis) - indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) + + if self._can_fast_transpose or axis == 1: + # i.e. self.values call is cheap and non-casting (or unavoidable) + indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) + else: + bm_axis = 1 - axis + + def blk_func(bvalues): + return nanops.nanargmin(bvalues, axis=bm_axis, skipna=skipna) + + mgr, _ = self._mgr.reduce(blk_func) + indices = mgr.as_array().ravel() # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame @@ -9074,7 +9085,18 @@ def idxmax(self, axis=0, skipna=True) -> Series: dtype: object """ axis = self._get_axis_number(axis) - indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) + + if self._can_fast_transpose or axis == 1: + # i.e. self.values call is cheap and non-casting (or unavoidable) + indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) + else: + bm_axis = 1 - axis + + def blk_func(bvalues): + return nanops.nanargmax(bvalues, axis=bm_axis, skipna=skipna) + + mgr, _ = self._mgr.reduce(blk_func) + indices = mgr.as_array().ravel() # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame From 84d5b31d631d4dbcea31da1a90c47283f39deb98 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 30 Nov 2020 15:17:45 -0800 Subject: [PATCH 2/3] BUG: DataFrame.idxmin/idxmax with mixed dtypes --- pandas/core/frame.py | 30 +++++++-------------------- pandas/tests/frame/test_reductions.py | 14 +++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7414491a8d2b0..5f149f10b05d3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9002,17 +9002,10 @@ def idxmin(self, axis=0, skipna=True) -> Series: """ axis = self._get_axis_number(axis) - if self._can_fast_transpose or axis == 1: - # i.e. self.values call is cheap and non-casting (or unavoidable) - indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) - else: - bm_axis = 1 - axis - - def blk_func(bvalues): - return nanops.nanargmin(bvalues, axis=bm_axis, skipna=skipna) - - mgr, _ = self._mgr.reduce(blk_func) - indices = mgr.as_array().ravel() + res = self._reduce( + nanops.nanargmin, "argmin", axis=axis, skipna=skipna, numeric_only=False + ) + indices = res._values # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame @@ -9086,17 +9079,10 @@ def idxmax(self, axis=0, skipna=True) -> Series: """ axis = self._get_axis_number(axis) - if self._can_fast_transpose or axis == 1: - # i.e. self.values call is cheap and non-casting (or unavoidable) - indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) - else: - bm_axis = 1 - axis - - def blk_func(bvalues): - return nanops.nanargmax(bvalues, axis=bm_axis, skipna=skipna) - - mgr, _ = self._mgr.reduce(blk_func) - indices = mgr.as_array().ravel() + res = self._reduce( + nanops.nanargmax, "argmax", axis=axis, skipna=skipna, numeric_only=False + ) + indices = res._values # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 299f00e818105..8cf1e17dfd1d0 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -969,6 +969,20 @@ def test_idxmax(self, float_frame, int_frame): with pytest.raises(ValueError, match=msg): frame.idxmax(axis=2) + def test_idxmax_mixed_dtype(self): + # don't cast to object, which would raise in nanops + dti = pd.date_range("2016-01-01", periods=3) + + df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti}) + + result = df.idxmax() + expected = Series([1, 0, 2], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = df.idxmin() + expected = Series([0, 2, 0], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + # ---------------------------------------------------------------------- # Logical reductions From a526398c48a01919e66df914f0e58cc6061ae8b6 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Dec 2020 18:11:33 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 24db70481c136..c458b51600342 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -619,6 +619,7 @@ Numeric - Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`) - Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`) - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`) +- Bug in :meth:`DataFrame.idxmax` and :meth:`DataFrame.idxmin` with mixed dtypes incorrectly raising ``TypeError`` (:issue:`38195`) Conversion ^^^^^^^^^^