From d8c504b97d24958ac3e653729d74dd1e5ce9321e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 15:40:55 -0800 Subject: [PATCH 1/2] BUG: Make DTI/TDI/PI argsort match their underlying arrays --- pandas/core/groupby/grouper.py | 3 ++- pandas/core/indexes/base.py | 4 ---- pandas/tests/indexes/datetimelike.py | 8 ++++++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e8af9da30a298..38725a3c16b11 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -374,7 +374,8 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): # possibly sort if (self.sort or sort) and not ax.is_monotonic: # use stable sort to support first, last, nth - indexer = self.indexer = ax.argsort(kind="mergesort") + # TODO: why does putting na_position="first" fix datetimelike cases? + indexer = self.indexer = ax.argsort(kind="mergesort", na_position="first") ax = ax.take(indexer) obj = obj.take(indexer, axis=self.axis) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5209d83ade309..fb18b32aa4a73 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4743,10 +4743,6 @@ def argsort(self, *args, **kwargs) -> np.ndarray: >>> idx[order] Index(['a', 'b', 'c', 'd'], dtype='object') """ - if needs_i8_conversion(self.dtype): - # TODO: these do not match the underlying EA argsort methods GH#37863 - return self.asi8.argsort(*args, **kwargs) - # This works for either ndarray or EA, is overriden # by RangeIndex, MultIIndex return self._data.argsort(*args, **kwargs) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 6f078237e3a97..a5a496e2a6ca2 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -9,6 +9,14 @@ class DatetimeLike(Base): + def test_argsort_matches_array(self): + rng = self.create_index() + rng = rng.insert(1, pd.NaT) + + result = rng.argsort() + expected = rng._data.argsort() + tm.assert_numpy_array_equal(result, expected) + def test_argmax_axis_invalid(self): # GH#23081 msg = r"`axis` must be fewer than the number of dimensions \(1\)" From 5ba963c76381a4ee22086249b29ff1f3d19a4cc2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 21 Nov 2020 09:29:06 -0800 Subject: [PATCH 2/2] non-ea compat --- pandas/core/groupby/grouper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 38725a3c16b11..73227bb6ec159 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -375,7 +375,9 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): if (self.sort or sort) and not ax.is_monotonic: # use stable sort to support first, last, nth # TODO: why does putting na_position="first" fix datetimelike cases? - indexer = self.indexer = ax.argsort(kind="mergesort", na_position="first") + indexer = self.indexer = ax.array.argsort( + kind="mergesort", na_position="first" + ) ax = ax.take(indexer) obj = obj.take(indexer, axis=self.axis)