From c74adf46aab453a20b5451e4974aa56aeee4c520 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 16:11:55 -0700 Subject: [PATCH 1/3] COMPAT: match numpy behavior for searchsorted on dt64/td64 --- pandas/core/arrays/datetimelike.py | 15 +++++++++++---- pandas/core/arrays/datetimes.py | 4 ++++ pandas/core/arrays/period.py | 4 ++++ pandas/core/arrays/timedeltas.py | 4 ++++ pandas/tests/arrays/test_datetimelike.py | 11 ++++++++--- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5a44f87400b79..c2713c66b5524 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -176,6 +176,14 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT: """ raise AbstractMethodError(self) + @classmethod + def _rebox_native(cls, value: int) -> Union[int, np.datetime64, np.timedelta64]: + """ + Box an integer unboxed via _unbox_scalar into the native type for + the underlying ndarray. + """ + raise AbstractMethodError(cls) + def _unbox_scalar(self, value: DTScalarOrNaT) -> int: """ Unbox the integer value of a scalar `value`. @@ -866,7 +874,8 @@ def _validate_searchsorted_value(self, value): # TODO: cast_str? we accept it for scalar value = self._validate_listlike(value, "searchsorted") - return self._unbox(value) + rv = self._unbox(value) + return self._rebox_native(rv) def _validate_setitem_value(self, value): msg = ( @@ -945,9 +954,7 @@ def searchsorted(self, value, side="left", sorter=None): Array of insertion points with the same shape as `value`. 
""" value = self._validate_searchsorted_value(value) - - # TODO: Use datetime64 semantics for sorting, xref GH#29844 - return self.asi8.searchsorted(value, side=side, sorter=sorter) + return self._data.searchsorted(value, side=side, sorter=sorter) def value_counts(self, dropna=False): """ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1bea3a9eb137e..d913e7be9ae5f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -446,6 +446,10 @@ def _generate_range( # ----------------------------------------------------------------- # DatetimeLike Interface + @classmethod + def _rebox_native(cls, value: int) -> np.datetime64: + return np.int64(value).view("M8[ns]") + def _unbox_scalar(self, value): if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index cc39ffb5d1203..c3a9430736969 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -253,6 +253,10 @@ def _generate_range(cls, start, end, periods, freq, fields): # ----------------------------------------------------------------- # DatetimeLike Interface + @classmethod + def _rebox_native(cls, value: int) -> np.int64: + return np.int64(value) + def _unbox_scalar(self, value: Union[Period, NaTType]) -> int: if value is NaT: return value.value diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 2d694c469b3a9..485ebb49a376d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -271,6 +271,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # DatetimeLike Interface + @classmethod + def _rebox_native(cls, value: int) -> np.timedelta64: + return np.int64(value).view("m8[ns]") + def _unbox_scalar(self, value): if not isinstance(value, 
self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timedelta.") diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index b1ab700427c28..292557fc04258 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -241,10 +241,15 @@ def test_searchsorted(self): expected = np.array([2, 3], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) - # Following numpy convention, NaT goes at the beginning - # (unlike NaN which goes at the end) + # GH#29884 match numpy convention on whether NaT goes + # at the end or the beginning result = arr.searchsorted(pd.NaT) - assert result == 0 + if _np_version_under1p18 or self.array_cls is PeriodArray: + # Following numpy convention, NaT goes at the beginning + # (unlike NaN which goes at the end) + assert result == 0 + else: + assert result == 10 def test_getitem_2d(self, arr1d): # 2d slicing on a 1D array From 70fbbd0488d2cf79cbb833ad036477055af91e5b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Sep 2020 15:08:29 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ccaae9f996425..1a08ffa5b9730 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -228,6 +228,7 @@ Datetimelike - Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g months=12) (:issue:`34511`) - Bug in :meth:`DatetimeIndex.get_slice_bound` where ``datetime.date`` objects were not accepted or naive :class:`Timestamp` with a tz-aware :class:`DatetimeIndex` (:issue:`35690`) - Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`) +- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted` placement of ``NaT`` 
values being inconsistent with ``NumPy`` (:issue:`36176`) Timedelta ^^^^^^^^^ From 7ef3642394e49b367cb834e1287f1a99ea0654fd Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Sep 2020 15:16:16 -0700 Subject: [PATCH 3/3] update whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1a08ffa5b9730..2afa1f1a6199e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -228,7 +228,7 @@ Datetimelike - Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g months=12) (:issue:`34511`) - Bug in :meth:`DatetimeIndex.get_slice_bound` where ``datetime.date`` objects were not accepted or naive :class:`Timestamp` with a tz-aware :class:`DatetimeIndex` (:issue:`35690`) - Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`) -- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted` placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`) +- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64`` or ``timedelta64`` dtype where the placement of ``NaT`` values was inconsistent with ``NumPy`` (:issue:`36176`) Timedelta ^^^^^^^^^