From 9ecda1a99c8b58270d9c8b03708a13e64144db7a Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 5 Sep 2020 19:07:34 -0700 Subject: [PATCH 1/5] REF: assign _ndarray to _data, not asi8 --- pandas/core/arrays/datetimelike.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a5b8032974fa4..2b365850838a5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -463,13 +463,11 @@ class DatetimeLikeArrayMixin( @property def _ndarray(self) -> np.ndarray: # NB: A bunch of Interval tests fail if we use ._data - return self.asi8 + return self._data def _from_backing_data(self: _T, arr: np.ndarray) -> _T: # Note: we do not retain `freq` - # error: Too many arguments for "NDArrayBackedExtensionArray" - # error: Unexpected keyword argument "dtype" for "NDArrayBackedExtensionArray" - return type(self)(arr, dtype=self.dtype) # type: ignore[call-arg] + return type(self)._simple_new(arr, dtype=self.dtype) # ------------------------------------------------------------------ @@ -736,7 +734,13 @@ def _validate_fill_value(self, fill_value): fill_value = self._validate_scalar(fill_value, msg) except TypeError as err: raise ValueError(msg) from err - return self._unbox(fill_value) + rv = self._unbox(fill_value) + if self.dtype.kind == "M": + return np.int64(rv).view("M8[ns]") + elif self.dtype.kind == "m": + return np.int64(rv).view("m8[ns]") + else: + return rv def _validate_shift_value(self, fill_value): # TODO(2.0): once this deprecation is enforced, use _validate_fill_value From 24d28b6c9e29678f6c847dd66f8d565ccc637c10 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 5 Sep 2020 19:10:09 -0700 Subject: [PATCH 2/5] remove comment --- pandas/core/arrays/datetimelike.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2b365850838a5..0db5860885470 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -462,7 +462,6 @@ class DatetimeLikeArrayMixin( # kludge in libreduction @property def _ndarray(self) -> np.ndarray: - # NB: A bunch of Interval tests fail if we use ._data return self._data def _from_backing_data(self: _T, arr: np.ndarray) -> _T: From 64e665e698e54abdd76c50619cd704c47cee0475 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 09:07:21 -0700 Subject: [PATCH 3/5] REF: _ndarray use _data instead of asi8 --- pandas/core/arrays/datetimelike.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 0db5860885470..5f1f9da42201e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -27,7 +27,7 @@ from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import ( @@ -458,9 +458,7 @@ class DatetimeLikeArrayMixin( # ------------------------------------------------------------------ # NDArrayBackedExtensionArray compat - # TODO: make this a cache_readonly; need to get around _index_data - # kludge in libreduction - @property + @cache_readonly def _ndarray(self) -> np.ndarray: return self._data @@ -523,7 +521,7 @@ def __array__(self, dtype=None) -> np.ndarray: # used for Timedelta/DatetimeArray, overwritten by PeriodArray if is_object_dtype(dtype): return np.array(list(self), dtype=object) - return self._data + return self._ndarray def __getitem__(self, key): """ @@ -533,7 +531,7 @@ def __getitem__(self, key): if lib.is_integer(key): # fast-path - result = self._data[key] + result = self._ndarray[key] if self.ndim == 1: return self._box_func(result) return self._simple_new(result, dtype=self.dtype) @@ -554,7 +552,7 @@ def __getitem__(self, key): key = check_array_indexer(self, key) freq = self._get_getitem_freq(key) - result = self._data[key] + result = self._ndarray[key] if lib.is_scalar(result): return self._box_func(result) return self._simple_new(result, dtype=self.dtype, freq=freq) @@ -609,7 +607,7 @@ def __setitem__( value = self._validate_setitem_value(value) key = check_array_indexer(self, key) - self._data[key] = value + self._ndarray[key] = value self._maybe_clear_freq() def _maybe_clear_freq(self): @@ -660,8 +658,8 @@ def astype(self, dtype, copy=True): def view(self, dtype=None): if dtype is None or dtype is self.dtype: - return type(self)(self._data, dtype=self.dtype) - return self._data.view(dtype=dtype) + return type(self)(self._ndarray, dtype=self.dtype) + return self._ndarray.view(dtype=dtype) # ------------------------------------------------------------------ # ExtensionArray Interface @@ -702,7 +700,7 @@ def _from_factorized(cls, values, original): return cls(values, dtype=original.dtype) def _values_for_argsort(self): - return self._data + return self._ndarray # ------------------------------------------------------------------ # Validation Methods @@ -954,9 +952,9 @@ def value_counts(self, dropna=False): from pandas import Index, Series if dropna: - values = self[~self.isna()]._data + values = self[~self.isna()]._ndarray else: - values = self._data + values = self._ndarray cls = type(self) @@ -1047,9 +1045,9 @@ def fillna(self, value=None, method=None, limit=None): else: func = missing.backfill_1d - values = self._data + values = self._ndarray if not is_period_dtype(self.dtype): - # For PeriodArray self._data is i8, which gets copied + # For PeriodArray self._ndarray is i8, which gets copied # by `func`. Otherwise we need to make a copy manually # to avoid modifying `self` in-place. values = values.copy() From 0ec876ad197160831f15908e1f648bac4ee2f8e2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 13:24:37 -0700 Subject: [PATCH 4/5] REF: implement _rebox_native --- pandas/core/arrays/datetimelike.py | 21 +++++++++++++-------- pandas/core/arrays/datetimes.py | 4 ++++ pandas/core/arrays/period.py | 4 ++++ pandas/core/arrays/timedeltas.py | 4 ++++ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5f1f9da42201e..a218745db0a44 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -175,6 +175,14 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT: """ raise AbstractMethodError(self) + @classmethod + def _rebox_native(cls, value: int) -> Union[int, np.datetime64, np.timedelta64]: + """ + Box an integer unboxed via _unbox_scalar into the native type for + the underlying ndarray. + """ + raise AbstractMethodError(cls) + def _unbox_scalar(self, value: DTScalarOrNaT) -> int: """ Unbox the integer value of a scalar `value`. @@ -464,7 +472,9 @@ def _ndarray(self) -> np.ndarray: def _from_backing_data(self: _T, arr: np.ndarray) -> _T: # Note: we do not retain `freq` - return type(self)._simple_new(arr, dtype=self.dtype) + return type(self)._simple_new( # type: ignore[attr-defined] + arr, dtype=self.dtype + ) # ------------------------------------------------------------------ @@ -717,7 +727,7 @@ def _validate_fill_value(self, fill_value): Returns ------- - fill_value : np.int64 + fill_value : np.int64, np.datetime64, or np.timedelta64 Raises ------ @@ -732,12 +742,7 @@ def _validate_fill_value(self, fill_value): except TypeError as err: raise ValueError(msg) from err rv = self._unbox(fill_value) - if self.dtype.kind == "M": - return np.int64(rv).view("M8[ns]") - elif self.dtype.kind == "m": - return np.int64(rv).view("m8[ns]") - else: - return rv + return self._rebox_native(rv) def _validate_shift_value(self, fill_value): # TODO(2.0): once this deprecation is enforced, use _validate_fill_value diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1bea3a9eb137e..d913e7be9ae5f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -446,6 +446,10 @@ def _generate_range( # ----------------------------------------------------------------- # DatetimeLike Interface + @classmethod + def _rebox_native(cls, value: int) -> np.datetime64: + return np.int64(value).view("M8[ns]") + def _unbox_scalar(self, value): if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index cc39ffb5d1203..c3a9430736969 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -253,6 +253,10 @@ def _generate_range(cls, start, end, periods, freq, fields): # ----------------------------------------------------------------- # DatetimeLike Interface + @classmethod + def _rebox_native(cls, value: int) -> np.int64: + return np.int64(value) + def _unbox_scalar(self, value: Union[Period, NaTType]) -> int: if value is NaT: return value.value diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 2d694c469b3a9..485ebb49a376d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -271,6 +271,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # DatetimeLike Interface + @classmethod + def _rebox_native(cls, value: int) -> np.timedelta64: + return np.int64(value).view("m8[ns]") + def _unbox_scalar(self, value): if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timedelta.") From 032c5428aa87371b3b7f181919fccda8277bb584 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 6 Sep 2020 14:17:43 -0700 Subject: [PATCH 5/5] update assertion --- pandas/tests/frame/indexing/test_datetime.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_datetime.py b/pandas/tests/frame/indexing/test_datetime.py index 1937a4c380dc9..1866ac341def6 100644 --- a/pandas/tests/frame/indexing/test_datetime.py +++ b/pandas/tests/frame/indexing/test_datetime.py @@ -23,7 +23,9 @@ def test_setitem(self, timezone_frame): b1 = df._mgr.blocks[1] b2 = df._mgr.blocks[2] tm.assert_extension_array_equal(b1.values, b2.values) - assert id(b1.values._data.base) != id(b2.values._data.base) + b1base = b1.values._data.base + b2base = b2.values._data.base + assert b1base is None or (id(b1base) != id(b2base)) # with nan df2 = df.copy()