diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bae22505145b5..e476c3566c10f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -156,7 +156,7 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): _infer_matches: Tuple[str, ...] _is_recognized_dtype: Callable[[DtypeObj], bool] _recognized_scalars: Tuple[Type, ...] - _data: np.ndarray + _ndarray: np.ndarray def __init__(self, data, dtype: Optional[Dtype] = None, freq=None, copy=False): raise AbstractMethodError(self) @@ -253,9 +253,24 @@ def _check_compatible_with( # ------------------------------------------------------------------ # NDArrayBackedExtensionArray compat + def __setstate__(self, state): + if isinstance(state, dict): + if "_data" in state and "_ndarray" not in state: + # backward compat, changed what is property vs attribute + state["_ndarray"] = state.pop("_data") + for key, value in state.items(): + setattr(self, key, value) + else: + # PeriodArray, bc it mixes in a cython class + if isinstance(state, tuple) and len(state) == 1: + state = state[0] + self.__setstate__(state) + else: + raise TypeError(state) + @cache_readonly - def _ndarray(self) -> np.ndarray: - return self._data + def _data(self) -> np.ndarray: + return self._ndarray def _from_backing_data( self: DatetimeLikeArrayT, arr: np.ndarray @@ -294,7 +309,7 @@ def asi8(self) -> np.ndarray: An ndarray with int64 dtype. """ # do not cache or you'll create a memory leak - return self._data.view("i8") + return self._ndarray.view("i8") # ---------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3982a7deca2bb..28e469547fe62 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -261,7 +261,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): if freq is None: freq = values.freq - values = values._data + values = values._ndarray if not isinstance(values, np.ndarray): raise ValueError( @@ -303,7 +303,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): # be incorrect(ish?) for the array as a whole dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) - self._data = values + self._ndarray = values self._dtype = dtype self._freq = freq @@ -320,7 +320,7 @@ def _simple_new( values = values.view(DT64NS_DTYPE) result = object.__new__(cls) - result._data = values + result._ndarray = values result._freq = freq result._dtype = dtype return result @@ -618,7 +618,7 @@ def astype(self, dtype, copy=True): elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype: # unit conversion e.g. datetime64[s] - return self._data.astype(dtype) + return self._ndarray.astype(dtype) elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) @@ -1138,7 +1138,7 @@ def to_period(self, freq=None): freq = res - return PeriodArray._from_datetime64(self._data, freq, tz=self.tz) + return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) def to_perioddelta(self, freq): """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 109be2c67bb1a..96a159c0804c9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -181,6 +181,8 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): _datetimelike_ops = _field_ops + _object_ops + _bool_ops _datetimelike_methods = ["strftime", "to_timestamp", "asfreq"] + __setstate__ = dtl.DatelikeOps.__setstate__ + # -------------------------------------------------------------------- # Constructors @@ -201,10 +203,10 @@ def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False) if isinstance(values, type(self)): if freq is not None and freq != values.freq: raise raise_on_incompatible(values, freq) - values, freq = values._data, values.freq + values, freq = values._ndarray, values.freq values = np.array(values, dtype="int64", copy=copy) - self._data = values + self._ndarray = values if freq is None: raise ValueError("freq is not specified and cannot be inferred") self._dtype = PeriodDtype(freq) @@ -347,7 +349,7 @@ def __arrow_array__(self, type=None): if type is not None: if pyarrow.types.is_integer(type): - return pyarrow.array(self._data, mask=self.isna(), type=type) + return pyarrow.array(self._ndarray, mask=self.isna(), type=type) elif isinstance(type, ArrowPeriodType): # ensure we have the same freq if self.freqstr != type.freq: @@ -361,7 +363,7 @@ def __arrow_array__(self, type=None): ) period_type = ArrowPeriodType(self.freqstr) - storage_array = pyarrow.array(self._data, mask=self.isna(), type="int64") + storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64") return pyarrow.ExtensionArray.from_storage(period_type, storage_array) # -------------------------------------------------------------------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 893644be23a0e..1a4ee52e414b4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -163,6 +163,8 @@ def dtype(self) -> np.dtype: # ---------------------------------------------------------------- # Constructors + _freq = None + def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): values = extract_array(values) @@ -179,7 +181,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): elif freq and values.freq: freq = to_offset(freq) freq, _ = dtl.validate_inferred_freq(freq, values.freq, False) - values = values._data + values = values._ndarray if not isinstance(values, np.ndarray): msg = ( @@ -211,7 +213,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): if freq: freq = to_offset(freq) - self._data = values + self._ndarray = values self._dtype = dtype self._freq = freq @@ -229,7 +231,7 @@ def _simple_new( values = values.view(TD64NS_DTYPE) result = object.__new__(cls) - result._data = values + result._ndarray = values result._freq = to_offset(freq) result._dtype = TD64NS_DTYPE return result @@ -341,7 +343,7 @@ def astype(self, dtype, copy: bool = True): dtype = pandas_dtype(dtype) if dtype.kind == "m": - return astype_td64_unit_conversion(self._data, dtype, copy=copy) + return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy) return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) @@ -415,8 +417,8 @@ def _formatter(self, boxed=False): def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): from pandas.io.formats.format import get_format_timedelta64 - formatter = get_format_timedelta64(self._data, na_rep) - return np.array([formatter(x) for x in self._data]) + formatter = get_format_timedelta64(self._ndarray, na_rep) + return np.array([formatter(x) for x in self._ndarray]) # ---------------------------------------------------------------- # Arithmetic Methods @@ -485,7 +487,7 @@ def _addsub_object_array(self, other, op): def __mul__(self, other) -> TimedeltaArray: if is_scalar(other): # numpy will accept float and int, raise TypeError for others - result = self._data * other + result = self._ndarray * other freq = None if self.freq is not None and not isna(other): freq = self.freq * other @@ -508,7 +510,7 @@ def __mul__(self, other) -> TimedeltaArray: return type(self)(result) # numpy will accept float or int dtype, raise TypeError for others - result = self._data * other + result = self._ndarray * other return type(self)(result) __rmul__ = __mul__ @@ -526,11 +528,11 @@ def __truediv__(self, other): return result # otherwise, dispatch to Timedelta implementation - return self._data / other + return self._ndarray / other elif lib.is_scalar(other): # assume it is numeric - result = self._data / other + result = self._ndarray / other freq = None if self.freq is not None: # Tick division is not implemented, so operate on Timedelta @@ -546,7 +548,7 @@ def __truediv__(self, other): elif is_timedelta64_dtype(other.dtype): # let numpy handle it - return self._data / other + return self._ndarray / other elif is_object_dtype(other.dtype): # We operate on raveled arrays to avoid problems in inference @@ -568,7 +570,7 @@ def __truediv__(self, other): return result else: - result = self._data / other + result = self._ndarray / other return type(self)(result) @unpack_zerodim_and_defer("__rtruediv__") @@ -583,7 +585,7 @@ def __rtruediv__(self, other): return result # otherwise, dispatch to Timedelta implementation - return other / self._data + return other / self._ndarray elif lib.is_scalar(other): raise TypeError( @@ -599,7 +601,7 @@ def __rtruediv__(self, other): elif is_timedelta64_dtype(other.dtype): # let numpy handle it - return other / self._data + return other / self._ndarray elif is_object_dtype(other.dtype): # Note: unlike in __truediv__, we do not _need_ to do type @@ -626,7 +628,7 @@ def __floordiv__(self, other): return result # dispatch to Timedelta implementation - result = other.__rfloordiv__(self._data) + result = other.__rfloordiv__(self._ndarray) return result # at this point we should only have numeric scalars; anything @@ -670,7 +672,7 @@ def __floordiv__(self, other): return result elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype): - result = self._data // other + result = self._ndarray // other return type(self)(result) else: @@ -690,7 +692,7 @@ def __rfloordiv__(self, other): return result # dispatch to Timedelta implementation - result = other.__floordiv__(self._data) + result = other.__floordiv__(self._ndarray) return result raise TypeError( @@ -760,15 +762,15 @@ def __rdivmod__(self, other): def __neg__(self) -> TimedeltaArray: if self.freq is not None: - return type(self)(-self._data, freq=-self.freq) - return type(self)(-self._data) + return type(self)(-self._ndarray, freq=-self.freq) + return type(self)(-self._ndarray) def __pos__(self) -> TimedeltaArray: - return type(self)(self._data, freq=self.freq) + return type(self)(self._ndarray, freq=self.freq) def __abs__(self) -> TimedeltaArray: # Note: freq is not preserved - return type(self)(np.abs(self._data)) + return type(self)(np.abs(self._ndarray)) # ---------------------------------------------------------------- # Conversion Methods - Vectorized analogues of Timedelta methods @@ -946,9 +948,12 @@ def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"): data = np.array(data, copy=False) elif isinstance(data, ABCSeries): data = data._values - elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArray)): + elif isinstance(data, ABCTimedeltaIndex): + inferred_freq = data.freq + data = data._data._ndarray + elif isinstance(data, TimedeltaArray): inferred_freq = data.freq - data = data._data + data = data._ndarray elif isinstance(data, IntegerArray): data = data.to_numpy("int64", na_value=tslibs.iNaT) elif is_categorical_dtype(data.dtype): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e1f2a40598963..6d5992540ef49 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -150,7 +150,7 @@ def _simple_new( result._cache = {} # For groupby perf. See note in indexes/base about _index_data - result._index_data = values._data + result._index_data = values._ndarray result._reset_identity() return result @@ -165,7 +165,7 @@ def _is_all_dates(self) -> bool: @property def values(self) -> np.ndarray: # Note: PeriodArray overrides this to return an ndarray of objects. - return self._data._data + return self._data._ndarray def __array_wrap__(self, result, context=None): """