From cf412aeacdde2670c85a7e1f475e6457f512d176 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Nov 2020 17:41:57 -0800 Subject: [PATCH 1/3] ENH: 2D compat for DTA tz_localize, to_period --- pandas/core/arrays/datetimelike.py | 28 ++++++++++++++++++++++-- pandas/core/arrays/datetimes.py | 7 ++++-- pandas/core/arrays/period.py | 1 + pandas/core/arrays/timedeltas.py | 3 ++- pandas/tests/arrays/test_datetimelike.py | 9 ++++++++ pandas/tests/arrays/test_datetimes.py | 11 ++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3b419f8d1da2a..e9dcd06d558e3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import datetime, timedelta +from functools import wraps import operator from typing import ( TYPE_CHECKING, @@ -80,6 +81,26 @@ DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") +def ravel_compat(meth): + """ + Decorator to ravel a 2D array before passing it to a cython operation, + then reshape the result to our own shape. + """ + + @wraps(meth) + def method(self, *args, **kwargs): + if self.ndim == 1: + return meth(self, *args, **kwargs) + + flags = self._ndarray.flags + flat = self.ravel("K") + result = meth(flat, *args, **kwargs) + order = "F" if flags.f_contiguous else "C" + return result.reshape(self.shape, order=order) + + return method + + class InvalidComparison(Exception): """ Raised by _validate_comparison_value to indicate to caller it should @@ -681,7 +702,7 @@ def value_counts(self, dropna=False): cls = type(self) - result = value_counts(values, sort=False, dropna=dropna) + result = value_counts(values.ravel("K"), sort=False, dropna=dropna) index = Index( cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name ) @@ -758,6 +779,9 @@ def freq(self, value): value = to_offset(value) self._validate_frequency(self, value) + if self.ndim > 1: + raise ValueError("Cannot set freq with ndim > 1") + self._freq = value @property @@ -854,7 +878,7 @@ def _is_monotonic_decreasing(self): @property def _is_unique(self): - return len(unique1d(self.asi8)) == len(self) + return len(unique1d(self.asi8.ravel("K"))) == self.size # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7c6b38d9114ab..7149aa2f01672 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -610,14 +610,15 @@ def astype(self, dtype, copy=True): # ----------------------------------------------------------------- # Rendering Methods + @dtl.ravel_compat def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): from pandas.io.formats.format import get_format_datetime64_from_values fmt = get_format_datetime64_from_values(self, date_format) return tslib.format_array_from_datetime( - self.asi8.ravel(), tz=self.tz, format=fmt, na_rep=na_rep - ).reshape(self.shape) + self.asi8, tz=self.tz, format=fmt, na_rep=na_rep + ) # ----------------------------------------------------------------- # Comparison Methods @@ -817,6 +818,7 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) + @dtl.ravel_compat def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): """ Localize tz-naive Datetime Array/Index to tz-aware @@ -1049,6 +1051,7 @@ def normalize(self): new_values = normalize_i8_timestamps(self.asi8, self.tz) return type(self)(new_values)._with_freq("infer").tz_localize(self.tz) + @dtl.ravel_compat def to_period(self, freq=None): """ Cast to PeriodArray/Index at a particular frequency. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 80882acceb56a..89d62c1766175 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -561,6 +561,7 @@ def _formatter(self, boxed: bool = False): return str return "'{}'".format + @dtl.ravel_compat def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): """ actually format my specific types diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 035e6e84c6ec8..3434ba3d40389 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -406,11 +406,12 @@ def _formatter(self, boxed=False): return get_format_timedelta64(self, box=True) + @dtl.ravel_compat def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): from pandas.io.formats.format import get_format_timedelta64 formatter = get_format_timedelta64(self._data, na_rep) - return np.array([formatter(x) for x in self._data.ravel()]).reshape(self.shape) + return np.array([formatter(x) for x in self._data]) # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 94a5406eb1f8f..4486fbcca9378 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -712,6 +712,15 @@ def test_to_period(self, datetime_index, freqstr): # an EA-specific tm.assert_ function tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + def test_to_period_2d(self, arr1d): + arr2d = arr1d.reshape(1, -1) + + warn = None if arr1d.tz is None else UserWarning + with tm.assert_produces_warning(warn): + result = arr2d.to_period("D") + expected = arr1d.to_period("D").reshape(1, -1) + tm.assert_period_array_equal(result, expected) + @pytest.mark.parametrize("propname", pd.DatetimeIndex._bool_ops) def test_bool_properties(self, arr1d, propname): # in this case _bool_ops is just `is_leap_year` diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1d8ee9cf2b73b..b9ca6fc3a7578 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -449,6 +449,17 @@ def test_shift_requires_tzmatch(self): with pytest.raises(ValueError, match=msg): dta.shift(1, fill_value=fill_value) + def test_tz_localize_t2d(self): + dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific") + dta = dti._data.reshape(3, 4) + result = dta.tz_localize(None) + + expected = dta.ravel().tz_localize(None).reshape(dta.shape) + tm.assert_datetime_array_equal(result, expected) + + roundtrip = expected.tz_localize("US/Pacific") + tm.assert_datetime_array_equal(roundtrip, dta) + class TestSequenceToDT64NS: def test_tz_dtype_mismatch_raises(self): From 41ee8f814a4eda0f71e1c18102b39b1662d84ea5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Nov 2020 19:30:24 -0800 Subject: [PATCH 2/3] move ravel_compat to mixins --- pandas/core/arrays/_mixins.py | 21 +++++++++++++++++++++ pandas/core/arrays/datetimelike.py | 24 ++---------------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 07862e0b9bb48..f7a8a8ab44c1c 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -1,5 +1,6 @@ from __future__ import annotations +from functools import wraps from typing import Any, Optional, Sequence, Type, TypeVar, Union import numpy as np @@ -27,6 +28,26 @@ ) +def ravel_compat(meth): + """ + Decorator to ravel a 2D array before passing it to a cython operation, + then reshape the result to our own shape. + """ + + @wraps(meth) + def method(self, *args, **kwargs): + if self.ndim == 1: + return meth(self, *args, **kwargs) + + flags = self._ndarray.flags + flat = self.ravel("K") + result = meth(flat, *args, **kwargs) + order = "F" if flags.f_contiguous else "C" + return result.reshape(self.shape, order=order) + + return method + + class NDArrayBackedExtensionArray(ExtensionArray): """ ExtensionArray that is backed by a single NumPy ndarray. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e9dcd06d558e3..0949282c240f0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,7 +1,6 @@ from __future__ import annotations from datetime import datetime, timedelta -from functools import wraps import operator from typing import ( TYPE_CHECKING, @@ -65,7 +64,7 @@ from pandas.core import nanops, ops from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts from pandas.core.arraylike import OpsMixin -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray, ravel_compat import pandas.core.common as com from pandas.core.construction import array, extract_array from pandas.core.indexers import check_array_indexer, check_setitem_lengths @@ -81,26 +80,6 @@ DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") -def ravel_compat(meth): - """ - Decorator to ravel a 2D array before passing it to a cython operation, - then reshape the result to our own shape. - """ - - @wraps(meth) - def method(self, *args, **kwargs): - if self.ndim == 1: - return meth(self, *args, **kwargs) - - flags = self._ndarray.flags - flat = self.ravel("K") - result = meth(flat, *args, **kwargs) - order = "F" if flags.f_contiguous else "C" - return result.reshape(self.shape, order=order) - - return method - - class InvalidComparison(Exception): """ Raised by _validate_comparison_value to indicate to caller it should @@ -708,6 +687,7 @@ def value_counts(self, dropna=False): ) return Series(result._values, index=index, name=result.name) + @ravel_compat def map(self, mapper): # TODO(GH-23179): Add ExtensionArray.map # Need to figure out if we want ExtensionArray.map first. From 685f3a32cf1c293e37d6de1313051e0dffdf5e45 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 07:46:26 -0800 Subject: [PATCH 3/3] only 1d for value_counts --- pandas/core/arrays/datetimelike.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d31f50bad7dcd..ee1323b71f146 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -679,6 +679,9 @@ def value_counts(self, dropna: bool = False): ------- Series """ + if self.ndim != 1: + raise NotImplementedError + from pandas import Index, Series if dropna: @@ -688,7 +691,7 @@ def value_counts(self, dropna: bool = False): cls = type(self) - result = value_counts(values.ravel("K"), sort=False, dropna=dropna) + result = value_counts(values, sort=False, dropna=dropna) index = Index( cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name )