From 4afc0aceffdbb8398576e5872a37c2b9055fbc16 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Feb 2020 19:34:36 -0800 Subject: [PATCH 1/6] Fix+test DTA+-object dtype 2D --- pandas/core/arrays/datetimelike.py | 24 ++++++++---------- pandas/tests/arithmetic/test_datetime64.py | 29 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e39d1dc03adf5..a4274a2af4b19 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -42,6 +42,7 @@ from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com +from pandas.core.construction import array, extract_array from pandas.core.indexers import check_array_indexer from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import invalid_comparison, make_invalid_op @@ -621,7 +622,7 @@ def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_object_dtype(dtype): - return self._box_values(self.asi8) + return self._box_values(self.asi8.ravel()).reshape(self.shape) elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): return self._format_native_types() elif is_integer_dtype(dtype): @@ -1252,19 +1253,14 @@ def _addsub_object_array(self, other: np.ndarray, op): PerformanceWarning, ) - # For EA self.astype('O') returns a numpy array, not an Index - left = self.astype("O") - - res_values = op(left, np.array(other)) - kwargs = {} - if not is_period_dtype(self): - kwargs["freq"] = "infer" - try: - res = type(self)._from_sequence(res_values, **kwargs) - except ValueError: - # e.g. we've passed a Timestamp to TimedeltaArray - res = res_values - return res + # TODO: could there be cases where self.shape isn't the right outshape? + res_values = op(self.astype("O"), np.array(other)) + result = array(res_values.ravel()) + result = extract_array(result, extract_numpy=True).reshape(self.shape) + if result.dtype.kind in ["m", "M"] and result.ndim == 1: + # i.e. DatetimeArray or TimedeltaArray + result.freq = result.inferred_freq + return result def _time_shift(self, periods, freq=None): """ diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index d3f9ac4f3f8b2..1b002e3dc9300 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -27,6 +27,7 @@ date_range, ) import pandas._testing as tm +from pandas.core.arrays import DatetimeArray, TimedeltaArray from pandas.core.ops import roperator from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, @@ -2395,3 +2396,31 @@ def test_shift_months(years, months): raw = [x + pd.offsets.DateOffset(years=years, months=months) for x in dti] expected = DatetimeIndex(raw) tm.assert_index_equal(actual, expected) + + +def test_datetimelike_array_addsub_object_dtype_2d(): + # block-wise DataFrame operations will require operating on 2D + # DatetimeArray/TimedeltaArray, so check that specifically. + dti = pd.date_range("1994-02-13", freq="2W", periods=4) + dta = dti._data.reshape((4, 1)) + + other = np.array([[pd.offsets.Day(n)] for n in range(4)]) + assert other.shape == dta.shape + + with tm.assert_produces_warning(PerformanceWarning): + result = dta + other + with tm.assert_produces_warning(PerformanceWarning): + expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1) + + assert isinstance(result, DatetimeArray) + assert result.freq is None + tm.assert_numpy_array_equal(result._data, expected._data) + + with tm.assert_produces_warning(PerformanceWarning): + # Case where we expect to get a TimedeltaArray back + result2 = dta - dta.astype(object) + + assert isinstance(result2, TimedeltaArray) + assert result2.shape == (4, 1) + assert result2.freq is None + assert (result2.asi8 == 0).all() From 63cd78e6610919e1626504e254b7a9c02881f365 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 12:29:24 -0800 Subject: [PATCH 2/6] dont infer --- pandas/core/arrays/datetimelike.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a4274a2af4b19..ee4f4cce8940f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1257,9 +1257,6 @@ def _addsub_object_array(self, other: np.ndarray, op): res_values = op(self.astype("O"), np.array(other)) result = array(res_values.ravel()) result = extract_array(result, extract_numpy=True).reshape(self.shape) - if result.dtype.kind in ["m", "M"] and result.ndim == 1: - # i.e. DatetimeArray or TimedeltaArray - result.freq = result.inferred_freq return result def _time_shift(self, periods, freq=None): From 52d189ed873e1de0edcd89f3482af29dff98b281 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 12:30:41 -0800 Subject: [PATCH 3/6] comment --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8d997a74e0d2c..45de4ff88dbd9 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1255,7 +1255,7 @@ def _addsub_object_array(self, other: np.ndarray, op): PerformanceWarning, ) - # TODO: could there be cases where self.shape isn't the right outshape? + # Note: we are assuming here that shapes match. res_values = op(self.astype("O"), np.array(other)) result = array(res_values.ravel()) result = extract_array(result, extract_numpy=True).reshape(self.shape) From a45d6e2fae8a488cc9b0fa378b2337e183ea113a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 13:06:59 -0800 Subject: [PATCH 4/6] tests --- pandas/tests/arithmetic/test_datetime64.py | 12 +++++++++++- pandas/tests/arithmetic/test_timedelta64.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 1b002e3dc9300..61ebfce1a4b42 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -957,6 +957,16 @@ def test_dt64arr_sub_NaT(self, box_with_array): # ------------------------------------------------------------- # Subtraction of datetime-like array-like + def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): + dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + expected = dti - dti + + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = obj - obj.astype(object) + tm.assert_equal(result, expected) + def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): dti = pd.date_range("2016-01-01", periods=3, tz=None) dt64vals = dti.values @@ -2398,7 +2408,7 @@ def test_shift_months(years, months): tm.assert_index_equal(actual, expected) -def test_datetimelike_array_addsub_object_dtype_2d(): +def test_dt64arr_addsub_object_dtype_2d(): # block-wise DataFrame operations will require operating on 2D # DatetimeArray/TimedeltaArray, so check that specifically. dti = pd.date_range("1994-02-13", freq="2W", periods=4) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index abdeb1b30b626..1ad3ead4bb3e3 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -519,6 +519,18 @@ def test_tda_add_sub_index(self): expected = tdi - tdi tm.assert_index_equal(result, expected) + def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture): + # Result should be cast back to DatetimeArray + dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) + dti._set_freq(None) + tdi = dti - dti + + obj = tm.box_expected(tdi, box_df_fail) + other = tm.box_expected(dti, box_df_fail) + + result = obj + other.astype(object) + tm.assert_equal(result, other) + # ------------------------------------------------------------- # Binary operations TimedeltaIndex and timedelta-like From ffa221cef35ab60bfc927558cc982cd4a9e43729 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 22 Feb 2020 17:57:39 -0800 Subject: [PATCH 5/6] catch perf warning --- pandas/tests/arithmetic/test_datetime64.py | 4 +++- pandas/tests/arithmetic/test_timedelta64.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 61ebfce1a4b42..f7211ab5f9fd4 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -964,7 +964,9 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(dti, box_with_array) expected = tm.box_expected(expected, box_with_array) - result = obj - obj.astype(object) + warn = PerformanceWarning if box_with_array is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + result = obj - obj.astype(object) tm.assert_equal(result, expected) def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 1ad3ead4bb3e3..8c415f18b7a8a 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -528,7 +528,9 @@ def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture): obj = tm.box_expected(tdi, box_df_fail) other = tm.box_expected(dti, box_df_fail) - result = obj + other.astype(object) + warn = PerformanceWarning if box_df_fail is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + result = obj + other.astype(object) tm.assert_equal(result, other) # ------------------------------------------------------------- From 04d10ebbc39a3ba6d424191c68b876cc9ecb7d8b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Feb 2020 16:52:35 -0800 Subject: [PATCH 6/6] add assertion --- pandas/core/arrays/datetimelike.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 45de4ff88dbd9..153a121132ca9 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1255,7 +1255,9 @@ def _addsub_object_array(self, other: np.ndarray, op): PerformanceWarning, ) - # Note: we are assuming here that shapes match. + # Caller is responsible for broadcasting if necessary + assert self.shape == other.shape, (self.shape, other.shape) + res_values = op(self.astype("O"), np.array(other)) result = array(res_values.ravel()) result = extract_array(result, extract_numpy=True).reshape(self.shape)