diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 699d8a81243db..d2433402662f7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -394,6 +394,9 @@ Other Deprecations - Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) - Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`) - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`) +- Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`) +- Deprecated casting behavior when passing an item with a mismatched timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`) +- .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 71d38d3b3f73b..4fecbe4be9681 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -39,6 +39,7 @@ ) from pandas._typing import npt from pandas.errors import PerformanceWarning +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive from pandas.core.dtypes.cast import astype_dt64_to_dt64tz @@ -509,6 +510,19 @@ def _check_compatible_with(self, other, setitem: bool = False): if setitem: # Stricter check for setitem vs comparison methods if not timezones.tz_compare(self.tz, other.tz): + # TODO(2.0): remove this check. GH#37605 + warnings.warn( + "Setitem-like behavior with mismatched timezones is deprecated " + "and will change in a future version. Instead of raising " + "(or for Index, Series, and DataFrame methods, coercing to " + "object dtype), the value being set (or passed as a " + "fill_value, or inserted) will be cast to the existing " + "DatetimeArray/DatetimeIndex/Series/DataFrame column's " + "timezone. To retain the old behavior, explicitly cast to " + "object dtype before the operation.", + FutureWarning, + stacklevel=find_stack_level(), + ) raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'") # ----------------------------------------------------------------- diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 1e150f1b431c7..c7c1ce6c04692 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -883,7 +883,15 @@ def test_take_fill_valid(self, arr1d): msg = "Timezones don't match. 
.* != 'Australia/Melbourne'" with pytest.raises(ValueError, match=msg): # require tz match, not just tzawareness match - arr.take([-1, 1], allow_fill=True, fill_value=value) + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezone" + ): + result = arr.take([-1, 1], allow_fill=True, fill_value=value) + + # once deprecation is enforced + # expected = arr.take([-1, 1], allow_fill=True, + # fill_value=value.tz_convert(arr.dtype.tz)) + # tm.assert_equal(result, expected) def test_concat_same_type_invalid(self, arr1d): # different timezones diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index b9c1113e7f441..180fb9d29224e 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -128,8 +128,14 @@ def test_setitem_different_tz_raises(self): with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): arr[0] = pd.Timestamp("2000") + ts = pd.Timestamp("2000", tz="US/Eastern") with pytest.raises(ValueError, match="US/Central"): - arr[0] = pd.Timestamp("2000", tz="US/Eastern") + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezones" + ): + arr[0] = ts + # once deprecation is enforced + # assert arr[0] == ts.tz_convert("US/Central") def test_setitem_clears_freq(self): a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) @@ -385,7 +391,14 @@ def test_shift_requires_tzmatch(self): msg = "Timezones don't match. 
'UTC' != 'US/Pacific'" with pytest.raises(ValueError, match=msg): - dta.shift(1, fill_value=fill_value) + with tm.assert_produces_warning( + FutureWarning, match="mismatched timezones" + ): + dta.shift(1, fill_value=fill_value) + + # once deprecation is enforced + # expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC")) + # tm.assert_equal(result, expected) def test_tz_localize_t2d(self): dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific") diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index a89e089f3d8a2..5e321ad33a2bb 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1109,12 +1109,17 @@ def test_replace_datetimetz(self): # coerce to object result = df.copy() result.iloc[1, 0] = np.nan - result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific")) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = result.replace( + {"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific") + ) expected = DataFrame( { "A": [ Timestamp("20130101", tz="US/Eastern"), Timestamp("20130104", tz="US/Pacific"), + # once deprecation is enforced + # Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), Timestamp("20130103", tz="US/Eastern"), ], "B": [0, np.nan, 2], diff --git a/pandas/tests/indexes/datetimes/methods/test_insert.py b/pandas/tests/indexes/datetimes/methods/test_insert.py index aa9b2c5291585..016a29e4cc266 100644 --- a/pandas/tests/indexes/datetimes/methods/test_insert.py +++ b/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -197,18 +197,32 @@ def test_insert_mismatched_tz(self): # mismatched tz -> cast to object (could reasonably cast to same tz or UTC) item = Timestamp("2000-01-04", tz="US/Eastern") - result = idx.insert(3, item) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = idx.insert(3, item) expected = Index( - list(idx[:3]) + 
[item] + list(idx[3:]), dtype=object, name="idx" + list(idx[:3]) + [item] + list(idx[3:]), + dtype=object, + # once deprecation is enforced + # list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), + name="idx", ) + # once deprecation is enforced + # assert expected.dtype == idx.dtype tm.assert_index_equal(result, expected) # mismatched tz -> cast to object (could reasonably cast to same tz) item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")) - result = idx.insert(3, item) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = idx.insert(3, item) expected = Index( - list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx" + list(idx[:3]) + [item] + list(idx[3:]), + dtype=object, + # once deprecation is enforced + # list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), + name="idx", ) + # once deprecation is enforced + # assert expected.dtype == idx.dtype tm.assert_index_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 9a22a16106469..27aeb411e36f0 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -237,11 +237,17 @@ def test_setitem_series_datetime64tz(self, val, exp_dtype): [ pd.Timestamp("2011-01-01", tz=tz), val, + # once deprecation is enforced + # val if getattr(val, "tz", None) is None else val.tz_convert(tz), pd.Timestamp("2011-01-03", tz=tz), pd.Timestamp("2011-01-04", tz=tz), ] ) - self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + warn = None + if getattr(val, "tz", None) is not None and val.tz != obj[0].tz: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="mismatched timezones"): + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) @pytest.mark.parametrize( "val,exp_dtype", @@ -467,9 +473,12 @@ def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value # mismatched tz 
--> cast to object (could reasonably cast to common tz) ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") - result = obj.insert(1, ts) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = obj.insert(1, ts) + # once deprecation is enforced: + # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) + # assert expected.dtype == obj.dtype expected = obj.astype(object).insert(1, ts) - assert expected.dtype == object tm.assert_index_equal(result, expected) else: @@ -990,11 +999,18 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): [ pd.Timestamp("2011-01-01", tz=tz), fill_val, + # Once deprecation is enforced, this becomes: + # fill_val.tz_convert(tz) if getattr(fill_val, "tz", None) + # is not None else fill_val, pd.Timestamp("2011-01-03", tz=tz), pd.Timestamp("2011-01-04", tz=tz), ] ) - self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + warn = None + if getattr(fill_val, "tz", None) is not None and fill_val.tz != obj[0].tz: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="mismatched timezone"): + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) @pytest.mark.xfail(reason="Test not implemented") def test_fillna_series_int64(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b82ecac37634e..cf2a4a75f95b5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1890,7 +1890,8 @@ def test_setitem_with_expansion(self): # trying to set a single element on a part of a different timezone # this converts to object df2 = df.copy() - df2.loc[df2.new_col == "new", "time"] = v + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + df2.loc[df2.new_col == "new", "time"] = v expected = Series([v[0], df.loc[1, "time"]], name="time") tm.assert_series_equal(df2.time, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py 
b/pandas/tests/series/indexing/test_setitem.py index a922a937ce9d3..5521bee09b19b 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -898,6 +898,18 @@ def expected(self): ) return expected + @pytest.fixture(autouse=True) + def assert_warns(self, request): + # check that we issue a FutureWarning about timezone-matching + if request.function.__name__ == "test_slice_key": + key = request.getfixturevalue("key") + if not isinstance(key, slice): + # The test is a no-op, so no warning will be issued + yield + return + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + yield + @pytest.mark.parametrize( "obj,expected", diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index a28da1d856cf9..2feaf4e951ab8 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -523,7 +523,8 @@ def test_datetime64_tz_fillna(self, tz): tm.assert_series_equal(expected, result) tm.assert_series_equal(isna(ser), null_loc) - result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) expected = Series( [ Timestamp("2011-01-01 10:00", tz=tz), @@ -766,8 +767,15 @@ def test_fillna_datetime64_with_timezone_tzinfo(self): # but we dont (yet) consider distinct tzinfos for non-UTC tz equivalent ts = Timestamp("2000-01-01", tz="US/Pacific") ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific")) - result = ser2.fillna(ts) + assert ser2.dtype.kind == "M" + with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): + result = ser2.fillna(ts) expected = Series([ser[0], ts, ser[2]], dtype=object) + # once deprecation is enforced + # expected = Series( + # [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], + # dtype=ser2.dtype, + # ) tm.assert_series_equal(result, 
expected) def test_fillna_pos_args_deprecation(self):