Skip to content

DEPR: datetime64tz cast mismatched timezones on setitemlike #44243

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ Other Deprecations
- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
- Deprecated silent dropping of columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
- Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
- Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`)
-

.. ---------------------------------------------------------------------------

Expand Down
14 changes: 14 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from pandas._typing import npt
from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_inclusive

from pandas.core.dtypes.cast import astype_dt64_to_dt64tz
Expand Down Expand Up @@ -509,6 +510,19 @@ def _check_compatible_with(self, other, setitem: bool = False):
if setitem:
# Stricter check for setitem vs comparison methods
if not timezones.tz_compare(self.tz, other.tz):
# TODO(2.0): remove this check. GH#37605
warnings.warn(
"Setitem-like behavior with mismatched timezones is deprecated "
"and will change in a future version. Instead of raising "
"(or for Index, Series, and DataFrame methods, coercing to "
"object dtype), the value being set (or passed as a "
"fill_value, or inserted) will be cast to the existing "
"DatetimeArray/DatetimeIndex/Series/DataFrame column's "
"timezone. To retain the old behavior, explicitly cast to "
"object dtype before the operation.",
FutureWarning,
stacklevel=find_stack_level(),
)
raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'")

# -----------------------------------------------------------------
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,15 @@ def test_take_fill_valid(self, arr1d):
msg = "Timezones don't match. .* != 'Australia/Melbourne'"
with pytest.raises(ValueError, match=msg):
# require tz match, not just tzawareness match
arr.take([-1, 1], allow_fill=True, fill_value=value)
with tm.assert_produces_warning(
FutureWarning, match="mismatched timezone"
):
result = arr.take([-1, 1], allow_fill=True, fill_value=value)

# once deprecation is enforced
# expected = arr.take([-1, 1], allow_fill=True,
# fill_value=value.tz_convert(arr.dtype.tz))
# tm.assert_equal(result, expected)

def test_concat_same_type_invalid(self, arr1d):
# different timezones
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,14 @@ def test_setitem_different_tz_raises(self):
with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
arr[0] = pd.Timestamp("2000")

ts = pd.Timestamp("2000", tz="US/Eastern")
with pytest.raises(ValueError, match="US/Central"):
arr[0] = pd.Timestamp("2000", tz="US/Eastern")
with tm.assert_produces_warning(
FutureWarning, match="mismatched timezones"
):
arr[0] = ts
# once deprecation is enforced
# assert arr[0] == ts.tz_convert("US/Central")

def test_setitem_clears_freq(self):
a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
Expand Down Expand Up @@ -385,7 +391,14 @@ def test_shift_requires_tzmatch(self):

msg = "Timezones don't match. 'UTC' != 'US/Pacific'"
with pytest.raises(ValueError, match=msg):
dta.shift(1, fill_value=fill_value)
with tm.assert_produces_warning(
FutureWarning, match="mismatched timezones"
):
dta.shift(1, fill_value=fill_value)

# once deprecation is enforced
# expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
# tm.assert_equal(result, expected)

def test_tz_localize_t2d(self):
dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific")
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,12 +1109,17 @@ def test_replace_datetimetz(self):
# coerce to object
result = df.copy()
result.iloc[1, 0] = np.nan
result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific"))
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = result.replace(
{"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific")
)
expected = DataFrame(
{
"A": [
Timestamp("20130101", tz="US/Eastern"),
Timestamp("20130104", tz="US/Pacific"),
# once deprecation is enforced
# Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"),
Timestamp("20130103", tz="US/Eastern"),
],
"B": [0, np.nan, 2],
Expand Down
22 changes: 18 additions & 4 deletions pandas/tests/indexes/datetimes/methods/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,18 +197,32 @@ def test_insert_mismatched_tz(self):

# mismatched tz -> cast to object (could reasonably cast to same tz or UTC)
item = Timestamp("2000-01-04", tz="US/Eastern")
result = idx.insert(3, item)
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = idx.insert(3, item)
expected = Index(
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
list(idx[:3]) + [item] + list(idx[3:]),
dtype=object,
# once deprecation is enforced
# list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]),
name="idx",
)
# once deprecation is enforced
# assert expected.dtype == idx.dtype
tm.assert_index_equal(result, expected)

# mismatched tz -> cast to object (could reasonably cast to same tz)
item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))
result = idx.insert(3, item)
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = idx.insert(3, item)
expected = Index(
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
list(idx[:3]) + [item] + list(idx[3:]),
dtype=object,
# once deprecation is enforced
# list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]),
name="idx",
)
# once deprecation is enforced
# assert expected.dtype == idx.dtype
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
Expand Down
24 changes: 20 additions & 4 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,17 @@ def test_setitem_series_datetime64tz(self, val, exp_dtype):
[
pd.Timestamp("2011-01-01", tz=tz),
val,
# once deprecation is enforced
# val if getattr(val, "tz", None) is None else val.tz_convert(tz),
pd.Timestamp("2011-01-03", tz=tz),
pd.Timestamp("2011-01-04", tz=tz),
]
)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
warn = None
if getattr(val, "tz", None) is not None and val.tz != obj[0].tz:
warn = FutureWarning
with tm.assert_produces_warning(warn, match="mismatched timezones"):
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)

@pytest.mark.parametrize(
"val,exp_dtype",
Expand Down Expand Up @@ -467,9 +473,12 @@ def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value

# mismatched tz --> cast to object (could reasonably cast to common tz)
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
result = obj.insert(1, ts)
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = obj.insert(1, ts)
# once deprecation is enforced:
# expected = obj.insert(1, ts.tz_convert(obj.dtype.tz))
# assert expected.dtype == obj.dtype
expected = obj.astype(object).insert(1, ts)
assert expected.dtype == object
tm.assert_index_equal(result, expected)

else:
Expand Down Expand Up @@ -990,11 +999,18 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
[
pd.Timestamp("2011-01-01", tz=tz),
fill_val,
# Once deprecation is enforced, this becomes:
# fill_val.tz_convert(tz) if getattr(fill_val, "tz", None)
# is not None else fill_val,
pd.Timestamp("2011-01-03", tz=tz),
pd.Timestamp("2011-01-04", tz=tz),
]
)
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
warn = None
if getattr(fill_val, "tz", None) is not None and fill_val.tz != obj[0].tz:
warn = FutureWarning
with tm.assert_produces_warning(warn, match="mismatched timezone"):
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

@pytest.mark.xfail(reason="Test not implemented")
def test_fillna_series_int64(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1890,7 +1890,8 @@ def test_setitem_with_expansion(self):
# trying to set a single element on a part of a different timezone
# this converts to object
df2 = df.copy()
df2.loc[df2.new_col == "new", "time"] = v
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
df2.loc[df2.new_col == "new", "time"] = v

expected = Series([v[0], df.loc[1, "time"]], name="time")
tm.assert_series_equal(df2.time, expected)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,18 @@ def expected(self):
)
return expected

@pytest.fixture(autouse=True)
def assert_warns(self, request):
    """
    Wrap every test in this class in a mismatched-timezone warning check.

    The test body runs inside ``tm.assert_produces_warning`` expecting a
    ``FutureWarning`` whose message matches "mismatched timezone".  The one
    exception is ``test_slice_key`` when the ``key`` fixture is not a slice:
    that test is a no-op, so it runs without the warning assertion.
    """
    runs_slice_test = request.function.__name__ == "test_slice_key"
    # ``key`` is only looked up for test_slice_key (short-circuit ``and``).
    if runs_slice_test and not isinstance(request.getfixturevalue("key"), slice):
        # No-op test: nothing is set, hence no warning to assert on.
        yield
        return

    expect_warning = tm.assert_produces_warning(
        FutureWarning, match="mismatched timezone"
    )
    with expect_warning:
        yield


@pytest.mark.parametrize(
"obj,expected",
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ def test_datetime64_tz_fillna(self, tz):
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)

result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
Expand Down Expand Up @@ -766,8 +767,15 @@ def test_fillna_datetime64_with_timezone_tzinfo(self):
# but we don't (yet) consider distinct tzinfos for non-UTC tz equivalent
ts = Timestamp("2000-01-01", tz="US/Pacific")
ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific"))
result = ser2.fillna(ts)
assert ser2.dtype.kind == "M"
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = ser2.fillna(ts)
expected = Series([ser[0], ts, ser[2]], dtype=object)
# once deprecation is enforced
# expected = Series(
# [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]],
# dtype=ser2.dtype,
# )
tm.assert_series_equal(result, expected)

def test_fillna_pos_args_deprecation(self):
Expand Down