From cd4895a583c12d84a9ec0e52de033ddc97d935ef Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 23 Jan 2022 10:27:06 -0800 Subject: [PATCH 1/4] DEPR: treating float data as wall-times in DTA._from_sequence --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/arrays/datetimes.py | 24 ++++++++++++++++++---- pandas/core/tools/datetimes.py | 2 +- pandas/tests/dtypes/test_missing.py | 23 +++++++++++++-------- pandas/tests/internals/test_internals.py | 2 +- pandas/tests/series/methods/test_astype.py | 7 ++++++- 6 files changed, 43 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b32416418a39f..60f9915fe748a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -150,6 +150,7 @@ Other Deprecations - Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`) - Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`) - Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) +- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`??`) - diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 87acadc01faad..02ab200a59d62 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2063,7 +2063,7 @@ def _sequence_to_dt64ns( inferred_freq = data.freq # By this point we are assured to have either a numpy array or Index - data, copy = maybe_convert_dtype(data, copy) + data, copy = maybe_convert_dtype(data, copy, tz=tz) data_dtype = getattr(data, "dtype", None) if ( @@ -2240,7 +2240,7 @@ def objects_to_datetime64ns( raise TypeError(result) -def maybe_convert_dtype(data, copy: bool): +def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): """ Convert data based on dtype conventions, issuing deprecation warnings or errors where appropriate. @@ -2249,6 +2249,7 @@ def maybe_convert_dtype(data, copy: bool): ---------- data : np.ndarray or pd.Index copy : bool + tz : tzinfo or None, default None Returns ------- @@ -2268,8 +2269,23 @@ def maybe_convert_dtype(data, copy: bool): # as wall-times instead of UTC timestamps. data = data.astype(DT64NS_DTYPE) copy = False - # TODO: deprecate this behavior to instead treat symmetrically - # with integer dtypes. See discussion in GH#23675 + if ( + tz is not None + and len(data) > 0 + and not timezones.is_utc(timezones.maybe_get_tz(tz)) + ): + # GH#23675 deprecate to treat symmetrically with integer dtypes + warnings.warn( + "The behavior of DatetimeArray._from_sequence with a timezone-aware " + "dtype and floating-dtype data is deprecated. In a future version, " + "this data will be interpreted as nanosecond UTC timestamps " + "instead of wall-times, matching the behavior with integer dtypes. " + "To retain the old behavior, explicitly cast to 'datetime64[ns]' " + "before passing the data to pandas. To get the future behavior, " + "first cast to 'int64'.", + FutureWarning, + stacklevel=find_stack_level(), + ) elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): # GH#29794 enforcing deprecation introduced in GH#23539 diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 4d9420fc0510d..eaed07f154b2a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -364,7 +364,7 @@ def _convert_listlike_datetimes( # NB: this must come after unit transformation orig_arg = arg try: - arg, _ = maybe_convert_dtype(arg, copy=False) + arg, _ = maybe_convert_dtype(arg, copy=False, tz=tz) except TypeError: if errors == "coerce": npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 1917fc615118a..a986e8d659202 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -415,23 +415,28 @@ def test_array_equivalent(dtype_equal): TimedeltaIndex([1, np.nan]), dtype_equal=dtype_equal, ) + + msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times" + with tm.assert_produces_warning(FutureWarning, match=msg): + dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") + dti2 = DatetimeIndex([0, np.nan], tz="CET") + dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") + assert array_equivalent( - DatetimeIndex([0, np.nan], tz="US/Eastern"), - DatetimeIndex([0, np.nan], tz="US/Eastern"), + dti1, + dti1, dtype_equal=dtype_equal, ) assert not array_equivalent( - DatetimeIndex([0, np.nan], tz="US/Eastern"), - DatetimeIndex([1, np.nan], tz="US/Eastern"), + dti1, + dti3, dtype_equal=dtype_equal, ) # The rest are not dtype_equal + assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1) assert not array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan], tz="US/Eastern") - ) - assert not array_equivalent( - DatetimeIndex([0, np.nan], tz="CET"), - DatetimeIndex([0, np.nan], tz="US/Eastern"), + dti2, + dti1, ) assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 83df57f922c9c..85a1e52c0c767 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -140,7 +140,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_bl assert m is not None, f"incompatible typestr -> {typestr}" tz = m.groups()[0] assert num_items == 1, "must have only 1 num items for a tz-aware" - values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data + values = DatetimeIndex(np.arange(N) * 10 ** 9, tz=tz)._data values = ensure_block_shape(values, ndim=len(shape)) elif typestr in ("timedelta", "td", "m8[ns]"): values = (mat * 1).astype("m8[ns]") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index b51cf92e8fd8b..13da009a85399 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -381,7 +381,12 @@ def test_astype_nan_to_bool(self): ) def test_astype_ea_to_datetimetzdtype(self, dtype): # GH37553 - result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific")) + ser = Series([4, 0, 9], dtype=dtype) + warn = FutureWarning if ser.dtype.kind == "f" else None + msg = "with a timezone-aware dtype and floating-dtype data" + with tm.assert_produces_warning(warn, match=msg): + result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) + expected = Series( { 0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"), From e9958ca3ab157e8ff617d577a7f276cf98a763f2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 23 Jan 2022 10:29:06 -0800 Subject: [PATCH 2/4] GH refs --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/arrays/datetimes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 60f9915fe748a..ad7c06f8a5701 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -150,7 +150,7 @@ Other Deprecations - Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`) - Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`) - Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) -- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`??`) +- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex` (:issue:`45573`) - diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 02ab200a59d62..8ff4057227bc2 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2274,7 +2274,7 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): and len(data) > 0 and not timezones.is_utc(timezones.maybe_get_tz(tz)) ): - # GH#23675 deprecate to treat symmetrically with integer dtypes + # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes warnings.warn( "The behavior of DatetimeArray._from_sequence with a timezone-aware " "dtype and floating-dtype data is deprecated. In a future version, " From 8043167f5930b2699bb66ad841663d7b847992cb Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 24 Jan 2022 20:18:02 -0800 Subject: [PATCH 3/4] mypy fixup --- pandas/core/tools/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index eaed07f154b2a..6bdd14c50eb4a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -27,6 +27,7 @@ iNaT, nat_strings, parsing, + timezones, ) from pandas._libs.tslibs.parsing import ( # noqa:F401 DateParseError, @@ -364,7 +365,7 @@ def _convert_listlike_datetimes( # NB: this must come after unit transformation orig_arg = arg try: - arg, _ = maybe_convert_dtype(arg, copy=False, tz=tz) + arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz)) except TypeError: if errors == "coerce": npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) From 298c6b443134a15ca8465489455775b1c0c70fa9 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Feb 2022 07:42:39 -0800 Subject: [PATCH 4/4] lint fixup --- pandas/tests/internals/test_internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a443a5eeaac10..eda902d34bff5 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -140,7 +140,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_bl assert m is not None, f"incompatible typestr -> {typestr}" tz = m.groups()[0] assert num_items == 1, "must have only 1 num items for a tz-aware" - values = DatetimeIndex(np.arange(N) * 10 ** 9, tz=tz)._data + values = DatetimeIndex(np.arange(N) * 10**9, tz=tz)._data values = ensure_block_shape(values, ndim=len(shape)) elif typestr in ("timedelta", "td", "m8[ns]"): values = (mat * 1).astype("m8[ns]")