From 29700f8ee79312898eb37b417cfd4bc005b6e48b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Nov 2022 16:27:32 -0800 Subject: [PATCH 1/2] API: Float64Index.astype(datetimelike) --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/interval.py | 9 ++++++ pandas/core/indexes/base.py | 8 ------ pandas/tests/arithmetic/test_interval.py | 12 +------- pandas/tests/indexes/numeric/test_astype.py | 31 +++++++++++++-------- 5 files changed, 30 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 032bcf09244e5..9b9f8d9b8c889 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -329,6 +329,7 @@ Other API changes - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`) +- :meth:`Index.astype` now allows casting from ``float64`` dtype to datetime-like dtypes, matching :class:`Series` behavior (:issue:`??`) - Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 77e2fdac26da9..5ebd882cd3a13 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -919,6 +919,15 @@ def astype(self, dtype, copy: bool = True): if dtype == self.dtype: return self.copy() if copy else self + if is_float_dtype(self.dtype.subtype) and needs_i8_conversion( + dtype.subtype + ): + # This is allowed on the Index.astype but we disallow it here + msg = ( + f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + ) + raise TypeError(msg) + # need to cast to different subtype try: # We need to use Index rules for astype to prevent casting diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 27672c82fdf15..14d1ce868470f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1016,14 +1016,6 @@ def astype(self, dtype, copy: bool = True): with rewrite_exception(type(values).__name__, type(self).__name__): new_values = values.astype(dtype, copy=copy) - elif is_float_dtype(self.dtype) and needs_i8_conversion(dtype): - # NB: this must come before the ExtensionDtype check below - # TODO: this differs from Series behavior; can/should we align them? - raise TypeError( - f"Cannot convert Float64Index to dtype {dtype}; integer " - "values are required for conversion" - ) - elif isinstance(dtype, ExtensionDtype): cls = dtype.construct_array_type() # Note: for RangeIndex and CategoricalDtype self vs self._values diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py index 88e3dca62d9e0..0e316cf419cb0 100644 --- a/pandas/tests/arithmetic/test_interval.py +++ b/pandas/tests/arithmetic/test_interval.py @@ -133,18 +133,8 @@ def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed): expected = self.elementwise_comparison(op, interval_array, other) tm.assert_numpy_array_equal(result, expected) - def test_compare_scalar_na( - self, op, interval_array, nulls_fixture, box_with_array, request - ): + def test_compare_scalar_na(self, op, interval_array, nulls_fixture, box_with_array): box = box_with_array - - if box is pd.DataFrame: - if interval_array.dtype.subtype.kind not in "iuf": - mark = pytest.mark.xfail( - reason="raises on DataFrame.transpose (would be fixed by EA2D)" - ) - request.node.add_marker(mark) - obj = tm.box_expected(interval_array, box) result = op(obj, nulls_fixture) diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index ee75f56eac7ce..5d99db9f5aa07 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -1,11 +1,11 @@ -import re - import numpy as np import pytest -from pandas.core.dtypes.common import pandas_dtype - -from pandas import Index +from pandas import ( + Index, + to_datetime, + to_timedelta, +) import pandas._testing as tm from pandas.core.indexes.api import ( Float64Index, @@ -71,15 +71,22 @@ def test_astype_float64_to_float_dtype(self, dtype): tm.assert_index_equal(result, expected) @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) - def test_cannot_cast_to_datetimelike(self, dtype): + def test_astype_float_to_datetimelike(self, dtype): + # pre-2.0 Index.astype from floating to M8/m8/Period raised, + # inconsistent with Series.astype idx = Float64Index([0, 1.1, 2]) - msg = ( - f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; " - f"integer values are required for conversion" - ) - with pytest.raises(TypeError, match=re.escape(msg)): - idx.astype(dtype) + result = idx.astype(dtype) + if dtype[0] == "M": + expected = to_datetime(idx.values) + else: + expected = to_timedelta(idx.values) + tm.assert_index_equal(result, expected) + + # check that we match Series behavior + result = idx.to_series().set_axis(range(3)).astype(dtype) + expected = expected.to_series().set_axis(range(3)) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"]) @pytest.mark.parametrize("non_finite", [np.inf, np.nan]) From 48af584f4ed632f8cb924549b69efaf166ea5e0c Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Nov 2022 16:28:51 -0800 Subject: [PATCH 2/2] GH ref --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/indexes/numeric/test_astype.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9b9f8d9b8c889..a58b9c8397a42 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -329,7 +329,7 @@ Other API changes - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`) -- :meth:`Index.astype` now allows casting from ``float64`` dtype to datetime-like dtypes, matching :class:`Series` behavior (:issue:`??`) +- :meth:`Index.astype` now allows casting from ``float64`` dtype to datetime-like dtypes, matching :class:`Series` behavior (:issue:`49660`) - Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`) - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index 5d99db9f5aa07..bcbd60b96c485 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -72,7 +72,7 @@ def test_astype_float64_to_float_dtype(self, dtype): @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) def test_astype_float_to_datetimelike(self, dtype): - # pre-2.0 Index.astype from floating to M8/m8/Period raised, + # GH#49660 pre-2.0 Index.astype from floating to M8/m8/Period raised, # inconsistent with Series.astype idx = Float64Index([0, 1.1, 2])