diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 198a7155e1a1e..f9e920aed46ea 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -371,6 +371,7 @@ Datetimelike - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) +- Bug in :func:`to_datetime` converting :class:`Series` or :class:`DataFrame` containing :class:`arrays.ArrowExtensionArray` of ``pyarrow`` timestamps to numpy datetimes (:issue:`52545`) - Bug in :meth:`DataFrame.to_sql` raising ``ValueError`` for pyarrow-backed date like dtypes (:issue:`53854`) - Bug in :meth:`Timestamp.date`, :meth:`Timestamp.isocalendar`, :meth:`Timestamp.timetuple`, and :meth:`Timestamp.toordinal` were returning incorrect results for inputs outside those supported by the Python standard library's datetime module (:issue:`53668`) - Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) @@ -378,7 +379,6 @@ Datetimelike - Bug in constructing a :class:`Series` or :class:`DataFrame` from a datetime or timedelta scalar always inferring nanosecond resolution instead of inferring from the input (:issue:`52212`) - Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`) - Timedelta ^^^^^^^^^ - :meth:`TimedeltaIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 13a434812db3b..b422ee1a08e9f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -57,7 +57,10 @@ is_list_like, is_numeric_dtype, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import ( + ArrowDtype, + DatetimeTZDtype, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -71,6 +74,7 @@ ) from pandas.core import algorithms from pandas.core.algorithms import unique +from pandas.core.arrays import ArrowExtensionArray from pandas.core.arrays.base import ExtensionArray from pandas.core.arrays.datetimes import ( maybe_convert_dtype, @@ -403,6 +407,25 @@ def _convert_listlike_datetimes( arg = arg.tz_convert(None).tz_localize("utc") return arg + elif isinstance(arg_dtype, ArrowDtype) and arg_dtype.type is Timestamp: + # TODO: Combine with above if DTI/DTA supports Arrow timestamps + if utc: + # pyarrow uses UTC, not lowercase utc + if isinstance(arg, Index): + arg_array = cast(ArrowExtensionArray, arg.array) + if arg_dtype.pyarrow_dtype.tz is not None: + arg_array = arg_array._dt_tz_convert("UTC") + else: + arg_array = arg_array._dt_tz_localize("UTC") + arg = Index(arg_array) + else: + # ArrowExtensionArray + if arg_dtype.pyarrow_dtype.tz is not None: + arg = arg._dt_tz_convert("UTC") + else: + arg = arg._dt_tz_localize("UTC") + return arg + elif lib.is_np_dtype(arg_dtype, "M"): if not is_supported_unit(get_unit_from_dtype(arg_dtype)): # We go to closest supported reso, i.e. "s" diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4466ee96235f5..5ea0ca1fddbd3 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -933,6 +933,32 @@ def test_to_datetime_dtarr(self, tz): result = to_datetime(arr) assert result is arr + # Doesn't work on Windows since tzpath not set correctly + @td.skip_if_windows + @pytest.mark.parametrize("arg_class", [Series, Index]) + @pytest.mark.parametrize("utc", [True, False]) + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_to_datetime_arrow(self, tz, utc, arg_class): + pa = pytest.importorskip("pyarrow") + + dti = date_range("1965-04-03", periods=19, freq="2W", tz=tz) + dti = arg_class(dti) + + dti_arrow = dti.astype(pd.ArrowDtype(pa.timestamp(unit="ns", tz=tz))) + + result = to_datetime(dti_arrow, utc=utc) + expected = to_datetime(dti, utc=utc).astype( + pd.ArrowDtype(pa.timestamp(unit="ns", tz=tz if not utc else "UTC")) + ) + if not utc and arg_class is not Series: + # Doesn't hold for utc=True, since that will astype + # to_datetime also returns a new object for series + assert result is dti_arrow + if arg_class is Series: + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + def test_to_datetime_pydatetime(self): actual = to_datetime(datetime(2008, 1, 15)) assert actual == datetime(2008, 1, 15)