From a6d37433679ee5dfac3849a74be1492230f4fcc5 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 16 Jan 2024 20:05:27 -0800 Subject: [PATCH 1/4] REGR: DatetimeTZDtype __from_arrow__ interprets UTC values as wall time --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/arrays/datetimes/test_constructors.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0a288f435c50c..5b51bc9debb33 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -919,7 +919,7 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> DatetimeArray: else: np_arr = array.to_numpy() - return DatetimeArray._from_sequence(np_arr, dtype=self, copy=False) + return DatetimeArray._simple_new(np_arr, dtype=self) def __setstate__(self, state) -> None: # for pickle compat. __get_state__ is defined in the diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index e14cd0c6f2b7d..8e41f4d2e5eef 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -6,6 +6,7 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd +from pandas import Series import pandas._testing as tm from pandas.core.arrays import DatetimeArray @@ -242,6 +243,17 @@ def test_from_arrowtest_from_arrow_with_different_units_and_timezones_with_( tm.assert_extension_array_equal(result, expected) +def test_datetimetz_from_arrow_roundtrip(): + # GH 56775 + pa = pytest.importorskip("pyarrow") + + ser = Series(["2012-01-01", "2012-01-02"], dtype="datetime64[ns, Europe/Brussels]") + + result = ser.dtype.__from_arrow__(pa.array(ser)) + expected = DatetimeArray._from_sequence(ser) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize( ("unit", "tz"), [ From 9a9cee6db8f98372b218349fe583e37fbbbde82e Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 16 Jan 2024 20:16:10 -0800 Subject: [PATCH 2/4] change the test --- pandas/tests/arrays/datetimes/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index 8e41f4d2e5eef..88c70dd864add 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -233,7 +233,7 @@ def test_from_arrowtest_from_arrow_with_different_units_and_timezones_with_( dtype = DatetimeTZDtype(unit=pd_unit, tz=pd_tz) result = dtype.__from_arrow__(arr) - expected = DatetimeArray._from_sequence( + expected = DatetimeArray._simple_new( np.array(data, dtype=f"datetime64[{pa_unit}]").astype(f"datetime64[{pd_unit}]"), dtype=dtype, ) From e2a9de10430627bed59075e11af9b9621fa0c563 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:35:51 -0800 Subject: [PATCH 3/4] update test --- .../arrays/datetimes/test_constructors.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index 88c70dd864add..927ed2d6f4756 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -6,7 +6,6 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd -from pandas import Series import pandas._testing as tm from pandas.core.arrays import DatetimeArray @@ -223,7 +222,7 @@ def test_2d(self, order): ("s", "ns", "US/Central", "Asia/Kolkata", COARSE_TO_FINE_SAFE), ], ) -def test_from_arrowtest_from_arrow_with_different_units_and_timezones_with_( +def test_from_arrow_with_different_units_and_timezones_with( pa_unit, pd_unit, pa_tz, pd_tz, data ): pa = pytest.importorskip("pyarrow") @@ -233,9 +232,10 @@ def test_from_arrowtest_from_arrow_with_different_units_and_timezones_with_( dtype = DatetimeTZDtype(unit=pd_unit, tz=pd_tz) result = dtype.__from_arrow__(arr) - expected = DatetimeArray._simple_new( - np.array(data, dtype=f"datetime64[{pa_unit}]").astype(f"datetime64[{pd_unit}]"), - dtype=dtype, + expected = ( + DatetimeArray._from_sequence(data, dtype=f"datetime64[{pa_unit}]") + .tz_localize("UTC") + .astype(dtype, copy=False) ) tm.assert_extension_array_equal(result, expected) @@ -243,17 +243,6 @@ def test_from_arrowtest_from_arrow_with_different_units_and_timezones_with_( tm.assert_extension_array_equal(result, expected) -def test_datetimetz_from_arrow_roundtrip(): - # GH 56775 - pa = pytest.importorskip("pyarrow") - - ser = Series(["2012-01-01", "2012-01-02"], dtype="datetime64[ns, Europe/Brussels]") - - result = ser.dtype.__from_arrow__(pa.array(ser)) - expected = DatetimeArray._from_sequence(ser) - tm.assert_extension_array_equal(result, expected) - - @pytest.mark.parametrize( ("unit", "tz"), [ From 5a4e5e0829889d9321f207133dfe6869244fa4fd Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:46:03 -0800 Subject: [PATCH 4/4] address more comments --- pandas/tests/arrays/datetimes/test_constructors.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index 927ed2d6f4756..3d22427d41985 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -232,10 +232,8 @@ def test_from_arrow_with_different_units_and_timezones_with( dtype = DatetimeTZDtype(unit=pd_unit, tz=pd_tz) result = dtype.__from_arrow__(arr) - expected = ( - DatetimeArray._from_sequence(data, dtype=f"datetime64[{pa_unit}]") - .tz_localize("UTC") - .astype(dtype, copy=False) + expected = DatetimeArray._from_sequence(data, dtype=f"M8[{pa_unit}, UTC]").astype( + dtype, copy=False ) tm.assert_extension_array_equal(result, expected)