From a13c7669598d095b7058fe85e67ab13a5cc86c43 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Thu, 4 Jul 2024 20:39:19 +0300 Subject: [PATCH 1/3] BUG: Fix .dt.microsecond accessor for pyarrow-backed Series --- pandas/core/arrays/arrow/array.py | 5 ++++- pandas/tests/extension/test_arrow.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 4ff7553af2b69..943656ba48432 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2794,7 +2794,10 @@ def _dt_days_in_month(self) -> Self: @property def _dt_microsecond(self) -> Self: - return type(self)(pc.microsecond(self._pa_array)) + # GH 59154 + us = pc.microsecond(self._pa_array) + ms_to_us = pc.multiply(pc.millisecond(self._pa_array), 1000) + return type(self)(pc.add(us, ms_to_us)) @property def _dt_minute(self) -> Self: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index f2e9d2321f33e..4d52b2d101e39 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2437,13 +2437,13 @@ def test_unsupported_dt(data): ["hour", 3], ["minute", 4], ["is_leap_year", False], - ["microsecond", 5], + ["microsecond", 2000], ["month", 1], ["nanosecond", 6], ["quarter", 1], ["second", 7], ["date", date(2023, 1, 2)], - ["time", time(3, 4, 7, 5)], + ["time", time(3, 4, 7, 2000)], ], ) def test_dt_properties(prop, expected): @@ -2456,7 +2456,7 @@ def test_dt_properties(prop, expected): hour=3, minute=4, second=7, - microsecond=5, + microsecond=2000, nanosecond=6, ), None, From fe2db3d47fd216a888d261253640cf983f6599bd Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Thu, 4 Jul 2024 20:42:54 +0300 Subject: [PATCH 2/3] Add whatsnew entry --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index be4b9c218f9f5..3a589e50fa8de 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -496,6 +496,7 @@ Datetimelike - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) +- Bug in :math:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) From 942184590a27f9c9fb71ad4a6dbf0d6d769abfac Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Sun, 7 Jul 2024 10:04:19 +0300 Subject: [PATCH 3/3] Write test --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/extension/test_arrow.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3a589e50fa8de..94baebd1f62c0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -496,11 +496,11 @@ Datetimelike - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) -- Bug in :math:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) - Bug in :meth:`DatetimeIndex.union` when ``unit`` was non-nanosecond (:issue:`59036`) +- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4d52b2d101e39..4fad5e45409b9 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2473,6 +2473,28 @@ def test_dt_properties(prop, expected): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("microsecond", [2000, 5, 0]) +def test_dt_microsecond(microsecond): + # GH 59183 + ser = pd.Series( + [ + pd.Timestamp( + year=2024, + month=7, + day=7, + second=5, + microsecond=microsecond, + nanosecond=6, + ), + None, + ], + dtype=ArrowDtype(pa.timestamp("ns")), + ) + result = ser.dt.microsecond + expected = pd.Series([microsecond, None], dtype="int64[pyarrow]") + tm.assert_series_equal(result, expected) + + def test_dt_is_month_start_end(): ser = pd.Series( [