Skip to content

Commit aa7b17e

Browse files
authored
BUG: resample with ArrowDtype (#56371)
* BUG: resample with ArrowDtype * Typing * xfail for windows * Fix again? * Avoid tuple * Add gh numbers
1 parent 114f067 commit aa7b17e

File tree

5 files changed

+57
-3
lines changed

5 files changed

+57
-3
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ Groupby/resample/rolling
648648
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
649649
- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
650650
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
651+
- Bug in :meth:`DataFrame.resample` when resampling on an :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`)
651652
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
652653
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
653654
-

pandas/core/groupby/grouper.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,6 @@ def _get_grouper(
330330

331331
return grouper, obj
332332

333-
@final
334333
def _set_grouper(
335334
self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None
336335
) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]:

pandas/core/resample.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
rewrite_warning,
3939
)
4040

41+
from pandas.core.dtypes.dtypes import ArrowDtype
4142
from pandas.core.dtypes.generic import (
4243
ABCDataFrame,
4344
ABCSeries,
@@ -48,6 +49,7 @@
4849
ResamplerWindowApply,
4950
warn_alias_replacement,
5051
)
52+
from pandas.core.arrays import ArrowExtensionArray
5153
from pandas.core.base import (
5254
PandasObject,
5355
SelectionMixin,
@@ -68,6 +70,7 @@
6870
from pandas.core.groupby.grouper import Grouper
6971
from pandas.core.groupby.ops import BinGrouper
7072
from pandas.core.indexes.api import MultiIndex
73+
from pandas.core.indexes.base import Index
7174
from pandas.core.indexes.datetimes import (
7275
DatetimeIndex,
7376
date_range,
@@ -109,7 +112,6 @@
109112

110113
from pandas import (
111114
DataFrame,
112-
Index,
113115
Series,
114116
)
115117

@@ -511,6 +513,9 @@ def _wrap_result(self, result):
511513
result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
512514
result.name = getattr(obj, "name", None)
513515

516+
if self._timegrouper._arrow_dtype is not None:
517+
result.index = result.index.astype(self._timegrouper._arrow_dtype)
518+
514519
return result
515520

516521
@final
@@ -2163,6 +2168,7 @@ def __init__(
21632168
self.fill_method = fill_method
21642169
self.limit = limit
21652170
self.group_keys = group_keys
2171+
self._arrow_dtype: ArrowDtype | None = None
21662172

21672173
if origin in ("epoch", "start", "start_day", "end", "end_day"):
21682174
# error: Incompatible types in assignment (expression has type "Union[Union[
@@ -2213,7 +2219,7 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler:
22132219
TypeError if incompatible axis
22142220
22152221
"""
2216-
_, ax, indexer = self._set_grouper(obj, gpr_index=None)
2222+
_, ax, _ = self._set_grouper(obj, gpr_index=None)
22172223
if isinstance(ax, DatetimeIndex):
22182224
return DatetimeIndexResampler(
22192225
obj,
@@ -2495,6 +2501,17 @@ def _get_period_bins(self, ax: PeriodIndex):
24952501

24962502
return binner, bins, labels
24972503

2504+
def _set_grouper(
2505+
self, obj: NDFrameT, sort: bool = False, *, gpr_index: Index | None = None
2506+
) -> tuple[NDFrameT, Index, npt.NDArray[np.intp] | None]:
2507+
obj, ax, indexer = super()._set_grouper(obj, sort, gpr_index=gpr_index)
2508+
if isinstance(ax.dtype, ArrowDtype) and ax.dtype.kind in "Mm":
2509+
self._arrow_dtype = ax.dtype
2510+
ax = Index(
2511+
cast(ArrowExtensionArray, ax.array)._maybe_convert_datelike_array()
2512+
)
2513+
return obj, ax, indexer
2514+
24982515

24992516
def _take_new_index(
25002517
obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0

pandas/tests/resample/test_datetime_index.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
from pandas._libs import lib
99
from pandas._typing import DatetimeNaTType
10+
from pandas.compat import is_platform_windows
11+
import pandas.util._test_decorators as td
1012

1113
import pandas as pd
1214
from pandas import (
@@ -2195,3 +2197,27 @@ def test_resample_b_55282(unit):
21952197
index=exp_dti,
21962198
)
21972199
tm.assert_series_equal(result, expected)
2200+
2201+
2202+
@td.skip_if_no("pyarrow")
2203+
@pytest.mark.parametrize(
2204+
"tz",
2205+
[
2206+
None,
2207+
pytest.param(
2208+
"UTC",
2209+
marks=pytest.mark.xfail(
2210+
condition=is_platform_windows(),
2211+
reason="TODO: Set ARROW_TIMEZONE_DATABASE env var in CI",
2212+
),
2213+
),
2214+
],
2215+
)
2216+
def test_arrow_timestamp_resample(tz):
2217+
# GH 56371
2218+
idx = Series(date_range("2020-01-01", periods=5), dtype="timestamp[ns][pyarrow]")
2219+
if tz is not None:
2220+
idx = idx.dt.tz_localize(tz)
2221+
expected = Series(np.arange(5, dtype=np.float64), index=idx)
2222+
result = expected.resample("1D").mean()
2223+
tm.assert_series_equal(result, expected)

pandas/tests/resample/test_timedelta.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
import pandas as pd
79
from pandas import (
810
DataFrame,
@@ -207,3 +209,12 @@ def test_resample_closed_right():
207209
),
208210
)
209211
tm.assert_series_equal(result, expected)
212+
213+
214+
@td.skip_if_no("pyarrow")
215+
def test_arrow_duration_resample():
216+
# GH 56371
217+
idx = pd.Index(timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]")
218+
expected = Series(np.arange(5, dtype=np.float64), index=idx)
219+
result = expected.resample("1D").mean()
220+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)