Skip to content

Commit 779592d

Browse files
authored
BUG: Series.resample fails on NaT index (#39229)
1 parent b58e2b8 commit 779592d

File tree

4 files changed

+50
-12
lines changed

4 files changed

+50
-12
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ Groupby/resample/rolling
354354
- Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision; these methods now use Kahan summation (:issue:`38934`)
355355
- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
356356
- Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`)
357+
- Bug in :meth:`Series.resample` raising when the index was a :class:`PeriodIndex` consisting entirely of ``NaT`` values (:issue:`39227`)
357358

358359
Reshaping
359360
^^^^^^^^^

pandas/core/resample.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,8 @@ def _wrap_result(self, result):
453453

454454
if isinstance(result, ABCSeries) and result.empty:
455455
obj = self.obj
456-
result.index = _asfreq_compat(obj.index, freq=self.freq)
456+
# When index is all NaT, result is empty but index is not
457+
result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
457458
result.name = getattr(obj, "name", None)
458459

459460
return result
@@ -1651,10 +1652,14 @@ def _get_period_bins(self, ax: PeriodIndex):
16511652
nat_count = np.sum(memb._isnan)
16521653
memb = memb[~memb._isnan]
16531654

1654-
# if index contains no valid (non-NaT) values, return empty index
16551655
if not len(memb):
1656+
# index contains no valid (non-NaT) values
1657+
bins = np.array([], dtype=np.int64)
16561658
binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
1657-
return binner, [], labels
1659+
if len(ax) > 0:
1660+
# index is all NaT
1661+
binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax))
1662+
return binner, bins, labels
16581663

16591664
freq_mult = self.freq.n
16601665

@@ -1700,12 +1705,7 @@ def _get_period_bins(self, ax: PeriodIndex):
17001705
bins = memb.searchsorted(prng, side="left")
17011706

17021707
if nat_count > 0:
1703-
# NaT handling as in pandas._lib.lib.generate_bins_dt64()
1704-
# shift bins by the number of NaT
1705-
bins += nat_count
1706-
bins = np.insert(bins, 0, nat_count)
1707-
binner = binner.insert(0, NaT)
1708-
labels = labels.insert(0, NaT)
1708+
binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)
17091709

17101710
return binner, bins, labels
17111711

@@ -1849,6 +1849,19 @@ def _get_period_range_edges(
18491849
return first, last
18501850

18511851

1852+
def _insert_nat_bin(
    binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
) -> Tuple[PeriodIndex, np.ndarray, PeriodIndex]:
    """
    Prepend a leading bin that accounts for the NaT entries of a period index.

    NaT handling as in pandas._lib.lib.generate_bins_dt64(): every existing
    bin edge is shifted by the number of NaT, a new first edge equal to
    ``nat_count`` is added, and ``NaT`` is inserted at position 0 of both
    ``binner`` and ``labels``.

    Parameters
    ----------
    binner : PeriodIndex
        Bin edges as periods.
    bins : np.ndarray
        Integer bin edges; this array is not modified.
    labels : PeriodIndex
        Labels of the bins.
    nat_count : int
        Number of NaT values; must be positive.

    Returns
    -------
    tuple of (PeriodIndex, np.ndarray, PeriodIndex)
        The extended ``binner``, ``bins`` and ``labels``, each one element
        longer than its input.
    """
    assert nat_count > 0
    # np.insert returns a new array, so shifting that copy leaves the
    # caller's ``bins`` untouched.  The placeholder 0 becomes ``nat_count``
    # after the shift, i.e. the edge of the new leading NaT bin.
    bins = np.insert(bins, 0, 0)
    bins += nat_count
    binner = binner.insert(0, NaT)
    labels = labels.insert(0, NaT)
    return binner, bins, labels
1863+
1864+
18521865
def _adjust_dates_anchored(
18531866
first, last, freq, closed="right", origin="start_day", offset=None
18541867
):

pandas/tests/resample/test_base.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pytest
55

6-
from pandas import DataFrame, Series
6+
from pandas import DataFrame, NaT, PeriodIndex, Series
77
import pandas._testing as tm
88
from pandas.core.groupby.groupby import DataError
99
from pandas.core.groupby.grouper import Grouper
@@ -110,6 +110,30 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
110110
tm.assert_series_equal(result, expected, check_dtype=False)
111111

112112

113+
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_nat_index_series(request, freq, series, resample_method):
    """Resample a Series whose PeriodIndex holds only NaT (GH39227)."""
    if freq == "M":
        # The monthly case is marked as an expected failure.
        xfail_marker = pytest.mark.xfail(reason="Don't know why this fails")
        request.node.add_marker(xfail_marker)

    ser = series.copy()
    ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
    resampler = ser.resample(freq)
    result = getattr(resampler, resample_method)()

    if resample_method != "ohlc":
        # Every other method is compared against an empty slice of the input.
        expected = ser[:0].copy()
        tm.assert_series_equal(result, expected, check_dtype=False)
    else:
        # ohlc is compared against an empty four-column frame.
        ohlc_columns = ["open", "high", "low", "close"]
        expected = DataFrame([], index=ser.index[:0].copy(), columns=ohlc_columns)
        tm.assert_frame_equal(result, expected, check_dtype=False)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
135+
136+
113137
@all_ts
114138
@pytest.mark.parametrize("freq", ["M", "D", "H"])
115139
@pytest.mark.parametrize("resample_method", ["count", "size"])

pandas/tests/resample/test_period_index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -787,9 +787,9 @@ def test_resample_with_nat(self, periods, values, freq, expected_values):
787787
def test_resample_with_only_nat(self):
788788
# GH 13224
789789
pi = PeriodIndex([pd.NaT] * 3, freq="S")
790-
frame = DataFrame([2, 3, 5], index=pi)
790+
frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
791791
expected_index = PeriodIndex(data=[], freq=pi.freq)
792-
expected = DataFrame(index=expected_index)
792+
expected = DataFrame(index=expected_index, columns=["a"], dtype="int64")
793793
result = frame.resample("1s").mean()
794794
tm.assert_frame_equal(result, expected)
795795

0 commit comments

Comments
 (0)