diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index dca31641a4634..ef904c1d7021a 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -354,6 +354,7 @@ Groupby/resample/rolling
 - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
 - Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
 - Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`)
+- Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index bae0d69f6b782..1c8f47374860c 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -453,7 +453,8 @@ def _wrap_result(self, result):
 
         if isinstance(result, ABCSeries) and result.empty:
             obj = self.obj
-            result.index = _asfreq_compat(obj.index, freq=self.freq)
+            # When index is all NaT, result is empty but index is not
+            result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
             result.name = getattr(obj, "name", None)
 
         return result
@@ -1651,10 +1652,14 @@ def _get_period_bins(self, ax: PeriodIndex):
         nat_count = np.sum(memb._isnan)
         memb = memb[~memb._isnan]
 
-        # if index contains no valid (non-NaT) values, return empty index
         if not len(memb):
+            # index contains no valid (non-NaT) values
+            bins = np.array([], dtype=np.int64)
             binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
-            return binner, [], labels
+            if len(ax) > 0:
+                # index is all NaT
+                binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax))
+            return binner, bins, labels
 
         freq_mult = self.freq.n
 
@@ -1700,12 +1705,7 @@ def _get_period_bins(self, ax: PeriodIndex):
         bins = memb.searchsorted(prng, side="left")
 
         if nat_count > 0:
-            # NaT handling as in pandas._lib.lib.generate_bins_dt64()
-            # shift bins by the number of NaT
-            bins += nat_count
-            bins = np.insert(bins, 0, nat_count)
-            binner = binner.insert(0, NaT)
-            labels = labels.insert(0, NaT)
+            binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)
 
         return binner, bins, labels
 
@@ -1849,6 +1849,29 @@ def _get_period_range_edges(
     return first, last
 
 
+def _insert_nat_bin(
+    binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
+) -> Tuple[PeriodIndex, np.ndarray, PeriodIndex]:
+    """
+    Prepend a ``NaT`` bin to the given binner, bins and labels.
+
+    NaT handling as in pandas._lib.lib.generate_bins_dt64(): every bin edge
+    is shifted by the number of NaT, a new first edge equal to ``nat_count``
+    is inserted, and ``NaT`` is inserted at position 0 of both ``binner``
+    and ``labels``.
+
+    The ``bins`` array passed in is not modified; new objects are returned.
+    """
+    assert nat_count > 0
+    # np.insert returns a new array, so shifting it in place afterwards
+    # leaves the caller's ``bins`` untouched
+    bins = np.insert(bins, 0, 0)
+    bins += nat_count
+    binner = binner.insert(0, NaT)
+    labels = labels.insert(0, NaT)
+    return binner, bins, labels
+
+
 def _adjust_dates_anchored(
     first, last, freq, closed="right", origin="start_day", offset=None
 ):
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index 7389fa31109f8..1154bc3316ae8 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Series
+from pandas import DataFrame, NaT, PeriodIndex, Series
 import pandas._testing as tm
 from pandas.core.groupby.groupby import DataError
 from pandas.core.groupby.grouper import Grouper
@@ -110,6 +110,30 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
     tm.assert_series_equal(result, expected, check_dtype=False)
 
 
+@all_ts
+@pytest.mark.parametrize("freq", ["M", "D", "H"])
+def test_resample_nat_index_series(request, freq, series, resample_method):
+    # GH39227
+
+    if freq == "M":
+        request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))
+
+    s = series.copy()
+    s.index = PeriodIndex([NaT] * len(s), freq=freq)
+    result = getattr(s.resample(freq), resample_method)()
+
+    if resample_method == "ohlc":
+        expected = DataFrame(
+            [], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
+        )
+        tm.assert_frame_equal(result, expected, check_dtype=False)
+    else:
+        expected = s[:0].copy()
+        tm.assert_series_equal(result, expected, check_dtype=False)
+    tm.assert_index_equal(result.index, expected.index)
+    assert result.index.freq == expected.index.freq
+
+
 @all_ts
 @pytest.mark.parametrize("freq", ["M", "D", "H"])
 @pytest.mark.parametrize("resample_method", ["count", "size"])
diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py
index e83196e9c7d56..2fe3fb91768e6 100644
--- a/pandas/tests/resample/test_period_index.py
+++ b/pandas/tests/resample/test_period_index.py
@@ -787,9 +787,9 @@ def test_resample_with_nat(self, periods, values, freq, expected_values):
     def test_resample_with_only_nat(self):
         # GH 13224
         pi = PeriodIndex([pd.NaT] * 3, freq="S")
-        frame = DataFrame([2, 3, 5], index=pi)
+        frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
         expected_index = PeriodIndex(data=[], freq=pi.freq)
-        expected = DataFrame(index=expected_index)
+        expected = DataFrame(index=expected_index, columns=["a"], dtype="int64")
         result = frame.resample("1s").mean()
         tm.assert_frame_equal(result, expected)
 