From de9369d111a6c783a165fb2a8ec367be23dc36b8 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 00:08:25 +0200 Subject: [PATCH 1/7] Fix window setting for PeriodIndexes --- pandas/core/window/rolling.py | 5 ++++- pandas/tests/window/test_rolling.py | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 335fc3db5cd86..392f93b77a4ed 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1959,7 +1959,10 @@ def validate(self): # this will raise ValueError on non-fixed freqs self.win_freq = self.window - self.window = freq.nanos + if isinstance(self._on, ABCPeriodIndex): + self.window = freq.n + else: + self.window = freq.nanos self.win_type = "freq" # min_periods must be an integer diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4dfa0287bbb03..a3e3ad10c62c8 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -819,3 +819,20 @@ def test_rolling_axis_1_non_numeric_dtypes(value): result = df.rolling(window=2, min_periods=1, axis=1).sum() expected = pd.DataFrame({"a": [1.0, 2.0]}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("func", "values"), + [ + ("min", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7]), + ("sum", [np.nan, 0, 1, 3, 5, 7, 9, 11, 13]), + ], +) +def test_rolling_period_index(func, values): + # GH: 34225 + index = pd.period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="T") + ds = pd.Series([i for i in range(0, len(index))], index=index) + result = getattr(ds.rolling("2T", closed="left"), func)() + expected = pd.Series(values, index=index) + tm.assert_series_equal(result, expected) From d3419ea23dc673a122ccdba299daae953e71e3eb Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 00:12:15 +0200 Subject: [PATCH 2/7] Add whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7ba64f57be136..02969a05e05f5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -347,6 +347,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) +- Bug in :meth:`Rolling` when using ``PeriodIndex`` selected too large windows when applying a function onto the ``Rolling`` object (:issue:`34225`) Reshaping ^^^^^^^^^ From 5ec46070e3bb1eeda973207d58dc942b3fad5130 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 00:31:05 +0200 Subject: [PATCH 3/7] Fix Format issue --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index a3e3ad10c62c8..ab4053fb1e5f6 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -832,7 +832,7 @@ def test_rolling_axis_1_non_numeric_dtypes(value): def test_rolling_period_index(func, values): # GH: 34225 index = pd.period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="T") - ds = pd.Series([i for i in range(0, len(index))], index=index) + ds = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8], index=index) result = getattr(ds.rolling("2T", closed="left"), func)() expected = pd.Series(values, index=index) tm.assert_series_equal(result, expected) From aad912fbe29f0e80668939137b3112a7dbf4b955 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 10:19:37 +0200 Subject: [PATCH 4/7] Fix bug with differing units --- pandas/core/window/rolling.py | 2 +- pandas/tests/window/test_rolling.py | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 392f93b77a4ed..fbfdd7909faf1 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1960,7 +1960,7 @@ def validate(self): # this will raise ValueError on non-fixed freqs self.win_freq = self.window if isinstance(self._on, ABCPeriodIndex): - self.window = freq.n + self.window = freq.nanos / (self._on.freq.nanos / self._on.freq.n) else: self.window = freq.nanos self.win_type = "freq" diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index ab4053fb1e5f6..226c7eb722eb1 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -821,6 +821,21 @@ def test_rolling_axis_1_non_numeric_dtypes(value): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + ("index", "window"), + [ + ( + pd.period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="T"), + "2T", + ), + ( + pd.period_range( + start="2020-01-01 08:00", end="2020-01-01 12:00", freq="30T" + ), + "1h", + ), + ], +) @pytest.mark.parametrize( ("func", "values"), [ @@ -829,10 +844,9 @@ def test_rolling_axis_1_non_numeric_dtypes(value): ("sum", [np.nan, 0, 1, 3, 5, 7, 9, 11, 13]), ], ) -def test_rolling_period_index(func, values): +def test_rolling_period_index(index, window, func, values): # GH: 34225 - index = pd.period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="T") ds = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8], index=index) - result = getattr(ds.rolling("2T", closed="left"), func)() + result = getattr(ds.rolling(window, closed="left"), func)() expected = pd.Series(values, index=index) tm.assert_series_equal(result, expected) From d0fc7010fe6826e4aa7ae4559a712af11ffd7e51 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Wed, 30 Sep 2020 23:12:45 +0200 Subject: [PATCH 5/7] Update doc/source/whatsnew/v1.2.0.rst Co-authored-by: William Ayd --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 02969a05e05f5..aedc7ea25964d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -347,7 +347,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) -- Bug in :meth:`Rolling` when using ``PeriodIndex`` selected too large windows when applying a function onto the ``Rolling`` object (:issue:`34225`) +- Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) Reshaping ^^^^^^^^^ From 2f03ded54beb321f2b0fc31d0d5a2d39ff6a277f Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Sep 2020 23:20:58 +0200 Subject: [PATCH 6/7] Create helper function --- pandas/core/window/rolling.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index fbfdd7909faf1..90bf3a13da651 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1948,7 +1948,6 @@ def validate(self): ): self._validate_monotonic() - freq = self._validate_freq() # we don't allow center if self.center: @@ -1959,10 +1958,7 @@ def validate(self): # this will raise ValueError on non-fixed freqs self.win_freq = self.window - if isinstance(self._on, ABCPeriodIndex): - self.window = freq.nanos / (self._on.freq.nanos / self._on.freq.n) - else: - self.window = freq.nanos + self._determine_window() self.win_type = "freq" # min_periods must be an integer @@ -1982,6 +1978,17 @@ def validate(self): "closed only implemented for datetimelike and offset based windows" ) + def _determine_window(self): + """ + Calculate freq for PeriodIndexes based on Index freq. Can not use + nanos, because asi8 of PeriodIndex is not in nanos + """ + freq = self._validate_freq() + if isinstance(self._on, ABCPeriodIndex): + self.window = freq.nanos / (self._on.freq.nanos / self._on.freq.n) + else: + self.window = freq.nanos + def _validate_monotonic(self): """ Validate monotonic (increasing or decreasing). From 218bf215ea43dfb37dcee6eaa463740c2f0226bf Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 1 Oct 2020 09:32:35 +0200 Subject: [PATCH 7/7] Change function --- pandas/core/window/rolling.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 90bf3a13da651..81239d2b7c5b2 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1958,7 +1958,7 @@ def validate(self): # this will raise ValueError on non-fixed freqs self.win_freq = self.window - self._determine_window() + self.window = self._determine_window_length() self.win_type = "freq" # min_periods must be an integer @@ -1978,16 +1978,15 @@ def validate(self): "closed only implemented for datetimelike and offset based windows" ) - def _determine_window(self): + def _determine_window_length(self) -> Union[int, float]: """ Calculate freq for PeriodIndexes based on Index freq. Can not use nanos, because asi8 of PeriodIndex is not in nanos """ freq = self._validate_freq() if isinstance(self._on, ABCPeriodIndex): - self.window = freq.nanos / (self._on.freq.nanos / self._on.freq.n) - else: - self.window = freq.nanos + return freq.nanos / (self._on.freq.nanos / self._on.freq.n) + return freq.nanos def _validate_monotonic(self): """