From 0e52a68dc84fbcf307de102d2f71147671db6271 Mon Sep 17 00:00:00 2001
From: patrick <61934744+phofl@users.noreply.github.com>
Date: Tue, 6 Oct 2020 20:03:59 +0200
Subject: [PATCH] Backport PR #36753 on branch 1.1.x: BUG: Segfault with string Index when using Rolling after Groupby

---
 pandas/core/window/rolling.py       |  4 ++--
 pandas/tests/window/test_grouper.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index f7bcd1e795fd3..617c43e0a59ed 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -409,7 +409,7 @@ def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries:
 
         if self.on is not None and not self._on.equals(obj.index):
             name = self._on.name
-            final.append(Series(self._on, index=obj.index, name=name))
+            final.append(Series(self._on, index=self.obj.index, name=name))
 
         if self._selection is not None:
 
@@ -2259,7 +2259,7 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
         """
         rolling_indexer: Type[BaseIndexer]
         indexer_kwargs: Optional[Dict] = None
-        index_array = self.obj.index.asi8
+        index_array = self._on.asi8
         if isinstance(self.window, BaseIndexer):
             rolling_indexer = type(self.window)
             indexer_kwargs = self.window.__dict__
diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py
index 806c22c60b48f..62499714167e1 100644
--- a/pandas/tests/window/test_grouper.py
+++ b/pandas/tests/window/test_grouper.py
@@ -417,3 +417,32 @@ def test_groupby_rolling_empty_frame(self):
         result = expected.groupby(["s1", "s2"]).rolling(window=1).sum()
         expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None])
         tm.assert_frame_equal(result, expected)
+
+    def test_groupby_rolling_string_index(self):
+        # GH: 36727
+        df = pd.DataFrame(
+            [
+                ["A", "group_1", pd.Timestamp(2019, 1, 1, 9)],
+                ["B", "group_1", pd.Timestamp(2019, 1, 2, 9)],
+                ["Z", "group_2", pd.Timestamp(2019, 1, 3, 9)],
+                ["H", "group_1", pd.Timestamp(2019, 1, 6, 9)],
+                ["E", "group_2", pd.Timestamp(2019, 1, 20, 9)],
+            ],
+            columns=["index", "group", "eventTime"],
+        ).set_index("index")
+
+        groups = df.groupby("group")
+        df["count_to_date"] = groups.cumcount()
+        rolling_groups = groups.rolling("10d", on="eventTime")
+        result = rolling_groups.apply(lambda df: df.shape[0])
+        expected = pd.DataFrame(
+            [
+                ["A", "group_1", pd.Timestamp(2019, 1, 1, 9), 1.0],
+                ["B", "group_1", pd.Timestamp(2019, 1, 2, 9), 2.0],
+                ["H", "group_1", pd.Timestamp(2019, 1, 6, 9), 3.0],
+                ["Z", "group_2", pd.Timestamp(2019, 1, 3, 9), 1.0],
+                ["E", "group_2", pd.Timestamp(2019, 1, 20, 9), 1.0],
+            ],
+            columns=["index", "group", "eventTime", "count_to_date"],
+        ).set_index(["group", "index"])
+        tm.assert_frame_equal(result, expected)