From 53777cd27e90e9aaf4609c2dddf7a53926e51df3 Mon Sep 17 00:00:00 2001 From: Yulia Date: Wed, 15 Feb 2023 19:36:43 +0000 Subject: [PATCH 1/5] Fix issue, add test --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/groupby/ops.py | 3 +++ pandas/tests/groupby/test_grouping.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c0082b451c95d..4fbb80953ee66 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1371,6 +1371,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) - Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Bug in :meth:`DataFrameGroupBy.ohlc` ignoring ``as_index=False`` (:issue:`51413`) +- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 726d75d705344..0e1d1a4db2b53 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1085,6 +1085,9 @@ def groups(self): } return result + def __iter__(self) -> Iterator[Hashable]: + return iter(self.groupings[0].grouping_vector) + @property def nkeys(self) -> int: # still matches len(self.groupings), but we can hard-code diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index f5bbfce560d33..8d465f0961120 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -442,6 +442,29 @@ def test_groupby_grouper_f_sanity_checked(self): with pytest.raises(AssertionError, match=msg): ts.groupby(lambda key: key[0:6]) + def test_groupby_with_datetime_key(self): + # GH 51158 + df = DataFrame( + { + "id": ["a", "b"] * 3, + "b": date_range("2000-01-01", "2000-01-03", freq="9H"), + } + ) + grouper = Grouper(key="b", freq="D") + gb = df.groupby([grouper, "id"]) + + # test number of groups + expected = { + (Timestamp("2000-01-01"), "a"): [0, 2], + (Timestamp("2000-01-01"), "b"): [1], + (Timestamp("2000-01-02"), "a"): [4], + (Timestamp("2000-01-02"), "b"): [3, 5], + } + tm.assert_dict_equal(gb.groups, expected) + + # test number of group keys + assert len(gb.groups.keys()) == 4 + def test_grouping_error_on_multidim_input(self, df): msg = "Grouper for '' not 1-dimensional" with pytest.raises(ValueError, match=msg): From a0e5a08479eefb82160d02ec9803a546997cdb86 Mon Sep 17 00:00:00 2001 From: Yulia Date: Sat, 18 Mar 2023 15:02:27 +0000 Subject: [PATCH 2/5] fix conflict --- pandas/tests/groupby/test_grouping.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index bb7242a9e6e0f..2835987aa7481 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -441,6 +441,10 @@ def test_groupby_grouper_f_sanity_checked(self): msg = "'Timestamp' object is not subscriptable" with pytest.raises(TypeError, match=msg): ts.groupby(lambda key: key[0:6]) + result = ts.groupby(lambda x: x).sum() + expected = ts.groupby(ts.index).sum() + expected.index.freq = None + tm.assert_series_equal(result, expected) def test_groupby_with_datetime_key(self): # GH 51158 @@ -465,12 +469,6 @@ def test_groupby_with_datetime_key(self): # test number of group keys assert len(gb.groups.keys()) == 4 - result = ts.groupby(lambda x: x).sum() - expected = ts.groupby(ts.index).sum() - expected.index.freq = None - tm.assert_series_equal(result, expected) - - def test_grouping_error_on_multidim_input(self, df): msg = "Grouper for '' not 1-dimensional" with pytest.raises(ValueError, match=msg): From 132b46b33864b3313e4f213816a9a0e53dfd1db3 Mon Sep 17 00:00:00 2001 From: Yulia Date: Sat, 18 Mar 2023 15:04:39 +0000 Subject: [PATCH 3/5] fix conflict --- pandas/tests/groupby/test_grouping.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 2835987aa7481..db2ce709d764d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -441,6 +441,7 @@ def test_groupby_grouper_f_sanity_checked(self): msg = "'Timestamp' object is not subscriptable" with pytest.raises(TypeError, match=msg): ts.groupby(lambda key: key[0:6]) + result = ts.groupby(lambda x: x).sum() expected = ts.groupby(ts.index).sum() expected.index.freq = None From 66bf4c8c7357e51da79deb4ab77d215491c0e23e Mon Sep 17 00:00:00 2001 From: Yulia Date: Sat, 18 Mar 2023 19:21:20 +0000 Subject: [PATCH 4/5] bug description moved to 2.1.0 --- doc/source/whatsnew/v2.0.0.rst | 3 --- doc/source/whatsnew/v2.1.0.rst | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f80a03f3ea7e1..0856382f6cbdd 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1348,9 +1348,6 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"ngroup"`` argument (:issue:`45986`) - Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`) - Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) -- Bug in :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, and :meth:`Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) -- Bug in :meth:`DataFrameGroupBy.ohlc` ignoring ``as_index=False`` (:issue:`51413`) -- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Bug in :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, and :meth:`.Resampler.agg` would ignore arguments when passed a list of functions (:issue:`50863`) - Bug in :meth:`.DataFrameGroupBy.ohlc` ignoring ``as_index=False`` (:issue:`51413`) - Bug in :meth:`DataFrameGroupBy.agg` after subsetting columns (e.g. ``.groupby(...)[["a", "b"]]``) would not include groupings in the result (:issue:`51186`) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3f898ca23bd6f..b759b5b75f7eb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -215,6 +215,7 @@ Groupby/resample/rolling grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex` or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) +- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Reshaping From bc4b969e8c0b13b62a70ddd293c39baa11a2ea4b Mon Sep 17 00:00:00 2001 From: Yulia Date: Fri, 31 Mar 2023 15:00:12 +0100 Subject: [PATCH 5/5] fixed pre-commit --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d19b78b656890..4fd2fd815d1f5 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -243,8 +243,8 @@ Groupby/resample/rolling grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex` or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) -- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`) - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) +- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`) - Reshaping