Skip to content

Commit fa613b3

Browse files
authored
BUG: Grouper specified by key not regarded as in-axis (#50414)
1 parent 445bed9 commit fa613b3

File tree

4 files changed

+45
-32
lines changed

4 files changed

+45
-32
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,8 @@ Groupby/resample/rolling
934934
- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` with ``dropna=False`` would drop NA values when the grouper was categorical (:issue:`36327`)
935935
- Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`)
936936
- Bug in :meth:`.SeriesGroupBy.nth` would raise when grouper contained NA values after subsetting from a :class:`DataFrameGroupBy` (:issue:`26454`)
937+
- Bug in :meth:`DataFrame.groupby` would not include a :class:`.Grouper` specified by ``key`` in the result when ``as_index=False`` (:issue:`50413`)
938+
-
937939

938940
Reshaping
939941
^^^^^^^^^

pandas/core/groupby/groupby.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,10 +1014,10 @@ def _set_group_selection(self) -> None:
10141014
"""
10151015
# This is a no-op for SeriesGroupBy
10161016
grp = self.grouper
1017-
if not (
1018-
grp.groupings is not None
1019-
and self.obj.ndim > 1
1020-
and self._group_selection is None
1017+
if (
1018+
grp.groupings is None
1019+
or self.obj.ndim == 1
1020+
or self._group_selection is not None
10211021
):
10221022
return
10231023

pandas/core/groupby/grouper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -906,7 +906,7 @@ def is_in_obj(gpr) -> bool:
906906
elif isinstance(gpr, Grouper) and gpr.key is not None:
907907
# Add key to exclusions
908908
exclusions.add(gpr.key)
909-
in_axis = False
909+
in_axis = True
910910
else:
911911
in_axis = False
912912

pandas/tests/groupby/test_grouping.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pandas import (
88
CategoricalIndex,
99
DataFrame,
10+
Grouper,
1011
Index,
1112
MultiIndex,
1213
Series,
@@ -168,7 +169,7 @@ def test_grouper_index_types(self, index):
168169
def test_grouper_multilevel_freq(self):
169170

170171
# GH 7885
171-
# with level and freq specified in a pd.Grouper
172+
# with level and freq specified in a Grouper
172173
from datetime import (
173174
date,
174175
timedelta,
@@ -182,20 +183,20 @@ def test_grouper_multilevel_freq(self):
182183
# Check string level
183184
expected = (
184185
df.reset_index()
185-
.groupby([pd.Grouper(key="foo", freq="W"), pd.Grouper(key="bar", freq="W")])
186+
.groupby([Grouper(key="foo", freq="W"), Grouper(key="bar", freq="W")])
186187
.sum()
187188
)
188189
# reset index changes columns dtype to object
189190
expected.columns = Index([0], dtype="int64")
190191

191192
result = df.groupby(
192-
[pd.Grouper(level="foo", freq="W"), pd.Grouper(level="bar", freq="W")]
193+
[Grouper(level="foo", freq="W"), Grouper(level="bar", freq="W")]
193194
).sum()
194195
tm.assert_frame_equal(result, expected)
195196

196197
# Check integer level
197198
result = df.groupby(
198-
[pd.Grouper(level=0, freq="W"), pd.Grouper(level=1, freq="W")]
199+
[Grouper(level=0, freq="W"), Grouper(level=1, freq="W")]
199200
).sum()
200201
tm.assert_frame_equal(result, expected)
201202

@@ -206,11 +207,11 @@ def test_grouper_creation_bug(self):
206207
g = df.groupby("A")
207208
expected = g.sum()
208209

209-
g = df.groupby(pd.Grouper(key="A"))
210+
g = df.groupby(Grouper(key="A"))
210211
result = g.sum()
211212
tm.assert_frame_equal(result, expected)
212213

213-
g = df.groupby(pd.Grouper(key="A", axis=0))
214+
g = df.groupby(Grouper(key="A", axis=0))
214215
result = g.sum()
215216
tm.assert_frame_equal(result, expected)
216217

@@ -220,13 +221,13 @@ def test_grouper_creation_bug(self):
220221
tm.assert_frame_equal(result, expected)
221222

222223
# GH14334
223-
# pd.Grouper(key=...) may be passed in a list
224+
# Grouper(key=...) may be passed in a list
224225
df = DataFrame(
225226
{"A": [0, 0, 0, 1, 1, 1], "B": [1, 1, 2, 2, 3, 3], "C": [1, 2, 3, 4, 5, 6]}
226227
)
227228
# Group by single column
228229
expected = df.groupby("A").sum()
229-
g = df.groupby([pd.Grouper(key="A")])
230+
g = df.groupby([Grouper(key="A")])
230231
result = g.sum()
231232
tm.assert_frame_equal(result, expected)
232233

@@ -235,17 +236,17 @@ def test_grouper_creation_bug(self):
235236
expected = df.groupby(["A", "B"]).sum()
236237

237238
# Group with two Grouper objects
238-
g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")])
239+
g = df.groupby([Grouper(key="A"), Grouper(key="B")])
239240
result = g.sum()
240241
tm.assert_frame_equal(result, expected)
241242

242243
# Group with a string and a Grouper object
243-
g = df.groupby(["A", pd.Grouper(key="B")])
244+
g = df.groupby(["A", Grouper(key="B")])
244245
result = g.sum()
245246
tm.assert_frame_equal(result, expected)
246247

247248
# Group with a Grouper object and a string
248-
g = df.groupby([pd.Grouper(key="A"), "B"])
249+
g = df.groupby([Grouper(key="A"), "B"])
249250
result = g.sum()
250251
tm.assert_frame_equal(result, expected)
251252

@@ -257,15 +258,15 @@ def test_grouper_creation_bug(self):
257258
names=["one", "two", "three"],
258259
),
259260
)
260-
result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
261+
result = s.groupby(Grouper(level="three", freq="M")).sum()
261262
expected = Series(
262263
[28],
263264
index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"),
264265
)
265266
tm.assert_series_equal(result, expected)
266267

267268
# just specifying a level breaks
268-
result = s.groupby(pd.Grouper(level="one")).sum()
269+
result = s.groupby(Grouper(level="one")).sum()
269270
expected = s.groupby(level="one").sum()
270271
tm.assert_series_equal(result, expected)
271272

@@ -282,18 +283,14 @@ def test_grouper_column_and_index(self):
282283
{"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]},
283284
index=idx,
284285
)
285-
result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean(
286-
numeric_only=True
287-
)
286+
result = df_multi.groupby(["B", Grouper(level="inner")]).mean(numeric_only=True)
288287
expected = (
289288
df_multi.reset_index().groupby(["B", "inner"]).mean(numeric_only=True)
290289
)
291290
tm.assert_frame_equal(result, expected)
292291

293292
# Test the reverse grouping order
294-
result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean(
295-
numeric_only=True
296-
)
293+
result = df_multi.groupby([Grouper(level="inner"), "B"]).mean(numeric_only=True)
297294
expected = (
298295
df_multi.reset_index().groupby(["inner", "B"]).mean(numeric_only=True)
299296
)
@@ -302,7 +299,7 @@ def test_grouper_column_and_index(self):
302299
# Grouping a single-index frame by a column and the index should
303300
# be equivalent to resetting the index and grouping by two columns
304301
df_single = df_multi.reset_index("outer")
305-
result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean(
302+
result = df_single.groupby(["B", Grouper(level="inner")]).mean(
306303
numeric_only=True
307304
)
308305
expected = (
@@ -311,7 +308,7 @@ def test_grouper_column_and_index(self):
311308
tm.assert_frame_equal(result, expected)
312309

313310
# Test the reverse grouping order
314-
result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean(
311+
result = df_single.groupby([Grouper(level="inner"), "B"]).mean(
315312
numeric_only=True
316313
)
317314
expected = (
@@ -368,7 +365,7 @@ def test_grouper_getting_correct_binner(self):
368365
),
369366
)
370367
result = df.groupby(
371-
[pd.Grouper(level="one"), pd.Grouper(level="two", freq="M")]
368+
[Grouper(level="one"), Grouper(level="two", freq="M")]
372369
).sum()
373370
expected = DataFrame(
374371
{"A": [31, 28, 21, 31, 28, 21]},
@@ -646,7 +643,7 @@ def test_list_grouper_with_nat(self):
646643
# GH 14715
647644
df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
648645
df.iloc[-1] = pd.NaT
649-
grouper = pd.Grouper(key="date", freq="AS")
646+
grouper = Grouper(key="date", freq="AS")
650647

651648
# Grouper in a list grouping
652649
result = df.groupby([grouper])
@@ -847,7 +844,7 @@ def test_groupby_with_empty(self):
847844
index = pd.DatetimeIndex(())
848845
data = ()
849846
series = Series(data, index, dtype=object)
850-
grouper = pd.Grouper(freq="D")
847+
grouper = Grouper(freq="D")
851848
grouped = series.groupby(grouper)
852849
assert next(iter(grouped), None) is None
853850

@@ -982,7 +979,7 @@ def test_groupby_with_small_elem(self):
982979
{"event": ["start", "start"], "change": [1234, 5678]},
983980
index=pd.DatetimeIndex(["2014-09-10", "2013-10-10"]),
984981
)
985-
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
982+
grouped = df.groupby([Grouper(freq="M"), "event"])
986983
assert len(grouped.groups) == 2
987984
assert grouped.ngroups == 2
988985
assert (Timestamp("2014-09-30"), "start") in grouped.groups
@@ -997,7 +994,7 @@ def test_groupby_with_small_elem(self):
997994
{"event": ["start", "start", "start"], "change": [1234, 5678, 9123]},
998995
index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-09-15"]),
999996
)
1000-
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
997+
grouped = df.groupby([Grouper(freq="M"), "event"])
1001998
assert len(grouped.groups) == 2
1002999
assert grouped.ngroups == 2
10031000
assert (Timestamp("2014-09-30"), "start") in grouped.groups
@@ -1013,7 +1010,7 @@ def test_groupby_with_small_elem(self):
10131010
{"event": ["start", "start", "start"], "change": [1234, 5678, 9123]},
10141011
index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-08-05"]),
10151012
)
1016-
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
1013+
grouped = df.groupby([Grouper(freq="M"), "event"])
10171014
assert len(grouped.groups) == 3
10181015
assert grouped.ngroups == 3
10191016
assert (Timestamp("2014-09-30"), "start") in grouped.groups
@@ -1036,3 +1033,17 @@ def test_grouping_string_repr(self):
10361033
result = gr.grouper.groupings[0].__repr__()
10371034
expected = "Grouping(('A', 'a'))"
10381035
assert result == expected
1036+
1037+
1038+
def test_grouping_by_key_is_in_axis():
1039+
# GH#50413 - Groupers specified by key are in-axis
1040+
df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 2], "c": [3, 4, 5]}).set_index("a")
1041+
gb = df.groupby([Grouper(level="a"), Grouper(key="b")], as_index=False)
1042+
assert not gb.grouper.groupings[0].in_axis
1043+
assert gb.grouper.groupings[1].in_axis
1044+
1045+
# Currently only in-axis groupings are included in the result when as_index=False.
1046+
# This is likely to change in the future.
1047+
result = gb.sum()
1048+
expected = DataFrame({"b": [1, 2], "c": [7, 5]})
1049+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)