7
7
from pandas import (
8
8
CategoricalIndex ,
9
9
DataFrame ,
10
+ Grouper ,
10
11
Index ,
11
12
MultiIndex ,
12
13
Series ,
@@ -168,7 +169,7 @@ def test_grouper_index_types(self, index):
168
169
def test_grouper_multilevel_freq (self ):
169
170
170
171
# GH 7885
171
- # with level and freq specified in a pd. Grouper
172
+ # with level and freq specified in a Grouper
172
173
from datetime import (
173
174
date ,
174
175
timedelta ,
@@ -182,20 +183,20 @@ def test_grouper_multilevel_freq(self):
182
183
# Check string level
183
184
expected = (
184
185
df .reset_index ()
185
- .groupby ([pd . Grouper (key = "foo" , freq = "W" ), pd . Grouper (key = "bar" , freq = "W" )])
186
+ .groupby ([Grouper (key = "foo" , freq = "W" ), Grouper (key = "bar" , freq = "W" )])
186
187
.sum ()
187
188
)
188
189
# reset index changes columns dtype to object
189
190
expected .columns = Index ([0 ], dtype = "int64" )
190
191
191
192
result = df .groupby (
192
- [pd . Grouper (level = "foo" , freq = "W" ), pd . Grouper (level = "bar" , freq = "W" )]
193
+ [Grouper (level = "foo" , freq = "W" ), Grouper (level = "bar" , freq = "W" )]
193
194
).sum ()
194
195
tm .assert_frame_equal (result , expected )
195
196
196
197
# Check integer level
197
198
result = df .groupby (
198
- [pd . Grouper (level = 0 , freq = "W" ), pd . Grouper (level = 1 , freq = "W" )]
199
+ [Grouper (level = 0 , freq = "W" ), Grouper (level = 1 , freq = "W" )]
199
200
).sum ()
200
201
tm .assert_frame_equal (result , expected )
201
202
@@ -206,11 +207,11 @@ def test_grouper_creation_bug(self):
206
207
g = df .groupby ("A" )
207
208
expected = g .sum ()
208
209
209
- g = df .groupby (pd . Grouper (key = "A" ))
210
+ g = df .groupby (Grouper (key = "A" ))
210
211
result = g .sum ()
211
212
tm .assert_frame_equal (result , expected )
212
213
213
- g = df .groupby (pd . Grouper (key = "A" , axis = 0 ))
214
+ g = df .groupby (Grouper (key = "A" , axis = 0 ))
214
215
result = g .sum ()
215
216
tm .assert_frame_equal (result , expected )
216
217
@@ -220,13 +221,13 @@ def test_grouper_creation_bug(self):
220
221
tm .assert_frame_equal (result , expected )
221
222
222
223
# GH14334
223
- # pd. Grouper(key=...) may be passed in a list
224
+ # Grouper(key=...) may be passed in a list
224
225
df = DataFrame (
225
226
{"A" : [0 , 0 , 0 , 1 , 1 , 1 ], "B" : [1 , 1 , 2 , 2 , 3 , 3 ], "C" : [1 , 2 , 3 , 4 , 5 , 6 ]}
226
227
)
227
228
# Group by single column
228
229
expected = df .groupby ("A" ).sum ()
229
- g = df .groupby ([pd . Grouper (key = "A" )])
230
+ g = df .groupby ([Grouper (key = "A" )])
230
231
result = g .sum ()
231
232
tm .assert_frame_equal (result , expected )
232
233
@@ -235,17 +236,17 @@ def test_grouper_creation_bug(self):
235
236
expected = df .groupby (["A" , "B" ]).sum ()
236
237
237
238
# Group with two Grouper objects
238
- g = df .groupby ([pd . Grouper (key = "A" ), pd . Grouper (key = "B" )])
239
+ g = df .groupby ([Grouper (key = "A" ), Grouper (key = "B" )])
239
240
result = g .sum ()
240
241
tm .assert_frame_equal (result , expected )
241
242
242
243
# Group with a string and a Grouper object
243
- g = df .groupby (["A" , pd . Grouper (key = "B" )])
244
+ g = df .groupby (["A" , Grouper (key = "B" )])
244
245
result = g .sum ()
245
246
tm .assert_frame_equal (result , expected )
246
247
247
248
# Group with a Grouper object and a string
248
- g = df .groupby ([pd . Grouper (key = "A" ), "B" ])
249
+ g = df .groupby ([Grouper (key = "A" ), "B" ])
249
250
result = g .sum ()
250
251
tm .assert_frame_equal (result , expected )
251
252
@@ -257,15 +258,15 @@ def test_grouper_creation_bug(self):
257
258
names = ["one" , "two" , "three" ],
258
259
),
259
260
)
260
- result = s .groupby (pd . Grouper (level = "three" , freq = "M" )).sum ()
261
+ result = s .groupby (Grouper (level = "three" , freq = "M" )).sum ()
261
262
expected = Series (
262
263
[28 ],
263
264
index = pd .DatetimeIndex ([Timestamp ("2013-01-31" )], freq = "M" , name = "three" ),
264
265
)
265
266
tm .assert_series_equal (result , expected )
266
267
267
268
# just specifying a level breaks
268
- result = s .groupby (pd . Grouper (level = "one" )).sum ()
269
+ result = s .groupby (Grouper (level = "one" )).sum ()
269
270
expected = s .groupby (level = "one" ).sum ()
270
271
tm .assert_series_equal (result , expected )
271
272
@@ -282,18 +283,14 @@ def test_grouper_column_and_index(self):
282
283
{"A" : np .arange (6 ), "B" : ["one" , "one" , "two" , "two" , "one" , "one" ]},
283
284
index = idx ,
284
285
)
285
- result = df_multi .groupby (["B" , pd .Grouper (level = "inner" )]).mean (
286
- numeric_only = True
287
- )
286
+ result = df_multi .groupby (["B" , Grouper (level = "inner" )]).mean (numeric_only = True )
288
287
expected = (
289
288
df_multi .reset_index ().groupby (["B" , "inner" ]).mean (numeric_only = True )
290
289
)
291
290
tm .assert_frame_equal (result , expected )
292
291
293
292
# Test the reverse grouping order
294
- result = df_multi .groupby ([pd .Grouper (level = "inner" ), "B" ]).mean (
295
- numeric_only = True
296
- )
293
+ result = df_multi .groupby ([Grouper (level = "inner" ), "B" ]).mean (numeric_only = True )
297
294
expected = (
298
295
df_multi .reset_index ().groupby (["inner" , "B" ]).mean (numeric_only = True )
299
296
)
@@ -302,7 +299,7 @@ def test_grouper_column_and_index(self):
302
299
# Grouping a single-index frame by a column and the index should
303
300
# be equivalent to resetting the index and grouping by two columns
304
301
df_single = df_multi .reset_index ("outer" )
305
- result = df_single .groupby (["B" , pd . Grouper (level = "inner" )]).mean (
302
+ result = df_single .groupby (["B" , Grouper (level = "inner" )]).mean (
306
303
numeric_only = True
307
304
)
308
305
expected = (
@@ -311,7 +308,7 @@ def test_grouper_column_and_index(self):
311
308
tm .assert_frame_equal (result , expected )
312
309
313
310
# Test the reverse grouping order
314
- result = df_single .groupby ([pd . Grouper (level = "inner" ), "B" ]).mean (
311
+ result = df_single .groupby ([Grouper (level = "inner" ), "B" ]).mean (
315
312
numeric_only = True
316
313
)
317
314
expected = (
@@ -368,7 +365,7 @@ def test_grouper_getting_correct_binner(self):
368
365
),
369
366
)
370
367
result = df .groupby (
371
- [pd . Grouper (level = "one" ), pd . Grouper (level = "two" , freq = "M" )]
368
+ [Grouper (level = "one" ), Grouper (level = "two" , freq = "M" )]
372
369
).sum ()
373
370
expected = DataFrame (
374
371
{"A" : [31 , 28 , 21 , 31 , 28 , 21 ]},
@@ -646,7 +643,7 @@ def test_list_grouper_with_nat(self):
646
643
# GH 14715
647
644
df = DataFrame ({"date" : date_range ("1/1/2011" , periods = 365 , freq = "D" )})
648
645
df .iloc [- 1 ] = pd .NaT
649
- grouper = pd . Grouper (key = "date" , freq = "AS" )
646
+ grouper = Grouper (key = "date" , freq = "AS" )
650
647
651
648
# Grouper in a list grouping
652
649
result = df .groupby ([grouper ])
@@ -847,7 +844,7 @@ def test_groupby_with_empty(self):
847
844
index = pd .DatetimeIndex (())
848
845
data = ()
849
846
series = Series (data , index , dtype = object )
850
- grouper = pd . Grouper (freq = "D" )
847
+ grouper = Grouper (freq = "D" )
851
848
grouped = series .groupby (grouper )
852
849
assert next (iter (grouped ), None ) is None
853
850
@@ -982,7 +979,7 @@ def test_groupby_with_small_elem(self):
982
979
{"event" : ["start" , "start" ], "change" : [1234 , 5678 ]},
983
980
index = pd .DatetimeIndex (["2014-09-10" , "2013-10-10" ]),
984
981
)
985
- grouped = df .groupby ([pd . Grouper (freq = "M" ), "event" ])
982
+ grouped = df .groupby ([Grouper (freq = "M" ), "event" ])
986
983
assert len (grouped .groups ) == 2
987
984
assert grouped .ngroups == 2
988
985
assert (Timestamp ("2014-09-30" ), "start" ) in grouped .groups
@@ -997,7 +994,7 @@ def test_groupby_with_small_elem(self):
997
994
{"event" : ["start" , "start" , "start" ], "change" : [1234 , 5678 , 9123 ]},
998
995
index = pd .DatetimeIndex (["2014-09-10" , "2013-10-10" , "2014-09-15" ]),
999
996
)
1000
- grouped = df .groupby ([pd . Grouper (freq = "M" ), "event" ])
997
+ grouped = df .groupby ([Grouper (freq = "M" ), "event" ])
1001
998
assert len (grouped .groups ) == 2
1002
999
assert grouped .ngroups == 2
1003
1000
assert (Timestamp ("2014-09-30" ), "start" ) in grouped .groups
@@ -1013,7 +1010,7 @@ def test_groupby_with_small_elem(self):
1013
1010
{"event" : ["start" , "start" , "start" ], "change" : [1234 , 5678 , 9123 ]},
1014
1011
index = pd .DatetimeIndex (["2014-09-10" , "2013-10-10" , "2014-08-05" ]),
1015
1012
)
1016
- grouped = df .groupby ([pd . Grouper (freq = "M" ), "event" ])
1013
+ grouped = df .groupby ([Grouper (freq = "M" ), "event" ])
1017
1014
assert len (grouped .groups ) == 3
1018
1015
assert grouped .ngroups == 3
1019
1016
assert (Timestamp ("2014-09-30" ), "start" ) in grouped .groups
@@ -1036,3 +1033,17 @@ def test_grouping_string_repr(self):
1036
1033
result = gr .grouper .groupings [0 ].__repr__ ()
1037
1034
expected = "Grouping(('A', 'a'))"
1038
1035
assert result == expected
1036
+
1037
+
1038
+ def test_grouping_by_key_is_in_axis ():
1039
+ # GH#50413 - Groupers specified by key are in-axis
1040
+ df = DataFrame ({"a" : [1 , 1 , 2 ], "b" : [1 , 1 , 2 ], "c" : [3 , 4 , 5 ]}).set_index ("a" )
1041
+ gb = df .groupby ([Grouper (level = "a" ), Grouper (key = "b" )], as_index = False )
1042
+ assert not gb .grouper .groupings [0 ].in_axis
1043
+ assert gb .grouper .groupings [1 ].in_axis
1044
+
1045
+ # Currently only in-axis groupings are included in the result when as_index=False;
1046
+ # This is likely to change in the future.
1047
+ result = gb .sum ()
1048
+ expected = DataFrame ({"b" : [1 , 2 ], "c" : [7 , 5 ]})
1049
+ tm .assert_frame_equal (result , expected )
0 commit comments