Skip to content

Commit ef38319

Browse files
committed
TST more coverage for groupby head and tail
1 parent e8e7735 commit ef38319

File tree

2 files changed: 52 additions and 27 deletions.

pandas/core/groupby.py

Lines changed: 26 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -482,13 +482,18 @@ def picker(arr):
482482
return self.agg(picker)
483483

484484
def cumcount(self, **kwargs):
485-
'''
486-
Number each item in each group from 0 to the length of that group.
485+
"""
486+
Number each item in each group from 0 to the length of that group - 1.
487487
488488
Essentially this is equivalent to
489489
490490
>>> self.apply(lambda x: Series(np.arange(len(x)), x.index))
491491
492+
Parameters
493+
----------
494+
ascending : bool, default True
495+
If False, number in reverse, from length of group - 1 to 0.
496+
492497
Example
493498
-------
494499
@@ -510,8 +515,16 @@ def cumcount(self, **kwargs):
510515
4 1
511516
5 3
512517
dtype: int64
518+
>>> df.groupby('A').cumcount(ascending=False)
519+
0 3
520+
1 2
521+
2 1
522+
3 1
523+
4 0
524+
5 0
525+
dtype: int64
513526
514-
'''
527+
"""
515528
ascending = kwargs.pop('ascending', True)
516529

517530
index = self.obj.index
@@ -520,10 +533,10 @@ def cumcount(self, **kwargs):
520533
return Series(cumcounts, index)
521534

522535
def head(self, n=5):
523-
'''
536+
"""
524537
Returns first n rows of each group.
525538
526-
Essentially equivalent to .apply(lambda x: x.head(n))
539+
Essentially equivalent to ``.apply(lambda x: x.head(n))``
527540
528541
Example
529542
-------
@@ -540,7 +553,7 @@ def head(self, n=5):
540553
1 0 1 2
541554
5 2 5 6
542555
543-
'''
556+
"""
544557
rng = np.arange(self.grouper._max_groupsize, dtype='int64')
545558
in_head = self._cumcount_array(rng) < n
546559
head = self.obj[in_head]
@@ -549,10 +562,10 @@ def head(self, n=5):
549562
return head
550563

551564
def tail(self, n=5):
552-
'''
553-
Returns first n rows of each group
565+
"""
566+
Returns last n rows of each group.
554567
555-
Essentially equivalent to .apply(lambda x: x.tail(n))
568+
Essentially equivalent to ``.apply(lambda x: x.tail(n))``
556569
557570
Example
558571
-------
@@ -568,7 +581,8 @@ def tail(self, n=5):
568581
A
569582
1 0 1 2
570583
5 2 5 6
571-
'''
584+
585+
"""
572586
rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64')
573587
in_tail = self._cumcount_array(rng, ascending=False) > -n
574588
tail = self.obj[in_tail]
@@ -590,10 +604,10 @@ def _cumcount_array(self, arr, **kwargs):
590604
return cumcounts
591605

592606
def _index_with_as_index(self, b):
593-
'''
607+
"""
594608
Take boolean mask of index to be returned from apply, if as_index=True
595609
596-
'''
610+
"""
597611
# TODO perf, it feels like this should already be somewhere...
598612
from itertools import chain
599613
original = self.obj.index

pandas/tests/test_groupby.py

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1230,25 +1230,36 @@ def test_groupby_head_tail(self):
12301230
g_as = df.groupby('A', as_index=True)
12311231
g_not_as = df.groupby('A', as_index=False)
12321232

1233-
# as_index= False much easier
1234-
exp_head_not_as = df.loc[[0, 2]]
1235-
res_head_not_as = g_not_as.head(1)
1236-
assert_frame_equal(exp_head_not_as, res_head_not_as)
1237-
exp_tail_not_as = df.loc[[1, 2]]
1238-
res_tail_not_as = g_not_as.tail(1)
1239-
assert_frame_equal(exp_tail_not_as, res_tail_not_as)
1233+
# as_index=False, much easier
1234+
assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1))
1235+
assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))
12401236

1241-
# as_index=True, yuck
1242-
res_head_as = g_as.head(1)
1243-
res_tail_as = g_as.tail(1)
1237+
empty_not_as = DataFrame(columns=df.columns)
1238+
assert_frame_equal(empty_not_as, g_not_as.head(0))
1239+
assert_frame_equal(empty_not_as, g_not_as.tail(0))
1240+
assert_frame_equal(empty_not_as, g_not_as.head(-1))
1241+
assert_frame_equal(empty_not_as, g_not_as.tail(-1))
1242+
1243+
assert_frame_equal(df, g_not_as.head(7)) # contains all
1244+
assert_frame_equal(df, g_not_as.tail(7))
12441245

1246+
# as_index=True, yuck
12451247
# prepend the A column as an index, in a roundabout way
1246-
df.index = df.set_index('A', append=True, drop=False).index.swaplevel(0, 1)
1247-
exp_head_as = df.loc[[0, 2]]
1248-
exp_tail_as = df.loc[[1, 2]]
1248+
df_as = df.copy()
1249+
df_as.index = df.set_index('A', append=True,
1250+
drop=False).index.swaplevel(0, 1)
1251+
1252+
assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1))
1253+
assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))
1254+
1255+
empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
1256+
assert_frame_equal(empty_as, g_as.head(0))
1257+
assert_frame_equal(empty_as, g_as.tail(0))
1258+
assert_frame_equal(empty_as, g_as.head(-1))
1259+
assert_frame_equal(empty_as, g_as.tail(-1))
12491260

1250-
assert_frame_equal(exp_head_as, res_head_as)
1251-
assert_frame_equal(exp_tail_as, res_tail_as)
1261+
assert_frame_equal(df_as, g_as.head(7)) # contains all
1262+
assert_frame_equal(df_as, g_as.tail(7))
12521263

12531264
def test_groupby_multiple_key(self):
12541265
df = tm.makeTimeDataFrame()

0 commit comments

Comments
 (0)