diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index c0db87d58ef08..22f1414c4f2b0 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -664,6 +664,18 @@ In this example, we chopped the collection of time series into yearly chunks then independently called :ref:`fillna <missing_data.fillna>` on the groups. +.. versionadded:: 0.14.1 + +The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: + +.. ipython:: python + + s = Series([9, 8, 7, 5, 19, 1, 4.2, 3.3]) + g = Series(list('abababab')) + gb = s.groupby(g) + gb.nlargest(3) + gb.nsmallest(3) + .. _groupby.apply: Flexible ``apply`` diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 9373b59025399..1cb6aadf3f40f 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -114,6 +114,9 @@ Enhancements - Implemented ``sem`` (standard error of the mean) operation for ``Series``, ``DataFrame``, ``Panel``, and ``Groupby`` (:issue:`6897`) +- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` whitelist, + which means you can now use these methods on a ``SeriesGroupBy`` object + (:issue:`7053`). 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2714e9f22cd95..e6af3c20bea00 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -78,7 +78,8 @@ _series_apply_whitelist = \ (_common_apply_whitelist - set(['boxplot'])) | \ - frozenset(['dtype', 'value_counts', 'unique', 'nunique']) + frozenset(['dtype', 'value_counts', 'unique', 'nunique', + 'nlargest', 'nsmallest']) _dataframe_apply_whitelist = \ _common_apply_whitelist | frozenset(['dtypes', 'corrwith']) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 1f1853186ac8a..1da51ce824120 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -4047,6 +4047,7 @@ def test_groupby_whitelist(self): 'value_counts', 'diff', 'unique', 'nunique', + 'nlargest', 'nsmallest', ]) for obj, whitelist in zip((df, s), @@ -4381,6 +4382,27 @@ def test_max_nan_bug(self): tm.assert_frame_equal(r, e) self.assertFalse(r['File'].isnull().any()) + def test_nlargest(self): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list('a' * 5 + 'b' * 5)) + gb = a.groupby(b) + r = gb.nlargest(3) + e = Series([7, 5, 3, 10, 9, 6], + index=MultiIndex.from_arrays([list('aaabbb'), + [3, 2, 1, 9, 5, 8]])) + tm.assert_series_equal(r, e) + + def test_nsmallest(self): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list('a' * 5 + 'b' * 5)) + gb = a.groupby(b) + r = gb.nsmallest(3) + e = Series([1, 2, 3, 0, 4, 6], + index=MultiIndex.from_arrays([list('aaabbb'), + [0, 4, 1, 6, 7, 8]])) + tm.assert_series_equal(r, e) + + def assert_fp_equal(a, b): assert (np.abs(a - b) < 1e-12).all()