From 3b7831a8e5d3781041dc007638a3988e1123f1bb Mon Sep 17 00:00:00 2001
From: Nicolas Bonnotte <nicolas.bonnotte@gmail.com>
Date: Wed, 1 Jun 2016 21:12:43 +0200
Subject: [PATCH 1/3] BUG: fix DataFrameGroupBy.rank returning empty frame (#11759)

fixes #11759
---
 pandas/core/groupby.py       | 24 +++++++++++++++++++++++-
 pandas/tests/test_groupby.py | 20 ++++++++++++++++++--
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 6179857978b7b..356a353120828 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -76,7 +76,7 @@
     'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
     'resample',
     'describe',
-    'rank', 'quantile',
+    'quantile',
     'fillna',
     'mad',
     'any', 'all',
@@ -1378,6 +1378,27 @@ def cumsum(self, axis=0, *args, **kwargs):
 
         return self._cython_transform('cumsum')
 
+    @Substitution(name='groupby')
+    @Appender(_doc_template)
+    def rank(self, axis=0, method='average', numeric_only=True,
+             na_option='keep', ascending=True, pct=False):
+        """Compute numerical data ranks (1 through n) along axis.
+        """
+
+        if numeric_only:
+            data = self._obj_with_exclusions._get_numeric_data()
+            if data.size == 0:
+                raise DataError('No numeric types to aggregate')
+            data = data.groupby(self.grouper)
+        else:
+            data = self
+
+        def wrapper(values):
+            return values.rank(axis=axis, method=method, na_option=na_option,
+                               ascending=ascending, pct=pct)
+
+        return data.transform(wrapper)
+
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def shift(self, periods=1, freq=None, axis=0):
@@ -3182,6 +3203,7 @@ def aggregate(self, arg, *args, **kwargs):
     agg = aggregate
 
     def _aggregate_generic(self, func, *args, **kwargs):
+
         if self.grouper.nkeys != 1:
             raise AssertionError('Number of keys must be 1')
 
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 3f5b4152afe31..a4c2bf75aff8d 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3646,6 +3646,24 @@ def test_column_select_via_attr(self):
         expected = self.df.groupby('A').agg(np.mean)
         assert_frame_equal(result, expected)
 
+    def test_rank(self):
+        # GH 11759
+        df = DataFrame({'a': ['A1', 'A1', 'A1'],
+                        'b': ['B1', 'B1', 'B2'],
+                        'c': 1.})
+        df = df.set_index('a')
+        dg = df.groupby('c')
+        self.assertRaises(DataError, dg.rank, method='first')
+
+        # with another numeric column
+        df = DataFrame({'a': ['A1', 'A1', 'A1'],
+                        'b': ['B1', 'B1', 'B2'],
+                        'c': 1.,
+                        'd': 1.})
+        df = df.set_index('a')
+        expected = df.drop('b', axis=1).groupby('c').rank(method='first')
+        assert_frame_equal(df.groupby('c').rank(method='first'), expected)
+
     def test_rank_apply(self):
         lev1 = tm.rands_array(10, 100)
         lev2 = tm.rands_array(10, 130)
@@ -5753,7 +5771,6 @@ def test_groupby_whitelist(self):
             'cumcount',
             'resample',
             'describe',
-            'rank',
             'quantile',
             'fillna',
             'mad',
@@ -5794,7 +5811,6 @@ def test_groupby_whitelist(self):
             'cumcount',
             'resample',
             'describe',
-            'rank',
             'quantile',
             'fillna',
             'mad',

From 6f1e356a27690f6981cdf1eb4b78d0efa83591b4 Mon Sep 17 00:00:00 2001
From: Nicolas Bonnotte <nicolas.bonnotte@gmail.com>
Date: Wed, 8 Jun 2016 22:30:11 +0200
Subject: [PATCH 2/3] More groupby rank tests (numeric_only, method='first')

---
 pandas/core/groupby.py       |  9 ++++++++-
 pandas/tests/test_groupby.py | 21 +++++++++++++++++++--
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 356a353120828..179929fa01e7b 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1397,7 +1397,14 @@ def wrapper(values):
             return values.rank(axis=axis, method=method, na_option=na_option,
                                ascending=ascending, pct=pct)
 
-        return data.transform(wrapper)
+        try:
+            return data.transform(wrapper)
+        except ValueError:
+            if not numeric_only and method=='first':
+                raise ValueError('first not supported for non-numeric data')
+                # such a ValueError is raised by pandas.algos.rank_2d_generic
+                # for regular (non-grouped) dataframes
+
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index a4c2bf75aff8d..79b29a46de6df 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3653,7 +3653,14 @@ def test_rank(self):
                         'c': 1.})
         df = df.set_index('a')
         dg = df.groupby('c')
-        self.assertRaises(DataError, dg.rank, method='first')
+        self.assertRaises(DataError, dg.rank,
+                          method='first')
+        self.assertRaises(DataError, dg.rank,
+                          method='first', numeric_only=True)
+        self.assertRaises(ValueError, dg.rank,
+                          method='first', numeric_only=False)
+        # such a ValueError is raised by pandas.algos.rank_2d_generic
+        # for regular (non-grouped) dataframes
 
         # with another numeric column
         df = DataFrame({'a': ['A1', 'A1', 'A1'],
@@ -3661,8 +3668,18 @@ def test_rank(self):
                         'c': 1.,
                         'd': 1.})
         df = df.set_index('a')
+        dg = df.groupby('c')
         expected = df.drop('b', axis=1).groupby('c').rank(method='first')
-        assert_frame_equal(df.groupby('c').rank(method='first'), expected)
+
+        result = dg.rank(method='first')
+        assert_frame_equal(result, expected)
+
+        result = dg.rank(method='first', numeric_only=True)
+        assert_frame_equal(result, expected)
+
+        self.assertRaises(ValueError, dg.rank,
+                          method='first', numeric_only=False)
+        # same remark as above
 
     def test_rank_apply(self):
         lev1 = tm.rands_array(10, 100)

From f432b2b51e3fe881c51bfce817b026bf74d2e270 Mon Sep 17 00:00:00 2001
From: Nicolas Bonnotte <nicolas.bonnotte@gmail.com>
Date: Sat, 18 Jun 2016 16:13:21 +0200
Subject: [PATCH 3/3] More tests: categorical and datetime

---
 pandas/core/groupby.py       | 20 ++++++-------
 pandas/tests/test_groupby.py | 57 ++++++++++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 179929fa01e7b..f529af6543939 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1385,26 +1385,24 @@ def rank(self, axis=0, method='average', numeric_only=True,
         """Compute numerical data ranks (1 through n) along axis.
         """
 
-        if numeric_only:
-            data = self._obj_with_exclusions._get_numeric_data()
-            if data.size == 0:
-                raise DataError('No numeric types to aggregate')
-            data = data.groupby(self.grouper)
-        else:
-            data = self
-
         def wrapper(values):
             return values.rank(axis=axis, method=method, na_option=na_option,
                                ascending=ascending, pct=pct)
 
         try:
-            return data.transform(wrapper)
+            return self.transform(wrapper)
         except ValueError:
-            if not numeric_only and method=='first':
+            if not numeric_only and method == 'first':
                 raise ValueError('first not supported for non-numeric data')
                 # such a ValueError is raised by pandas.algos.rank_2d_generic
                 # for regular (non-grouped) dataframes
-
+            if numeric_only:
+                data = self._obj_with_exclusions._get_numeric_data()
+                if data.size == 0:
+                    raise DataError('No numeric types to aggregate')
+                data = data.groupby(self.grouper)
+                return data.transform(wrapper)
+            raise
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 79b29a46de6df..ba0b343aa49ce 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3647,9 +3647,28 @@ def test_column_select_via_attr(self):
         assert_frame_equal(result, expected)
 
     def test_rank(self):
-        # GH 11759
+        # normal behavior
         df = DataFrame({'a': ['A1', 'A1', 'A1'],
-                        'b': ['B1', 'B1', 'B2'],
+                        'b': [2, 1, 1],
+                        'c': 1.})
+        df = df.set_index('a')
+        dg = df.groupby('c')
+        expected = DataFrame({'a': ['A1', 'A1', 'A1'],
+                              'b': [3., 1., 2.]})
+        expected = expected.set_index('a')
+
+        result = dg.rank(method='first')
+        assert_frame_equal(result, expected)
+
+        result = dg.rank(method='first', numeric_only=True)
+        assert_frame_equal(result, expected)
+
+        result = dg.rank(method='first', numeric_only=False)
+        assert_frame_equal(result, expected)
+
+        # GH 11759: non numeric data
+        df = DataFrame({'a': ['A1', 'A1', 'A1'],
+                        'b': ['B2', 'B1', 'B1'],
                         'c': 1.})
         df = df.set_index('a')
         dg = df.groupby('c')
@@ -3662,9 +3681,41 @@ def test_rank(self):
         # such a ValueError is raised by pandas.algos.rank_2d_generic
         # for regular (non-grouped) dataframes
 
+        # with categorical data
+        df = DataFrame({'a': ['A1', 'A1', 'A1'],
+                        'b': Categorical(['big', 'small', 'small'],
+                                         categories=['small', 'big'],
+                                         ordered=True),
+                        'c': 1.})
+        df = df.set_index('a')
+        dg = df.groupby('c')
+        self.assertRaises(DataError, dg.rank,
+                          method='first')
+        self.assertRaises(DataError, dg.rank,
+                          method='first', numeric_only=True)
+        self.assertRaises(ValueError, dg.rank,
+                          method='first', numeric_only=False)
+
+        # with datetime data
+        df = DataFrame({'a': ['A1', 'A1', 'A1'],
+                        'b': [datetime(2002, 2, 2), datetime(2001, 1, 1),
+                              datetime(2001, 1, 1)],
+                        'c': 1.})
+        df = df.set_index('a')
+        dg = df.groupby('c')
+
+        result = dg.rank(method='first')
+        assert_frame_equal(result, expected)
+
+        result = dg.rank(method='first', numeric_only=True)
+        assert_frame_equal(result, expected)
+
+        result = dg.rank(method='first', numeric_only=False)
+        assert_frame_equal(result, expected)
+
         # with another numeric column
         df = DataFrame({'a': ['A1', 'A1', 'A1'],
-                        'b': ['B1', 'B1', 'B2'],
+                        'b': ['B2', 'B1', 'B1'],
                         'c': 1.,
                         'd': 1.})
         df = df.set_index('a')