Added remaining group rank tiebreakers (min, max, first, first-descending, dense); fixed int/float dtype mix-up in rank output

WillAyd · WillAyd · commit 64960bf2a315 · 2018-02-05T12:33:58.000-08:00
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -18,7 +18,7 @@ from libc.stdlib cimport malloc, free
 
 from util cimport numeric, get_nat
 from algos cimport (swap, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX,
-                    TIEBREAK_FIRST, TIEBREAK_DENSE)
+                    TIEBREAK_FIRST, TIEBREAK_FIRST_DESCENDING, TIEBREAK_DENSE)
 from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
 
 cdef int64_t iNaT = get_nat()
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
@@ -446,7 +446,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_rank_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                         ndarray[{{c_type}}, ndim=2] values,
                         ndarray[int64_t] labels,
                         bint is_datetimelike, **kwargs):
@@ -472,7 +472,21 @@ def group_rank_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
             if tiebreak == TIEBREAK_AVERAGE:
                 for j in range(i - dups + 1, i + 1):
                     out[_as[j], 0] = sum_ranks / dups
-
+            elif tiebreak == TIEBREAK_MIN:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j], 0] = i - grp_start - dups + 2
+            elif tiebreak == TIEBREAK_MAX:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j], 0] = i - grp_start + 1
+            elif tiebreak == TIEBREAK_FIRST:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j], 0] = j + 1
+            elif tiebreak == TIEBREAK_FIRST_DESCENDING:
+                for j in range(i - dups + 1, i + 1):
+                   out[_as[j], 0]  = 2 * (i - grp_start) - j - dups + 2
+            elif tiebreak == TIEBREAK_DENSE:
+                for j in range(i - dups + 1, i + 1):
+                    out[_as[j], 0] = val_start - grp_start
             if (i == N - 1 or (
                     (values[_as[i], 0] != values[_as[i+1], 0]) and not
                     (isnan(values[_as[i], 0]) and
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1007,7 +1007,10 @@ def _cython_transform(self, how, numeric_only=True, **kwargs):
                 continue
             except AssertionError as e:
                 raise GroupByError(str(e))
-            output[name] = self._try_cast(result, obj)
+            if self._transform_should_cast(how):
+                output[name] = self._try_cast(result, obj)
+            else:
+                output[name] = result
 
         if len(output) == 0:
             raise DataError('No numeric types to aggregate')
@@ -2325,10 +2328,13 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
             else:
                 raise
 
-        if is_numeric:
-            out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
+        if how == 'rank':
+            out_dtype = 'float'
         else:
-            out_dtype = 'object'
+            if is_numeric:
+                out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
+            else:
+                out_dtype = 'object'
 
         labels, _, _ = self.group_info
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1945,7 +1945,6 @@ def test_rank_args(self, vals, ties_method, ascending, pct, exp):
         df = DataFrame({'key': ['foo']*5, 'val': vals})
         result = df.groupby('key').rank(method=ties_method, ascending=ascending,
                                         pct=pct)
-
         assert_frame_equal(result, exp)
 
     @pytest.mark.parametrize("vals", [