Skip to content

Commit 64960bf

Browse files
committed
Added remaining tiebreakers; fixed int/float dtype mixup
1 parent b4c3dfd commit 64960bf

File tree

4 files changed

+27
-8
lines changed

4 files changed

+27
-8
lines changed

pandas/_libs/groupby.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ from libc.stdlib cimport malloc, free
1818

1919
from util cimport numeric, get_nat
2020
from algos cimport (swap, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX,
21-
TIEBREAK_FIRST, TIEBREAK_DENSE)
21+
TIEBREAK_FIRST, TIEBREAK_FIRST_DESCENDING, TIEBREAK_DENSE)
2222
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
2323

2424
cdef int64_t iNaT = get_nat()

pandas/_libs/groupby_helper.pxi.in

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -446,7 +446,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
446446

447447
@cython.boundscheck(False)
448448
@cython.wraparound(False)
449-
def group_rank_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
449+
def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
450450
ndarray[{{c_type}}, ndim=2] values,
451451
ndarray[int64_t] labels,
452452
bint is_datetimelike, **kwargs):
@@ -472,7 +472,21 @@ def group_rank_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
472472
if tiebreak == TIEBREAK_AVERAGE:
473473
for j in range(i - dups + 1, i + 1):
474474
out[_as[j], 0] = sum_ranks / dups
475-
475+
elif tiebreak == TIEBREAK_MIN:
476+
for j in range(i - dups + 1, i + 1):
477+
out[_as[j], 0] = i - grp_start - dups + 2
478+
elif tiebreak == TIEBREAK_MAX:
479+
for j in range(i - dups + 1, i + 1):
480+
out[_as[j], 0] = i - grp_start + 1
481+
elif tiebreak == TIEBREAK_FIRST:
482+
for j in range(i - dups + 1, i + 1):
483+
out[_as[j], 0] = j + 1
484+
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
485+
for j in range(i - dups + 1, i + 1):
486+
out[_as[j], 0] = 2 * (i - grp_start) - j - dups + 2
487+
elif tiebreak == TIEBREAK_DENSE:
488+
for j in range(i - dups + 1, i + 1):
489+
out[_as[j], 0] = val_start - grp_start
476490
if (i == N - 1 or (
477491
(values[_as[i], 0] != values[_as[i+1], 0]) and not
478492
(isnan(values[_as[i], 0]) and

pandas/core/groupby.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1007,7 +1007,10 @@ def _cython_transform(self, how, numeric_only=True, **kwargs):
10071007
continue
10081008
except AssertionError as e:
10091009
raise GroupByError(str(e))
1010-
output[name] = self._try_cast(result, obj)
1010+
if self._transform_should_cast(how):
1011+
output[name] = self._try_cast(result, obj)
1012+
else:
1013+
output[name] = result
10111014

10121015
if len(output) == 0:
10131016
raise DataError('No numeric types to aggregate')
@@ -2325,10 +2328,13 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
23252328
else:
23262329
raise
23272330

2328-
if is_numeric:
2329-
out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
2331+
if how == 'rank':
2332+
out_dtype = 'float'
23302333
else:
2331-
out_dtype = 'object'
2334+
if is_numeric:
2335+
out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
2336+
else:
2337+
out_dtype = 'object'
23322338

23332339
labels, _, _ = self.group_info
23342340

pandas/tests/groupby/test_groupby.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1945,7 +1945,6 @@ def test_rank_args(self, vals, ties_method, ascending, pct, exp):
19451945
df = DataFrame({'key': ['foo']*5, 'val': vals})
19461946
result = df.groupby('key').rank(method=ties_method, ascending=ascending,
19471947
pct=pct)
1948-
19491948
assert_frame_equal(result, exp)
19501949

19511950
@pytest.mark.parametrize("vals", [

0 commit comments

Comments
 (0)