diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 1382f2c1a3be6..89b6db84c6ceb 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -280,6 +280,36 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1 ser pd.Timestamp('2012-01-01') - ser + +Signature change for .rank +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series.rank`` and ``DataFrame.rank`` now have the same signature (:issue:`11759`) + +Previous signature + +.. code-block:: python + + In [3]: pd.Series([0,1]).rank(method='average', na_option='keep', ascending=True, pct=False) + Out[3]: + 0 1 + 1 2 + dtype: float64 + + In [4]: pd.DataFrame([0,1]).rank(axis=0, numeric_only=None, method='average', na_option='keep', ascending=True, pct=False) + Out[4]: + 0 + 0 1 + 1 2 + +New signature + +.. ipython:: python + + pd.Series([0,1]).rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False) + pd.DataFrame([0,1]).rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False) + + Bug in QuarterBegin with n=0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 907da619b1875..0cdc811e194db 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5005,55 +5005,6 @@ def f(arr, per, interpolation): result.name = None # For groupby, so it can set an index name return result - def rank(self, axis=0, numeric_only=None, method='average', - na_option='keep', ascending=True, pct=False): - """ - Compute numerical data ranks (1 through n) along axis. 
Equal values are - assigned a rank that is the average of the ranks of those values - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns'}, default 0 - Ranks over columns (0) or rows (1) - numeric_only : boolean, default None - Include only float, int, boolean data - method : {'average', 'min', 'max', 'first', 'dense'} - * average: average rank of group - * min: lowest rank in group - * max: highest rank in group - * first: ranks assigned in order they appear in the array - * dense: like 'min', but rank always increases by 1 between groups - na_option : {'keep', 'top', 'bottom'} - * keep: leave NA values where they are - * top: smallest rank if ascending - * bottom: smallest rank if descending - ascending : boolean, default True - False for ranks by high (1) to low (N) - pct : boolean, default False - Computes percentage rank of data - - Returns - ------- - ranks : DataFrame - """ - axis = self._get_axis_number(axis) - if numeric_only is None: - try: - ranks = algos.rank(self.values, axis=axis, method=method, - ascending=ascending, na_option=na_option, - pct=pct) - return self._constructor(ranks, index=self.index, - columns=self.columns) - except TypeError: - numeric_only = True - if numeric_only: - data = self._get_numeric_data() - else: - data = self - ranks = algos.rank(data.values, axis=axis, method=method, - ascending=ascending, na_option=na_option, pct=pct) - return self._constructor(ranks, index=data.index, columns=data.columns) - def to_timestamp(self, freq=None, how='start', axis=0, copy=True): """ Cast to DatetimeIndex of timestamps, at *beginning* of period diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2b659ee355e51..bd19f2c2302d5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -15,6 +15,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex from pandas.core.internals import BlockManager +import pandas.core.algorithms as algos import pandas.core.common as com 
def rank(self, axis=0, method='average', numeric_only=None,
         na_option='keep', ascending=True, pct=False):
    """
    Compute numerical data ranks (1 through n) along axis.

    With the default ``method='average'``, equal values are assigned a
    rank that is the average of the ranks of those values; the other
    ``method`` choices break ties differently.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis along which to rank
    method : {'average', 'min', 'max', 'first', 'dense'}
        * average: average rank of group
        * min: lowest rank in group
        * max: highest rank in group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups
    numeric_only : boolean, default None
        Include only float, int, boolean data. Valid only for DataFrame
        objects. When None, everything is ranked first and, if that raises
        a TypeError, the ranking is retried on the numeric data only
    na_option : {'keep', 'top', 'bottom'}
        * keep: leave NA values where they are
        * top: smallest rank if ascending
        * bottom: highest rank if ascending
    ascending : boolean, default True
        False for ranks by high (1) to low (N)
    pct : boolean, default False
        Computes percentage rank of data

    Returns
    -------
    ranks : same type as caller

    Raises
    ------
    NotImplementedError
        If the object has more than two dimensions
    ValueError
        If ``na_option`` is not one of 'keep', 'top' or 'bottom'
    """
    axis = self._get_axis_number(axis)

    if self.ndim > 2:
        msg = "rank does not make sense when ndim > 2"
        raise NotImplementedError(msg)

    # Reject a misspelled option here rather than forwarding it unchecked
    # to the ranking routine.
    if na_option not in ('keep', 'top', 'bottom'):
        msg = "na_option must be one of 'keep', 'top', or 'bottom'"
        raise ValueError(msg)

    def ranker(data):
        # Rank the raw values, then rebuild an object of the caller's type
        # carrying the axes of the data that was actually ranked.
        ranks = algos.rank(data.values, axis=axis, method=method,
                           ascending=ascending, na_option=na_option,
                           pct=pct)
        ranks = self._constructor(ranks, **data._construct_axes_dict())
        return ranks.__finalize__(self)

    # numeric_only=None means "try everything first"; a TypeError from the
    # ranking is taken as the signal to fall back to numeric data only.
    if numeric_only is None:
        try:
            return ranker(self)
        except TypeError:
            numeric_only = True

    data = self._get_numeric_data() if numeric_only else self
    return ranker(data)
_shared_docs['align'] = (""" Align two object on their axes with the specified join method for each axis Index diff --git a/pandas/core/series.py b/pandas/core/series.py index 73cca93a498c5..699a0ca66f5f9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1865,36 +1865,6 @@ def argsort(self, axis=0, kind='quicksort', order=None): np.argsort(values, kind=kind), index=self.index, dtype='int64').__finalize__(self) - def rank(self, method='average', na_option='keep', ascending=True, - pct=False): - """ - Compute data ranks (1 through n). Equal values are assigned a rank that - is the average of the ranks of those values - - Parameters - ---------- - method : {'average', 'min', 'max', 'first', 'dense'} - * average: average rank of group - * min: lowest rank in group - * max: highest rank in group - * first: ranks assigned in order they appear in the array - * dense: like 'min', but rank always increases by 1 between groups - na_option : {'keep'} - keep: leave NA values where they are - ascending : boolean, default True - False for ranks by high (1) to low (N) - pct : boolean, default False - Computes percentage rank of data - - Returns - ------- - ranks : Series - """ - ranks = algorithms.rank(self._values, method=method, - na_option=na_option, ascending=ascending, - pct=pct) - return self._constructor(ranks, index=self.index).__finalize__(self) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) def nlargest(self, n=5, keep='first'): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index e1ba981e93d2e..89163dc8f0662 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -875,6 +875,12 @@ def test_rank_na_option(self): assert_almost_equal(ranks0.values, exp0) assert_almost_equal(ranks1.values, exp1) + def test_rank_axis(self): + # check if using axes' names gives the same result + df = pd.DataFrame([[2, 1], [4, 3]]) + 
assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) + assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) + def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) self._check_stat_op('sem', alt) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 385767e14113f..40ef3188e50f7 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1025,6 +1025,11 @@ def test_rank(self): iranks = iseries.rank() assert_series_equal(iranks, exp) + def test_rank_signature(self): + s = Series([0, 1]) + s.rank(method='average') + self.assertRaises(ValueError, s.rank, 'average') + def test_rank_inf(self): raise nose.SkipTest('DataFrame.rank does not currently rank ' 'np.inf and -np.inf properly') diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index ef1bd734de776..b4cc57cb8216c 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -59,7 +59,7 @@ def test_rank_methods_series(self): ts = Series(vals, index=index) for m in ['average', 'min', 'max', 'first', 'dense']: - result = ts.rank(m) + result = ts.rank(method=m) sprank = rankdata(vals, m if m != 'first' else 'ordinal') tm.assert_series_equal(result, Series(sprank, index=index))