Skip to content

Commit 00a5dcb

Browse files
ENH: added df/series.sort_values(key=...) and df/series.sort_index(key=...) functionality
1 parent ea2e26a commit 00a5dcb

File tree

2 files changed

34 additions and 6 deletions (+34 / -6 lines changed)

2 files changed

34 additions and 6 deletions (+34 / -6 lines changed)

pandas/core/frame.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4722,6 +4722,7 @@ def sort_values(
47224722
inplace=False,
47234723
kind="quicksort",
47244724
na_position="last",
4725+
key = None
47254726
):
47264727
inplace = validate_bool_kwarg(inplace, "inplace")
47274728
axis = self._get_axis_number(axis)
@@ -4735,7 +4736,12 @@ def sort_values(
47354736
if len(by) > 1:
47364737
from pandas.core.sorting import lexsort_indexer
47374738

4738-
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
4739+
if key is not None:
4740+
key_func = np.vectorize(key)
4741+
keys = [key_func(self._get_label_or_level_values(x, axis=axis)) for x in by]
4742+
else:
4743+
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
4744+
47394745
indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position)
47404746
indexer = ensure_platform_int(indexer)
47414747
else:
@@ -4744,6 +4750,10 @@ def sort_values(
47444750
by = by[0]
47454751
k = self._get_label_or_level_values(by, axis=axis)
47464752

4753+
if key is not None:
4754+
key_func = np.vectorize(key)
4755+
k = key_func(k)
4756+
47474757
if isinstance(ascending, (tuple, list)):
47484758
ascending = ascending[0]
47494759

@@ -4772,6 +4782,7 @@ def sort_index(
47724782
na_position="last",
47734783
sort_remaining=True,
47744784
by=None,
4785+
key=None
47754786
):
47764787

47774788
# TODO: this can be combined with Series.sort_index impl as
@@ -4792,21 +4803,23 @@ def sort_index(
47924803

47934804
axis = self._get_axis_number(axis)
47944805
labels = self._get_axis(axis)
4795-
4806+
if key is not None:
4807+
labels = labels.map(key)
4808+
47964809
# make sure that the axis is lexsorted to start
47974810
# if not we need to reconstruct to get the correct indexer
47984811
labels = labels._sort_levels_monotonic()
47994812
if level is not None:
4800-
48014813
new_axis, indexer = labels.sortlevel(
48024814
level, ascending=ascending, sort_remaining=sort_remaining
48034815
)
48044816

48054817
elif isinstance(labels, ABCMultiIndex):
48064818
from pandas.core.sorting import lexsort_indexer
48074819

4820+
codes = labels._get_codes_for_sorting()
48084821
indexer = lexsort_indexer(
4809-
labels._get_codes_for_sorting(),
4822+
codes,
48104823
orders=ascending,
48114824
na_position=na_position,
48124825
)

pandas/core/series.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2838,6 +2838,7 @@ def sort_values(
28382838
inplace=False,
28392839
kind="quicksort",
28402840
na_position="last",
2841+
key=None
28412842
):
28422843
"""
28432844
Sort by the values.
@@ -2955,6 +2956,10 @@ def sort_values(
29552956
)
29562957

29572958
def _try_kind_sort(arr):
2959+
if key is not None:
2960+
key_func = np.vectorize(key)
2961+
arr = key_func(arr)
2962+
29582963
# easier to ask forgiveness than permission
29592964
try:
29602965
# if kind==mergesort, it can fail for object dtype
@@ -3014,6 +3019,7 @@ def sort_index(
30143019
kind="quicksort",
30153020
na_position="last",
30163021
sort_remaining=True,
3022+
key=None
30173023
):
30183024
"""
30193025
Sort Series by index labels.
@@ -3131,17 +3137,22 @@ def sort_index(
31313137
# Validate the axis parameter
31323138
self._get_axis_number(axis)
31333139
index = self.index
3134-
3140+
if key is not None:
3141+
index = index.map(key)
3142+
31353143
if level is not None:
31363144
new_index, indexer = index.sortlevel(
31373145
level, ascending=ascending, sort_remaining=sort_remaining
31383146
)
3147+
31393148
elif isinstance(index, MultiIndex):
31403149
from pandas.core.sorting import lexsort_indexer
31413150

31423151
labels = index._sort_levels_monotonic()
3152+
codes = labels._get_codes_for_sorting()
3153+
31433154
indexer = lexsort_indexer(
3144-
labels._get_codes_for_sorting(),
3155+
codes,
31453156
orders=ascending,
31463157
na_position=na_position,
31473158
)
@@ -3158,6 +3169,10 @@ def sort_index(
31583169
else:
31593170
return self.copy()
31603171

3172+
if key is not None:
3173+
key_func = np.vectorize(key)
3174+
index = key_func(index)
3175+
31613176
indexer = nargsort(
31623177
index, kind=kind, ascending=ascending, na_position=na_position
31633178
)

0 commit comments

Comments
 (0)