Skip to content

Commit 87dca5d

Browse files
ENH: added df/series.sort_values(key=...) and df/series.sort_index(key=...) functionality
1 parent 2efb607 commit 87dca5d

File tree

2 files changed

+34
-6
lines changed

2 files changed

+34
-6
lines changed

pandas/core/frame.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4977,6 +4977,7 @@ def sort_values(
49774977
inplace=False,
49784978
kind="quicksort",
49794979
na_position="last",
4980+
key = None
49804981
):
49814982
inplace = validate_bool_kwarg(inplace, "inplace")
49824983
axis = self._get_axis_number(axis)
@@ -4991,7 +4992,12 @@ def sort_values(
49914992
if len(by) > 1:
49924993
from pandas.core.sorting import lexsort_indexer
49934994

4994-
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
4995+
if key is not None:
4996+
key_func = np.vectorize(key)
4997+
keys = [key_func(self._get_label_or_level_values(x, axis=axis)) for x in by]
4998+
else:
4999+
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
5000+
49955001
indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position)
49965002
indexer = ensure_platform_int(indexer)
49975003
else:
@@ -5000,6 +5006,10 @@ def sort_values(
50005006
by = by[0]
50015007
k = self._get_label_or_level_values(by, axis=axis)
50025008

5009+
if key is not None:
5010+
key_func = np.vectorize(key)
5011+
k = key_func(k)
5012+
50035013
if isinstance(ascending, (tuple, list)):
50045014
ascending = ascending[0]
50055015

@@ -5028,6 +5038,7 @@ def sort_index(
50285038
na_position="last",
50295039
sort_remaining=True,
50305040
by=None,
5041+
key=None
50315042
):
50325043

50335044
# TODO: this can be combined with Series.sort_index impl as
@@ -5048,21 +5059,23 @@ def sort_index(
50485059

50495060
axis = self._get_axis_number(axis)
50505061
labels = self._get_axis(axis)
5051-
5062+
if key is not None:
5063+
labels = labels.map(key)
5064+
50525065
# make sure that the axis is lexsorted to start
50535066
# if not we need to reconstruct to get the correct indexer
50545067
labels = labels._sort_levels_monotonic()
50555068
if level is not None:
5056-
50575069
new_axis, indexer = labels.sortlevel(
50585070
level, ascending=ascending, sort_remaining=sort_remaining
50595071
)
50605072

50615073
elif isinstance(labels, MultiIndex):
50625074
from pandas.core.sorting import lexsort_indexer
50635075

5076+
codes = labels._get_codes_for_sorting()
50645077
indexer = lexsort_indexer(
5065-
labels._get_codes_for_sorting(),
5078+
codes,
50665079
orders=ascending,
50675080
na_position=na_position,
50685081
)

pandas/core/series.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3019,6 +3019,7 @@ def sort_values(
30193019
inplace=False,
30203020
kind="quicksort",
30213021
na_position="last",
3022+
key=None
30223023
):
30233024
"""
30243025
Sort by the values.
@@ -3136,6 +3137,10 @@ def sort_values(
31363137
)
31373138

31383139
def _try_kind_sort(arr):
3140+
if key is not None:
3141+
key_func = np.vectorize(key)
3142+
arr = key_func(arr)
3143+
31393144
# easier to ask forgiveness than permission
31403145
try:
31413146
# if kind==mergesort, it can fail for object dtype
@@ -3196,6 +3201,7 @@ def sort_index(
31963201
kind="quicksort",
31973202
na_position="last",
31983203
sort_remaining=True,
3204+
key=None
31993205
):
32003206
"""
32013207
Sort Series by index labels.
@@ -3313,17 +3319,22 @@ def sort_index(
33133319
# Validate the axis parameter
33143320
self._get_axis_number(axis)
33153321
index = self.index
3316-
3322+
if key is not None:
3323+
index = index.map(key)
3324+
33173325
if level is not None:
33183326
new_index, indexer = index.sortlevel(
33193327
level, ascending=ascending, sort_remaining=sort_remaining
33203328
)
3329+
33213330
elif isinstance(index, MultiIndex):
33223331
from pandas.core.sorting import lexsort_indexer
33233332

33243333
labels = index._sort_levels_monotonic()
3334+
codes = labels._get_codes_for_sorting()
3335+
33253336
indexer = lexsort_indexer(
3326-
labels._get_codes_for_sorting(),
3337+
codes,
33273338
orders=ascending,
33283339
na_position=na_position,
33293340
)
@@ -3340,6 +3351,10 @@ def sort_index(
33403351
else:
33413352
return self.copy()
33423353

3354+
if key is not None:
3355+
key_func = np.vectorize(key)
3356+
index = key_func(index)
3357+
33433358
indexer = nargsort(
33443359
index, kind=kind, ascending=ascending, na_position=na_position
33453360
)

0 commit comments

Comments
 (0)