Skip to content

Commit f738581

Browse files
charlesdong1991 authored and jreback committed
ENH: Add ignore_index for df.sort_values and series.sort_values (#30402)
1 parent 980d0da commit f738581

File tree

6 files changed

+93
-6
lines changed

6 files changed

+93
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ Other enhancements
207207
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
208208
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
209209
- The DataFrame constructor now preserves the ``ExtensionArray`` dtype when given an ``ExtensionArray`` (:issue:`11363`)
210+
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained an ``ignore_index`` keyword to reset the index after sorting (:issue:`30114`)
210211

211212

212213
Build Changes

pandas/core/frame.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4731,6 +4731,7 @@ def sort_values(
47314731
inplace=False,
47324732
kind="quicksort",
47334733
na_position="last",
4734+
ignore_index=False,
47344735
):
47354736
inplace = validate_bool_kwarg(inplace, "inplace")
47364737
axis = self._get_axis_number(axis)
@@ -4764,6 +4765,9 @@ def sort_values(
47644765
indexer, axis=self._get_block_manager_axis(axis), verify=False
47654766
)
47664767

4768+
if ignore_index:
4769+
new_data.axes[1] = ibase.default_index(len(indexer))
4770+
47674771
if inplace:
47684772
return self._update_inplace(new_data)
47694773
else:

pandas/core/generic.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4050,6 +4050,7 @@ def sort_values(
40504050
inplace: bool_t = False,
40514051
kind: str = "quicksort",
40524052
na_position: str = "last",
4053+
ignore_index: bool_t = False,
40534054
):
40544055
"""
40554056
Sort by the values along either axis.
@@ -4072,6 +4073,10 @@ def sort_values(
40724073
na_position : {'first', 'last'}, default 'last'
40734074
Puts NaNs at the beginning if `first`; `last` puts NaNs at the
40744075
end.
4076+
ignore_index : bool, default False
4077+
If True, the resulting axis will be labeled 0, 1, …, n - 1.
4078+
4079+
.. versionadded:: 1.0.0
40754080
40764081
Returns
40774082
-------

pandas/core/series.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2698,6 +2698,7 @@ def sort_values(
26982698
inplace=False,
26992699
kind="quicksort",
27002700
na_position="last",
2701+
ignore_index=False,
27012702
):
27022703
"""
27032704
Sort by the values.
@@ -2720,6 +2721,10 @@ def sort_values(
27202721
na_position : {'first', 'last'}, default 'last'
27212722
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
27222723
the end.
2724+
ignore_index : bool, default False
2725+
If True, the resulting axis will be labeled 0, 1, …, n - 1.
2726+
2727+
.. versionadded:: 1.0.0
27232728
27242729
Returns
27252730
-------
@@ -2825,7 +2830,7 @@ def _try_kind_sort(arr):
28252830
return arr.argsort(kind="quicksort")
28262831

28272832
arr = self._values
2828-
sortedIdx = np.empty(len(self), dtype=np.int32)
2833+
sorted_index = np.empty(len(self), dtype=np.int32)
28292834

28302835
bad = isna(arr)
28312836

@@ -2849,16 +2854,19 @@ def _try_kind_sort(arr):
28492854

28502855
if na_position == "last":
28512856
n = good.sum()
2852-
sortedIdx[:n] = idx[good][argsorted]
2853-
sortedIdx[n:] = idx[bad]
2857+
sorted_index[:n] = idx[good][argsorted]
2858+
sorted_index[n:] = idx[bad]
28542859
elif na_position == "first":
28552860
n = bad.sum()
2856-
sortedIdx[n:] = idx[good][argsorted]
2857-
sortedIdx[:n] = idx[bad]
2861+
sorted_index[n:] = idx[good][argsorted]
2862+
sorted_index[:n] = idx[bad]
28582863
else:
28592864
raise ValueError(f"invalid na_position: {na_position}")
28602865

2861-
result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx])
2866+
result = self._constructor(arr[sorted_index], index=self.index[sorted_index])
2867+
2868+
if ignore_index:
2869+
result.index = ibase.default_index(len(sorted_index))
28622870

28632871
if inplace:
28642872
self._update_inplace(result)

pandas/tests/frame/methods/test_sort_values.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,3 +460,45 @@ def test_sort_values_na_position_with_categories_raises(self):
460460

461461
with pytest.raises(ValueError):
462462
df.sort_values(by="c", ascending=False, na_position="bad_position")
463+
464+
@pytest.mark.parametrize(
465+
"original_dict, sorted_dict, ignore_index, output_index",
466+
[
467+
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]),
468+
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]),
469+
(
470+
{"A": [1, 2, 3], "B": [2, 3, 4]},
471+
{"A": [3, 2, 1], "B": [4, 3, 2]},
472+
True,
473+
[0, 1, 2],
474+
),
475+
(
476+
{"A": [1, 2, 3], "B": [2, 3, 4]},
477+
{"A": [3, 2, 1], "B": [4, 3, 2]},
478+
False,
479+
[2, 1, 0],
480+
),
481+
],
482+
)
483+
def test_sort_values_ignore_index(
484+
self, original_dict, sorted_dict, ignore_index, output_index
485+
):
486+
# GH 30114
487+
df = DataFrame(original_dict)
488+
expected = DataFrame(sorted_dict, index=output_index)
489+
490+
# Test when inplace is False
491+
sorted_df = df.sort_values("A", ascending=False, ignore_index=ignore_index)
492+
tm.assert_frame_equal(sorted_df, expected)
493+
494+
tm.assert_frame_equal(df, DataFrame(original_dict))
495+
496+
# Test when inplace is True
497+
copied_df = df.copy()
498+
499+
copied_df.sort_values(
500+
"A", ascending=False, ignore_index=ignore_index, inplace=True
501+
)
502+
tm.assert_frame_equal(copied_df, expected)
503+
504+
tm.assert_frame_equal(df, DataFrame(original_dict))

pandas/tests/series/methods/test_sort_values.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,30 @@ def test_sort_values_categorical(self):
156156
result = df.sort_values(by=["grade", "id"])
157157
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
158158
tm.assert_frame_equal(result, expected)
159+
160+
@pytest.mark.parametrize(
161+
"original_list, sorted_list, ignore_index, output_index",
162+
[
163+
([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
164+
([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
165+
],
166+
)
167+
def test_sort_values_ignore_index(
168+
self, original_list, sorted_list, ignore_index, output_index
169+
):
170+
# GH 30114
171+
sr = Series(original_list)
172+
expected = Series(sorted_list, index=output_index)
173+
174+
# Test when inplace is False
175+
sorted_sr = sr.sort_values(ascending=False, ignore_index=ignore_index)
176+
tm.assert_series_equal(sorted_sr, expected)
177+
178+
tm.assert_series_equal(sr, Series(original_list))
179+
180+
# Test when inplace is True
181+
copied_sr = sr.copy()
182+
copied_sr.sort_values(ascending=False, ignore_index=ignore_index, inplace=True)
183+
tm.assert_series_equal(copied_sr, expected)
184+
185+
tm.assert_series_equal(sr, Series(original_list))

0 commit comments

Comments
 (0)