From f1171a8ad0d5f5735fa7134f95227abe91295c42 Mon Sep 17 00:00:00 2001
From: Jonathan Tammo Siebert <github_repos@jonathansiebert.de>
Date: Sat, 12 Dec 2020 17:47:06 +0100
Subject: [PATCH 1/6] DOC: Clarify that DataFrame.sort_values is stable for
 len(by)>1

---
 pandas/core/generic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 41cb76d88957e..472c505700327 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4367,9 +4367,9 @@ def sort_values(
              If True, perform operation in-place.
         kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
              Choice of sorting algorithm. See also ndarray.np.sort for more
-             information.  `mergesort` is the only stable algorithm. For
-             DataFrames, this option is only applied when sorting on a single
-             column or label.
+             information. `mergesort` is the only stable algorithm. For
+             DataFrames, if sorting by multiple columns or labels, this
+             argument is ignored, defaulting to a stable sorting algorithm.
         na_position : {'first', 'last'}, default 'last'
              Puts NaNs at the beginning if `first`; `last` puts NaNs at the
              end.

From 2e24f694dc5a157d0b552aee46ee1b7214b29f60 Mon Sep 17 00:00:00 2001
From: Jonathan Tammo Siebert <github_repos@jonathansiebert.de>
Date: Tue, 22 Dec 2020 16:22:32 +0100
Subject: [PATCH 2/6] TST: Update test case to show multilabel sort is stable

---
 .../tests/frame/methods/test_sort_values.py   | 29 +++++++++++--------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index 1bb969956e074..904ab2a9c354f 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -219,25 +219,30 @@ def test_sort_values_stable_descending_sort(self):
 
     def test_sort_values_stable_descending_multicolumn_sort(self):
         df = DataFrame(
-            {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}
+            {
+                "A": [1, 2, np.nan, 1, 1, 1, 6, 8, 4, 8, 8, np.nan, np.nan, 8, 8],
+                "B": [9, np.nan, 5, 2, 2, 2, 5, 4, 5, 3, 4, np.nan, np.nan, 4, 4],
+            }
         )
-        # test stable mergesort
+        # test sorting is stable
         expected = DataFrame(
-            {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 2, 9]},
-            index=[2, 5, 4, 6, 1, 3, 0],
-        )
-        sorted_df = df.sort_values(
-            ["A", "B"], ascending=[0, 1], na_position="first", kind="mergesort"
+            {
+                "A": [np.nan, np.nan, np.nan, 8, 8, 8, 8, 8, 6, 4, 2, 1, 1, 1, 1],
+                "B": [np.nan, np.nan, 5, 3, 4, 4, 4, 4, 5, 5, np.nan, 2, 2, 2, 9],
+            },
+            index=[11, 12, 2, 9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0],
         )
+        sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="first")
         tm.assert_frame_equal(sorted_df, expected)
 
         expected = DataFrame(
-            {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]},
-            index=[2, 5, 4, 6, 1, 0, 3],
-        )
-        sorted_df = df.sort_values(
-            ["A", "B"], ascending=[0, 0], na_position="first", kind="mergesort"
+            {
+                "A": [8, 8, 8, 8, 8, 6, 4, 2, 1, 1, 1, 1, np.nan, np.nan, np.nan],
+                "B": [4, 4, 4, 4, 3, 5, 5, np.nan, 9, 2, 2, 2, 5, np.nan, np.nan],
+            },
+            index=[7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5, 2, 11, 12],
         )
+        sorted_df = df.sort_values(["A", "B"], ascending=[0, 0], na_position="last")
         tm.assert_frame_equal(sorted_df, expected)
 
     def test_sort_values_stable_categorial(self):

From 2addf1dd3191c9c652c502a8a5471b6c87379c01 Mon Sep 17 00:00:00 2001
From: Jonathan Tammo Siebert <github_repos@jonathansiebert.de>
Date: Tue, 22 Dec 2020 17:21:09 +0100
Subject: [PATCH 3/6] TST: Parametrize sort options in multicol sort_values
 test

---
 .../tests/frame/methods/test_sort_values.py   | 78 ++++++++++++++-----
 1 file changed, 59 insertions(+), 19 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index 904ab2a9c354f..c59f545c45d11 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -217,32 +217,72 @@ def test_sort_values_stable_descending_sort(self):
         sorted_df = df.sort_values(by="sort_col", kind="mergesort", ascending=False)
         tm.assert_frame_equal(df, sorted_df)
 
-    def test_sort_values_stable_descending_multicolumn_sort(self):
+    @pytest.mark.parametrize(
+        "expected_idx, ascending, na_position",
+        [
+            pytest.param(
+                [11, 12, 2, 3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14],
+                [True, True],
+                "first",
+                id="both_ascending_na_first",
+            ),
+            pytest.param(
+                [11, 12, 2, 0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9],
+                [True, False],
+                "first",
+                id="first_ascending_na_first",
+            ),
+            pytest.param(
+                [11, 12, 2, 9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0],
+                [False, True],
+                "first",
+                id="second_ascending_na_first",
+            ),
+            pytest.param(
+                [11, 12, 2, 7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5],
+                [False, False],
+                "first",
+                id="both_descending_na_first",
+            ),
+            pytest.param(
+                [3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14, 2, 11, 12],
+                [True, True],
+                "last",
+                id="both_ascending_na_last",
+            ),
+            pytest.param(
+                [0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9, 2, 11, 12],
+                [True, False],
+                "last",
+                id="first_ascending_na_last",
+            ),
+            pytest.param(
+                [9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0, 2, 11, 12],
+                [False, True],
+                "last",
+                id="second_ascending_na_last",
+            ),
+            pytest.param(
+                [7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5, 2, 11, 12],
+                [False, False],
+                "last",
+                id="both_descending_na_last",
+            ),
+        ],
+    )
+    def test_sort_values_stable_multicolumn_sort(
+        self, expected_idx, ascending, na_position
+    ):
         df = DataFrame(
             {
                 "A": [1, 2, np.nan, 1, 1, 1, 6, 8, 4, 8, 8, np.nan, np.nan, 8, 8],
                 "B": [9, np.nan, 5, 2, 2, 2, 5, 4, 5, 3, 4, np.nan, np.nan, 4, 4],
             }
         )
-        # test sorting is stable
-        expected = DataFrame(
-            {
-                "A": [np.nan, np.nan, np.nan, 8, 8, 8, 8, 8, 6, 4, 2, 1, 1, 1, 1],
-                "B": [np.nan, np.nan, 5, 3, 4, 4, 4, 4, 5, 5, np.nan, 2, 2, 2, 9],
-            },
-            index=[11, 12, 2, 9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0],
-        )
-        sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="first")
-        tm.assert_frame_equal(sorted_df, expected)
-
-        expected = DataFrame(
-            {
-                "A": [8, 8, 8, 8, 8, 6, 4, 2, 1, 1, 1, 1, np.nan, np.nan, np.nan],
-                "B": [4, 4, 4, 4, 3, 5, 5, np.nan, 9, 2, 2, 2, 5, np.nan, np.nan],
-            },
-            index=[7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5, 2, 11, 12],
+        expected = df.take(expected_idx)
+        sorted_df = df.sort_values(
+            ["A", "B"], ascending=ascending, na_position=na_position
         )
-        sorted_df = df.sort_values(["A", "B"], ascending=[0, 0], na_position="last")
         tm.assert_frame_equal(sorted_df, expected)
 
     def test_sort_values_stable_categorial(self):

From da6b97a523084f4ee5dd579328ba5b32d13ab4b0 Mon Sep 17 00:00:00 2001
From: Jonathan Tammo Siebert <github_repos@jonathansiebert.de>
Date: Tue, 22 Dec 2020 18:31:05 +0100
Subject: [PATCH 4/6] TST: Simplified parametrization

---
 .../tests/frame/methods/test_sort_values.py   | 62 +++++++------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index c59f545c45d11..554b54178c9de 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -218,60 +218,39 @@ def test_sort_values_stable_descending_sort(self):
         tm.assert_frame_equal(df, sorted_df)
 
     @pytest.mark.parametrize(
-        "expected_idx, ascending, na_position",
+        "expected_idx_non_na, ascending",
         [
             pytest.param(
-                [11, 12, 2, 3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14],
+                [3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14],
                 [True, True],
-                "first",
-                id="both_ascending_na_first",
+                id="both_ascending",
             ),
             pytest.param(
-                [11, 12, 2, 0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9],
+                [0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9],
                 [True, False],
-                "first",
-                id="first_ascending_na_first",
+                id="first_ascending",
             ),
             pytest.param(
-                [11, 12, 2, 9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0],
+                [9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0],
                 [False, True],
-                "first",
-                id="second_ascending_na_first",
+                id="second_ascending",
             ),
             pytest.param(
-                [11, 12, 2, 7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5],
+                [7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5],
                 [False, False],
-                "first",
-                id="both_descending_na_first",
-            ),
-            pytest.param(
-                [3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14, 2, 11, 12],
-                [True, True],
-                "last",
-                id="both_ascending_na_last",
-            ),
-            pytest.param(
-                [0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9, 2, 11, 12],
-                [True, False],
-                "last",
-                id="first_ascending_na_last",
-            ),
-            pytest.param(
-                [9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0, 2, 11, 12],
-                [False, True],
-                "last",
-                id="second_ascending_na_last",
-            ),
-            pytest.param(
-                [7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5, 2, 11, 12],
-                [False, False],
-                "last",
-                id="both_descending_na_last",
+                id="both_descending",
             ),
         ],
     )
+    @pytest.mark.parametrize(
+        "na_position",
+        [
+            pytest.param("first", id="na_first"),
+            pytest.param("last", id="na_last"),
+        ],
+    )
     def test_sort_values_stable_multicolumn_sort(
-        self, expected_idx, ascending, na_position
+        self, expected_idx_non_na, ascending, na_position
     ):
         df = DataFrame(
             {
@@ -279,6 +258,13 @@ def test_sort_values_stable_multicolumn_sort(
                 "B": [9, np.nan, 5, 2, 2, 2, 5, 4, 5, 3, 4, np.nan, np.nan, 4, 4],
             }
         )
+        # All rows with NaN in col "B" only have unique value in "A", therefore,
+        # only the rows with NaNs in "A" have to be treated individually:
+        expected_idx = (
+            [11, 12, 2] + expected_idx_non_na
+            if na_position == "first"
+            else expected_idx_non_na + [2, 11, 12]
+        )
         expected = df.take(expected_idx)
         sorted_df = df.sort_values(
             ["A", "B"], ascending=ascending, na_position=na_position

From 33c1a93b8ac6a2979c3b2308ec274c74bbea97d4 Mon Sep 17 00:00:00 2001
From: Jonathan Tammo Siebert <github_repos@jonathansiebert.de>
Date: Tue, 22 Dec 2020 19:01:11 +0100
Subject: [PATCH 5/6] TST: Removed parametrization IDs

---
 .../tests/frame/methods/test_sort_values.py   | 28 ++++++-------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index 554b54178c9de..3916ef5697812 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -220,35 +220,25 @@ def test_sort_values_stable_descending_sort(self):
     @pytest.mark.parametrize(
         "expected_idx_non_na, ascending",
         [
-            pytest.param(
+            [
                 [3, 4, 5, 0, 1, 8, 6, 9, 7, 10, 13, 14],
                 [True, True],
-                id="both_ascending",
-            ),
-            pytest.param(
+            ],
+            [
                 [0, 3, 4, 5, 1, 8, 6, 7, 10, 13, 14, 9],
                 [True, False],
-                id="first_ascending",
-            ),
-            pytest.param(
+            ],
+            [
                 [9, 7, 10, 13, 14, 6, 8, 1, 3, 4, 5, 0],
                 [False, True],
-                id="second_ascending",
-            ),
-            pytest.param(
+            ],
+            [
                 [7, 10, 13, 14, 9, 6, 8, 1, 0, 3, 4, 5],
                 [False, False],
-                id="both_descending",
-            ),
-        ],
-    )
-    @pytest.mark.parametrize(
-        "na_position",
-        [
-            pytest.param("first", id="na_first"),
-            pytest.param("last", id="na_last"),
+            ],
         ],
     )
+    @pytest.mark.parametrize("na_position", ["first", "last"])
     def test_sort_values_stable_multicolumn_sort(
         self, expected_idx_non_na, ascending, na_position
     ):

From 911745a3bdef862be54eb539d711306432f53d9c Mon Sep 17 00:00:00 2001
From: Jonathan Tammo Siebert <github_repos@jonathansiebert.de>
Date: Tue, 22 Dec 2020 19:12:39 +0100
Subject: [PATCH 6/6] TST: Added reference to github PR

---
 pandas/tests/frame/methods/test_sort_values.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index 3916ef5697812..987848ec697d1 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -242,13 +242,14 @@ def test_sort_values_stable_descending_sort(self):
     def test_sort_values_stable_multicolumn_sort(
         self, expected_idx_non_na, ascending, na_position
     ):
+        # GH#38426 Clarify sort_values with mult. columns / labels is stable
         df = DataFrame(
             {
                 "A": [1, 2, np.nan, 1, 1, 1, 6, 8, 4, 8, 8, np.nan, np.nan, 8, 8],
                 "B": [9, np.nan, 5, 2, 2, 2, 5, 4, 5, 3, 4, np.nan, np.nan, 4, 4],
             }
         )
-        # All rows with NaN in col "B" only have unique value in "A", therefore,
+        # All rows with NaN in col "B" only have unique values in "A", therefore,
         # only the rows with NaNs in "A" have to be treated individually:
         expected_idx = (
             [11, 12, 2] + expected_idx_non_na