From 43f817723dd338e1db9d96be670443dcf75171fb Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Thu, 29 Feb 2024 11:18:32 +0100
Subject: [PATCH 1/7] DOC: extended the documentation for
 `pandas.DataFrame.sort_values`; further explained the single-column vs.
 multi-column sorting; added further explanation and simplification for
 customized sorting, e.g., using the `natsort` package

---
 pandas/core/frame.py | 46 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f530466c0fc30..5abe2a2720ef6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6819,7 +6819,9 @@ def sort_values(
         4    D     7     2    e
         5    C     4     3    F
 
-        Sort by col1
+        **Sort by a single column**
+
+        In this case, we are soring the rows according to values in ``col1``:
 
         >>> df.sort_values(by=["col1"])
           col1  col2  col3 col4
@@ -6830,7 +6832,11 @@ def sort_values(
         4    D     7     2    e
         3  NaN     8     4    D
 
-        Sort by multiple columns
+        **Sort by multiple columns**
+
+        You can also provide multiple columns to ``by`` argument, as shown below. In this example, the rows are 
+        first sorted according to ``col1``, and then the rows that have an identical value in ``col1`` are sorted according
+        to ``col2``.
 
         >>> df.sort_values(by=["col1", "col2"])
           col1  col2  col3 col4
@@ -6841,7 +6847,9 @@ def sort_values(
         4    D     7     2    e
         3  NaN     8     4    D
 
-        Sort Descending
+        **Sort in a descending order**
+
+        The sort order can be reversed with the ``ascending`` argument, as shown below:
 
         >>> df.sort_values(by="col1", ascending=False)
           col1  col2  col3 col4
@@ -6852,7 +6860,10 @@ def sort_values(
         1    A     1     1    B
         3  NaN     8     4    D
 
-        Putting NAs first
+        **Placing any** ``NA`` **first**
+
+        Note that in the above example, the rows that contain an ``NA`` value in their ``col1`` are placed
+        at the end of the dataframe. This behavior can be modified via ``na_position`` argument, as shown below:
 
         >>> df.sort_values(by="col1", ascending=False, na_position="first")
           col1  col2  col3 col4
@@ -6863,7 +6874,10 @@ def sort_values(
         0    A     2     0    a
         1    A     1     1    B
 
-        Sorting with a key function
+        **Customized sort order**
+
+        The ``key`` argument allows for a further customization of sorting behaviour. For example, you may want 
+        to ignore the `letter's case <https://en.wikipedia.org/wiki/Letter_case>`__ when sorting strings:
 
         >>> df.sort_values(by="col4", key=lambda col: col.str.lower())
            col1  col2  col3 col4
@@ -6874,8 +6888,10 @@ def sort_values(
         4    D     7     2    e
         5    C     4     3    F
 
-        Natural sort with the key argument,
-        using the `natsort <https://github.com/SethMMorton/natsort>` package.
+        Another typical example is `natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__. 
+        This can be done using
+        ``natsort`` `package <https://github.com/SethMMorton/natsort>`__, which provides sorted indices according
+        to their natural order, as shown below:
 
         >>> df = pd.DataFrame(
         ...     {
@@ -6891,6 +6907,8 @@ def sort_values(
         3   48hr     40
         4   96hr     50
         >>> from natsort import index_natsorted
+        >>> index_natsorted(df["time"])
+        [0, 3, 2, 4, 1]
         >>> df.sort_values(
         ...     by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
         ... )
@@ -6900,6 +6918,20 @@ def sort_values(
         2   72hr     30
         4   96hr     50
         1  128hr     20
+
+        If sorted indices are not needed, you may simplify the procedure as follows:
+
+        >>> from natsort import natsort_keygen
+        ... df.sort_values(
+        ...    by="time",
+        ...    key=natsort_keygen(),
+        ... )
+            time  value
+        0    0hr     10
+        3   48hr     40
+        2   72hr     30
+        4   96hr     50
+        1  128hr     20
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         axis = self._get_axis_number(axis)

From 8e968160f8c0abcddc8307faf5d90014df483391 Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Thu, 29 Feb 2024 13:10:25 +0100
Subject: [PATCH 2/7] shortened the added docstrings to 80 columns; fixed a typo

---
 pandas/core/frame.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5abe2a2720ef6..70b88701695ae 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6821,7 +6821,7 @@ def sort_values(
 
         **Sort by a single column**
 
-        In this case, we are soring the rows according to values in ``col1``:
+        In this case, we are sorting the rows according to values in ``col1``:
 
         >>> df.sort_values(by=["col1"])
           col1  col2  col3 col4
@@ -6834,8 +6834,9 @@ def sort_values(
 
         **Sort by multiple columns**
 
-        You can also provide multiple columns to ``by`` argument, as shown below. In this example, the rows are 
-        first sorted according to ``col1``, and then the rows that have an identical value in ``col1`` are sorted according
+        You can also provide multiple columns to the ``by`` argument, as shown below.
+        In this example, the rows are first sorted according to ``col1``, and then
+        the rows that have an identical value in ``col1`` are sorted according
         to ``col2``.
 
         >>> df.sort_values(by=["col1", "col2"])
@@ -6862,8 +6863,9 @@ def sort_values(
 
         **Placing any** ``NA`` **first**
 
-        Note that in the above example, the rows that contain an ``NA`` value in their ``col1`` are placed
-        at the end of the dataframe. This behavior can be modified via ``na_position`` argument, as shown below:
+        Note that in the above example, the rows that contain an ``NA`` value in their
+        ``col1`` are placed at the end of the dataframe. This behavior can be modified
+        via the ``na_position`` argument, as shown below:
 
         >>> df.sort_values(by="col1", ascending=False, na_position="first")
           col1  col2  col3 col4
@@ -6876,8 +6878,10 @@ def sort_values(
 
         **Customized sort order**
 
-        The ``key`` argument allows for a further customization of sorting behaviour. For example, you may want 
-        to ignore the `letter's case <https://en.wikipedia.org/wiki/Letter_case>`__ when sorting strings:
+        The ``key`` argument allows for further customization of sorting behavior.
+        For example, you may want
+        to ignore the `letter's case <https://en.wikipedia.org/wiki/Letter_case>`__
+        when sorting strings:
 
         >>> df.sort_values(by="col4", key=lambda col: col.str.lower())
            col1  col2  col3 col4
@@ -6888,9 +6892,11 @@ def sort_values(
         4    D     7     2    e
         5    C     4     3    F
 
-        Another typical example is `natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__. 
+        Another typical example is
+        `natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
         This can be done using
-        ``natsort`` `package <https://github.com/SethMMorton/natsort>`__, which provides sorted indices according
+        ``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
+        which provides sorted indices according
         to their natural order, as shown below:
 
         >>> df = pd.DataFrame(
@@ -6922,9 +6928,10 @@ def sort_values(
         If sorted indices are not needed, you may simplify the procedure as follows:
 
         >>> from natsort import natsort_keygen
+        ...
         ... df.sort_values(
-        ...    by="time",
-        ...    key=natsort_keygen(),
+        ...     by="time",
+        ...     key=natsort_keygen(),
         ... )
             time  value
         0    0hr     10

From 8e83eee0cbf655a3b13358cbd94ee8fd8d395aee Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Thu, 29 Feb 2024 14:12:46 +0100
Subject: [PATCH 3/7] added another `shell` line to avoid `micromamba` test
 failure

---
 pandas/core/frame.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 70b88701695ae..1fb1461f5f31c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6928,8 +6928,7 @@ def sort_values(
         If sorted indices are not needed, you may simplify the procedure as follows:
 
         >>> from natsort import natsort_keygen
-        ...
-        ... df.sort_values(
+        >>> df.sort_values(
         ...     by="time",
         ...     key=natsort_keygen(),
         ... )

From 3e198bb3bb0447e9941c319516de71f8206e7cc5 Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Fri, 1 Mar 2024 07:01:15 +0100
Subject: [PATCH 4/7] fixed a typo in a `DataFrame.sort_values()` example

---
 pandas/core/frame.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1fb1461f5f31c..37562ad9d6696 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6916,7 +6916,8 @@ def sort_values(
         >>> index_natsorted(df["time"])
         [0, 3, 2, 4, 1]
         >>> df.sort_values(
-        ...     by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
+        ...     by="time",
+        ...     key=lambda x: np.argsort(index_natsorted(x)),
         ... )
             time  value
         0    0hr     10

From 6749bb47534d5ace53cde5dc3314577c1b08a951 Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Fri, 1 Mar 2024 10:26:56 +0100
Subject: [PATCH 5/7] added a warning to raise awareness about a potential
 issue with `natsort`

---
 pandas/core/frame.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 37562ad9d6696..28876bcfbb3ca 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6939,6 +6939,13 @@ def sort_values(
         2   72hr     30
         4   96hr     50
         1  128hr     20
+
+        .. warning::
+           At the time of writing (1st March, 2024), ``natsort`` (v8.4.0) may fail
+           without error or warning in the case of multi-column sorting of
+           indices (e.g., ``DataFrame.sort_index()``, see
+           `here <https://github.com/SethMMorton/natsort/issues/172>`__ for further
+           details).
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         axis = self._get_axis_number(axis)

From 22aa5880ec2c8364d285df3bd7d8dc8a39b33aa4 Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Sat, 2 Mar 2024 10:34:08 +0100
Subject: [PATCH 6/7] simplified the examples

---
 pandas/core/frame.py | 51 +-------------------------------------------
 1 file changed, 1 insertion(+), 50 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 28876bcfbb3ca..7905e3593e429 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6896,56 +6896,7 @@ def sort_values(
         `natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
         This can be done using
         ``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
-        which provides sorted indices according
-        to their natural order, as shown below:
-
-        >>> df = pd.DataFrame(
-        ...     {
-        ...         "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
-        ...         "value": [10, 20, 30, 40, 50],
-        ...     }
-        ... )
-        >>> df
-            time  value
-        0    0hr     10
-        1  128hr     20
-        2   72hr     30
-        3   48hr     40
-        4   96hr     50
-        >>> from natsort import index_natsorted
-        >>> index_natsorted(df["time"])
-        [0, 3, 2, 4, 1]
-        >>> df.sort_values(
-        ...     by="time",
-        ...     key=lambda x: np.argsort(index_natsorted(x)),
-        ... )
-            time  value
-        0    0hr     10
-        3   48hr     40
-        2   72hr     30
-        4   96hr     50
-        1  128hr     20
-
-        If sorted indices are not needed, you may simplify the procedure as follows:
-
-        >>> from natsort import natsort_keygen
-        >>> df.sort_values(
-        ...     by="time",
-        ...     key=natsort_keygen(),
-        ... )
-            time  value
-        0    0hr     10
-        3   48hr     40
-        2   72hr     30
-        4   96hr     50
-        1  128hr     20
-
-        .. warning::
-           At the time of writing (1st March, 2024), ``natsort`` (v8.4.0) may fail
-           without error or warning in the case of multi-column sorting of
-           indices (e.g., ``DataFrame.sort_index()``, see
-           `here <https://github.com/SethMMorton/natsort/issues/172>`__ for further
-           details).
+        which sorts according to natural order of elements.
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         axis = self._get_axis_number(axis)

From e8769d39251333f053c3aa9a68c7c74542ed0d2c Mon Sep 17 00:00:00 2001
From: "Allahyar, Amin" <amin.allahyar@astrazeneca.com>
Date: Sat, 2 Mar 2024 15:58:54 +0100
Subject: [PATCH 7/7] added a single example about `natsort`

---
 pandas/core/frame.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7905e3593e429..b64aaf8537ad9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6896,7 +6896,35 @@ def sort_values(
         `natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
         This can be done using
         ``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
-        which sorts according to natural order of elements.
+        which provides the indices that sort the values
+        in their natural order, as shown below:
+
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
+        ...         "value": [10, 20, 30, 40, 50],
+        ...     }
+        ... )
+        >>> df
+            time  value
+        0    0hr     10
+        1  128hr     20
+        2   72hr     30
+        3   48hr     40
+        4   96hr     50
+        >>> from natsort import index_natsorted
+        >>> index_natsorted(df["time"])
+        [0, 3, 2, 4, 1]
+        >>> df.sort_values(
+        ...     by="time",
+        ...     key=lambda x: np.argsort(index_natsorted(x)),
+        ... )
+            time  value
+        0    0hr     10
+        3   48hr     40
+        2   72hr     30
+        4   96hr     50
+        1  128hr     20
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         axis = self._get_axis_number(axis)