From e3b0b5d7d250f46d2e7be928e44b98f48bde72c5 Mon Sep 17 00:00:00 2001
From: MartinBraquet <martin.braquet@gmail.com>
Date: Thu, 20 Mar 2025 14:12:05 +0700
Subject: [PATCH 1/6] Fix bug in `~Series.describe` where the median percentile
 is always included even when the `percentiles` argument is passed

---
 doc/source/whatsnew/v3.0.0.rst              |  1 +
 pandas/core/methods/describe.py             | 11 ++++----
 pandas/io/formats/format.py                 |  3 +++
 pandas/tests/frame/methods/test_describe.py | 30 +++++++++++++++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index b4aa6447c0a1b..cd0eb5183ebd0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -838,6 +838,7 @@ Other
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
 - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
 - Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`)
+- Bug in :meth:`Series.describe` where median percentile is included when the ``percentiles`` argument is passed (:issue:`60550`).
 - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
 - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py
index 17d4d38c97f33..944e28a9b0238 100644
--- a/pandas/core/methods/describe.py
+++ b/pandas/core/methods/describe.py
@@ -229,10 +229,15 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
 
     formatted_percentiles = format_percentiles(percentiles)
 
+    if len(percentiles) == 0:
+        quantiles = []
+    else:
+        quantiles = series.quantile(percentiles).tolist()
+
     stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
     d = (
         [series.count(), series.mean(), series.std(), series.min()]
-        + series.quantile(percentiles).tolist()
+        + quantiles
         + [series.max()]
     )
     # GH#48340 - always return float on non-complex numeric data
@@ -354,10 +359,6 @@ def _refine_percentiles(
     # get them all to be in [0, 1]
     validate_percentile(percentiles)
 
-    # median should always be included
-    if 0.5 not in percentiles:
-        percentiles.append(0.5)
-
     percentiles = np.asarray(percentiles)
 
     # sort and check for duplicates
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index b7fbc4e5e22b7..fb799361fea67 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1565,6 +1565,9 @@ def format_percentiles(
     >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
     ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
     """
+    if len(percentiles) == 0:
+        return []
+
     percentiles = np.asarray(percentiles)
 
     # It checks for np.nan as well
diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
index e9206e86b7b08..28c3fd8d89a43 100644
--- a/pandas/tests/frame/methods/test_describe.py
+++ b/pandas/tests/frame/methods/test_describe.py
@@ -413,3 +413,33 @@ def test_describe_exclude_pa_dtype(self):
             dtype=pd.ArrowDtype(pa.float64()),
         )
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("percentiles", [None, [], [0.2]])
+    def test_refine_percentiles(self, percentiles):
+        """
+        Test that the percentiles are returned correctly depending on the `percentiles`
+        argument.
+        - The default behavior is to return the 25th, 50th, and 75th percentiles
+        - If `percentiles` is an empty list, no percentiles are returned
+        - If `percentiles` is a non-empty list, only those percentiles are returned
+        """
+        # GH#60550
+        df = DataFrame({"a": np.arange(0, 10, 1)})
+
+        result = df.describe(percentiles=percentiles)
+
+        if percentiles is None:
+            percentiles = [0.25, 0.5, 0.75]
+
+        expected = Series(
+            {
+                "count": len(df.a),
+                "mean": df.a.mean(),
+                "std": df.a.std(),
+                "min": df.a.min(),
+                **{f"{p:.0%}": df.a.quantile(p) for p in percentiles},
+                "max": df.a.max(),
+            },
+        ).to_frame(name="a")
+
+        tm.assert_frame_equal(result, expected)

From 8bb3cf36d8e1ceb3c7751e387bec4a0a86912495 Mon Sep 17 00:00:00 2001
From: MartinBraquet <martin.braquet@gmail.com>
Date: Fri, 21 Mar 2025 09:41:55 +0700
Subject: [PATCH 2/6] Refine docstrings

---
 pandas/core/generic.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0c3f535df9ce2..e85402a46d6ec 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -10818,9 +10818,12 @@ def describe(
         ----------
         percentiles : list-like of numbers, optional
             The percentiles to include in the output. All should
-            fall between 0 and 1. The default is
-            ``[.25, .5, .75]``, which returns the 25th, 50th, and
-            75th percentiles.
+            fall between 0 and 1. Here are the options:
+
+            - A list-like of numbers : To include the percentiles listed. If
+              that list is empty, no percentiles will be returned.
+            - None (default) : To include the default percentiles, which are the
+              25th, 50th, and 75th ones.
         include : 'all', list-like of dtypes or None (default), optional
             A white list of data types to include in the result. Ignored
             for ``Series``. Here are the options:

From a0a0c630adc74f19dabd1f9d8454ac4b81b84a19 Mon Sep 17 00:00:00 2001
From: MartinBraquet <martin.braquet@gmail.com>
Date: Fri, 21 Mar 2025 10:28:32 +0700
Subject: [PATCH 3/6] Update test_describe in groupby

---
 pandas/tests/groupby/methods/test_describe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py
index 6c4b913574d9e..72bdfebc5eeb7 100644
--- a/pandas/tests/groupby/methods/test_describe.py
+++ b/pandas/tests/groupby/methods/test_describe.py
@@ -202,15 +202,15 @@ def test_describe_duplicate_columns():
     gb = df.groupby(df[1])
     result = gb.describe(percentiles=[])
 
-    columns = ["count", "mean", "std", "min", "50%", "max"]
+    columns = ["count", "mean", "std", "min", "max"]
     frames = [
-        DataFrame([[1.0, val, np.nan, val, val, val]], index=[1], columns=columns)
+        DataFrame([[1.0, val, np.nan, val, val]], index=[1], columns=columns)
         for val in (0.0, 2.0, 3.0)
     ]
     expected = pd.concat(frames, axis=1)
     expected.columns = MultiIndex(
         levels=[[0, 2], columns],
-        codes=[6 * [0] + 6 * [1] + 6 * [0], 3 * list(range(6))],
+        codes=[5 * [0] + 5 * [1] + 5 * [0], 3 * list(range(5))],
     )
     expected.index.names = [1]
     tm.assert_frame_equal(result, expected)

From bf1effa1ad3f3f88b42ccfec2fdc305eabc51e93 Mon Sep 17 00:00:00 2001
From: MartinBraquet <martin.braquet@gmail.com>
Date: Fri, 21 Mar 2025 23:14:55 +0700
Subject: [PATCH 4/6] Minor fixes

---
 pandas/core/generic.py                      | 8 ++------
 pandas/tests/frame/methods/test_describe.py | 5 +++--
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e85402a46d6ec..a7a287de0241e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -10818,12 +10818,8 @@ def describe(
         ----------
         percentiles : list-like of numbers, optional
             The percentiles to include in the output. All should
-            fall between 0 and 1. Here are the options:
-
-            - A list-like of numbers : To include the percentiles listed. If
-              that list is empty, no percentiles will be returned.
-            - None (default) : To include the default percentiles, which are the
-              25th, 50th, and 75th ones.
+            fall between 0 and 1. The default, ``None``, will automatically
+            return the 25th, 50th, and 75th percentiles.
         include : 'all', list-like of dtypes or None (default), optional
             A white list of data types to include in the result. Ignored
             for ``Series``. Here are the options:
diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
index 28c3fd8d89a43..401b65ed61a25 100644
--- a/pandas/tests/frame/methods/test_describe.py
+++ b/pandas/tests/frame/methods/test_describe.py
@@ -431,7 +431,7 @@ def test_refine_percentiles(self, percentiles):
         if percentiles is None:
             percentiles = [0.25, 0.5, 0.75]
 
-        expected = Series(
+        expected = DataFrame(
             {
                 "count": len(df.a),
                 "mean": df.a.mean(),
@@ -440,6 +440,7 @@ def test_refine_percentiles(self, percentiles):
                 **{f"{p:.0%}": df.a.quantile(p) for p in percentiles},
                 "max": df.a.max(),
             },
-        ).to_frame(name="a")
+            index=["a"],
+        ).T
 
         tm.assert_frame_equal(result, expected)

From 28756ad16298aa8d3f6ab468bc9b7b6849182da8 Mon Sep 17 00:00:00 2001
From: Martin Braquet <martin.braquet@gmail.com>
Date: Fri, 21 Mar 2025 23:17:24 +0700
Subject: [PATCH 5/6] Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index cd0eb5183ebd0..bad06329c4bfa 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -838,7 +838,7 @@ Other
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
 - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
 - Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`)
-- Bug in :meth:`Series.describe` where median percentile is included when the ``percentiles`` argument is passed (:issue:`60550`).
+- Bug in :meth:`Series.describe` where median percentile was always included when the ``percentiles`` argument was passed (:issue:`60550`).
 - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
 - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)

From 5ed786c67458e7840aff4e1b3e3a9a82dc294328 Mon Sep 17 00:00:00 2001
From: MartinBraquet <martin.braquet@gmail.com>
Date: Sat, 22 Mar 2025 02:01:52 +0700
Subject: [PATCH 6/6] Refactor expected df to avoid transpose

---
 pandas/tests/frame/methods/test_describe.py | 30 ++++++++++++++-------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
index 401b65ed61a25..50656ca85e90a 100644
--- a/pandas/tests/frame/methods/test_describe.py
+++ b/pandas/tests/frame/methods/test_describe.py
@@ -432,15 +432,25 @@ def test_refine_percentiles(self, percentiles):
             percentiles = [0.25, 0.5, 0.75]
 
         expected = DataFrame(
-            {
-                "count": len(df.a),
-                "mean": df.a.mean(),
-                "std": df.a.std(),
-                "min": df.a.min(),
-                **{f"{p:.0%}": df.a.quantile(p) for p in percentiles},
-                "max": df.a.max(),
-            },
-            index=["a"],
-        ).T
+            [
+                len(df.a),
+                df.a.mean(),
+                df.a.std(),
+                df.a.min(),
+                *[df.a.quantile(p) for p in percentiles],
+                df.a.max(),
+            ],
+            index=pd.Index(
+                [
+                    "count",
+                    "mean",
+                    "std",
+                    "min",
+                    *[f"{p:.0%}" for p in percentiles],
+                    "max",
+                ]
+            ),
+            columns=["a"],
+        )
 
         tm.assert_frame_equal(result, expected)