From 800fd17ff9b81cda97d3026ee3eebd331dbe0171 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov <ivanovmg@gmail.com>
Date: Wed, 20 Jan 2021 11:13:25 +0700
Subject: [PATCH 1/4] REF: split describe_categorical_1d

---
 pandas/core/describe.py | 157 ++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 77 deletions(-)

diff --git a/pandas/core/describe.py b/pandas/core/describe.py
index 09862b72c4a4f..78b1603d98198 100644
--- a/pandas/core/describe.py
+++ b/pandas/core/describe.py
@@ -6,7 +6,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Optional, Sequence, Union, cast
+from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union, cast
 import warnings
 
 import numpy as np
@@ -113,12 +113,11 @@ class SeriesDescriber(NDFrameDescriberAbstract):
     obj: "Series"
 
     def describe(self, percentiles: Sequence[float]) -> Series:
-        return describe_1d(
+        describe_func = select_describe_func(
             self.obj,
-            percentiles=percentiles,
-            datetime_is_numeric=self.datetime_is_numeric,
-            is_series=True,
+            self.datetime_is_numeric,
         )
+        return describe_func(self.obj, percentiles)
 
 
 class DataFrameDescriber(NDFrameDescriberAbstract):
@@ -155,15 +154,10 @@ def __init__(
     def describe(self, percentiles: Sequence[float]) -> DataFrame:
         data = self._select_data()
 
-        ldesc = [
-            describe_1d(
-                series,
-                percentiles=percentiles,
-                datetime_is_numeric=self.datetime_is_numeric,
-                is_series=False,
-            )
-            for _, series in data.items()
-        ]
+        ldesc: List["Series"] = []
+        for _, series in data.items():
+            describe_func = select_describe_func(series, self.datetime_is_numeric)
+            ldesc.append(describe_func(series, percentiles))
 
         col_names = reorder_columns(ldesc)
         d = concat(
@@ -231,55 +225,73 @@ def describe_numeric_1d(series: "Series", percentiles: Sequence[float]) -> Serie
     return Series(d, index=stat_index, name=series.name)
 
 
-def describe_categorical_1d(data: "Series", is_series: bool) -> Series:
+def describe_categorical_1d(
+    data: "Series",
+    percentiles_ignored: Sequence[float],
+) -> Series:
     """Describe series containing categorical data.
 
     Parameters
     ----------
     data : Series
         Series to be described.
-    is_series : bool
-        True if the original object is a Series.
-        False if the one column of the DataFrame is described.
+    percentiles : list-like of numbers
+        Ignored, but in place to unify interface.
+    """
+    names = ["count", "unique", "top", "freq"]
+    objcounts = data.value_counts()
+    count_unique = len(objcounts[objcounts != 0])
+    if count_unique > 0:
+        top, freq = objcounts.index[0], objcounts.iloc[0]
+        dtype = None
+    else:
+        # If the Series has no non-null values, set 'top' and 'freq' to NaN
+        # to maintain output shape consistency
+        top, freq = np.nan, np.nan
+        dtype = "object"
+
+    result = [data.count(), count_unique, top, freq]
+
+    from pandas import Series
+
+    return Series(result, index=names, name=data.name, dtype=dtype)
+
+
+def describe_timestamp_as_categorical_1d(
+    data: "Series",
+    percentiles_ignored: Sequence[float],
+) -> Series:
+    """Describe series containing timestamp data treated as categorical.
+
+    Parameters
+    ----------
+    data : Series
+        Series to be described.
+    percentiles : list-like of numbers
+        Ignored, but in place to unify interface.
     """
     names = ["count", "unique"]
     objcounts = data.value_counts()
     count_unique = len(objcounts[objcounts != 0])
     result = [data.count(), count_unique]
     dtype = None
-    if result[1] > 0:
+    if count_unique > 0:
         top, freq = objcounts.index[0], objcounts.iloc[0]
-        if is_datetime64_any_dtype(data.dtype):
-            if is_series:
-                stacklevel = 6
-            else:
-                stacklevel = 7
-            warnings.warn(
-                "Treating datetime data as categorical rather than numeric in "
-                "`.describe` is deprecated and will be removed in a future "
-                "version of pandas. Specify `datetime_is_numeric=True` to "
-                "silence this warning and adopt the future behavior now.",
-                FutureWarning,
-                stacklevel=stacklevel,
-            )
-            tz = data.dt.tz
-            asint = data.dropna().values.view("i8")
-            top = Timestamp(top)
-            if top.tzinfo is not None and tz is not None:
-                # Don't tz_localize(None) if key is already tz-aware
-                top = top.tz_convert(tz)
-            else:
-                top = top.tz_localize(tz)
-            names += ["top", "freq", "first", "last"]
-            result += [
-                top,
-                freq,
-                Timestamp(asint.min(), tz=tz),
-                Timestamp(asint.max(), tz=tz),
-            ]
+        tz = data.dt.tz
+        asint = data.dropna().values.view("i8")
+        top = Timestamp(top)
+        if top.tzinfo is not None and tz is not None:
+            # Don't tz_localize(None) if key is already tz-aware
+            top = top.tz_convert(tz)
         else:
-            names += ["top", "freq"]
-            result += [top, freq]
+            top = top.tz_localize(tz)
+        names += ["top", "freq", "first", "last"]
+        result += [
+            top,
+            freq,
+            Timestamp(asint.min(), tz=tz),
+            Timestamp(asint.max(), tz=tz),
+        ]
 
     # If the DataFrame is empty, set 'top' and 'freq' to None
     # to maintain output shape consistency
@@ -317,41 +329,32 @@ def describe_timestamp_1d(data: "Series", percentiles: Sequence[float]) -> Serie
     return Series(d, index=stat_index, name=data.name)
 
 
-def describe_1d(
+def select_describe_func(
     data: "Series",
-    percentiles: Sequence[float],
     datetime_is_numeric: bool,
-    *,
-    is_series: bool,
-) -> Series:
-    """Describe series.
-
-    Parameters
-    ----------
-    data : Series
-        Series to be described.
-    percentiles : list-like of numbers
-        The percentiles to include in the output.
-    datetime_is_numeric : bool, default False
-        Whether to treat datetime dtypes as numeric.
-    is_series : bool
-        True if the original object is a Series.
-        False if the one column of the DataFrame is described.
-
-    Returns
-    -------
-    Series
-    """
+) -> Callable:
     if is_bool_dtype(data.dtype):
-        return describe_categorical_1d(data, is_series)
+        describe_func = describe_categorical_1d
     elif is_numeric_dtype(data):
-        return describe_numeric_1d(data, percentiles)
+        describe_func = describe_numeric_1d
     elif is_datetime64_any_dtype(data.dtype) and datetime_is_numeric:
-        return describe_timestamp_1d(data, percentiles)
+        describe_func = describe_timestamp_1d
     elif is_timedelta64_dtype(data.dtype):
-        return describe_numeric_1d(data, percentiles)
+        describe_func = describe_numeric_1d
     else:
-        return describe_categorical_1d(data, is_series)
+        describe_func = describe_categorical_1d
+
+    if describe_func == describe_categorical_1d and is_datetime64_any_dtype(data.dtype):
+        warnings.warn(
+            "Treating datetime data as categorical rather than numeric in "
+            "`.describe` is deprecated and will be removed in a future "
+            "version of pandas. Specify `datetime_is_numeric=True` to "
+            "silence this warning and adopt the future behavior now.",
+            FutureWarning,
+            stacklevel=5,
+        )
+        describe_func = describe_timestamp_as_categorical_1d
+    return describe_func
 
 
 def refine_percentiles(percentiles: Optional[Sequence[float]]) -> Sequence[float]:

From 48270c5c820c31ffaa8a443b16790cbf4711fc87 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov <ivanovmg@gmail.com>
Date: Wed, 20 Jan 2021 13:47:51 +0700
Subject: [PATCH 2/4] DOC: add docstring to select_describe_func

---
 pandas/core/describe.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pandas/core/describe.py b/pandas/core/describe.py
index 78b1603d98198..2e14c7b7ed0ae 100644
--- a/pandas/core/describe.py
+++ b/pandas/core/describe.py
@@ -333,6 +333,17 @@ def select_describe_func(
     data: "Series",
     datetime_is_numeric: bool,
 ) -> Callable:
+    """Select proper function for describing series based on data type.
+
+    Parameters
+    ----------
+    data : Series
+        Series to be described.
+    datetime_is_numeric : bool
+        Whether to treat datetime dtypes as numeric.
+    """
+    describe_func: Callable
+
     if is_bool_dtype(data.dtype):
         describe_func = describe_categorical_1d
     elif is_numeric_dtype(data):

From 83fa0b53cf7671c7144a56ba031c691d2fbc5e10 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov <ivanovmg@gmail.com>
Date: Wed, 20 Jan 2021 13:49:10 +0700
Subject: [PATCH 3/4] REF: simplify logic in select_describe_func

---
 pandas/core/describe.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/pandas/core/describe.py b/pandas/core/describe.py
index 2e14c7b7ed0ae..a31858f1a7afc 100644
--- a/pandas/core/describe.py
+++ b/pandas/core/describe.py
@@ -348,23 +348,24 @@ def select_describe_func(
         describe_func = describe_categorical_1d
     elif is_numeric_dtype(data):
         describe_func = describe_numeric_1d
-    elif is_datetime64_any_dtype(data.dtype) and datetime_is_numeric:
-        describe_func = describe_timestamp_1d
+    elif is_datetime64_any_dtype(data.dtype):
+        if datetime_is_numeric:
+            describe_func = describe_timestamp_1d
+        else:
+            warnings.warn(
+                "Treating datetime data as categorical rather than numeric in "
+                "`.describe` is deprecated and will be removed in a future "
+                "version of pandas. Specify `datetime_is_numeric=True` to "
+                "silence this warning and adopt the future behavior now.",
+                FutureWarning,
+                stacklevel=5,
+            )
+            describe_func = describe_timestamp_as_categorical_1d
     elif is_timedelta64_dtype(data.dtype):
         describe_func = describe_numeric_1d
     else:
         describe_func = describe_categorical_1d
 
-    if describe_func == describe_categorical_1d and is_datetime64_any_dtype(data.dtype):
-        warnings.warn(
-            "Treating datetime data as categorical rather than numeric in "
-            "`.describe` is deprecated and will be removed in a future "
-            "version of pandas. Specify `datetime_is_numeric=True` to "
-            "silence this warning and adopt the future behavior now.",
-            FutureWarning,
-            stacklevel=5,
-        )
-        describe_func = describe_timestamp_as_categorical_1d
     return describe_func
 
 

From 18c80d31815b2094c907dc4af49893db8b011ede Mon Sep 17 00:00:00 2001
From: Maxim Ivanov <ivanovmg@gmail.com>
Date: Wed, 20 Jan 2021 13:53:15 +0700
Subject: [PATCH 4/4] DOC: update param names in categorical docstring

---
 pandas/core/describe.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/pandas/core/describe.py b/pandas/core/describe.py
index a31858f1a7afc..3eafdafa99518 100644
--- a/pandas/core/describe.py
+++ b/pandas/core/describe.py
@@ -235,7 +235,7 @@ def describe_categorical_1d(
     ----------
     data : Series
         Series to be described.
-    percentiles : list-like of numbers
+    percentiles_ignored : list-like of numbers
         Ignored, but in place to unify interface.
     """
     names = ["count", "unique", "top", "freq"]
@@ -267,7 +267,7 @@ def describe_timestamp_as_categorical_1d(
     ----------
     data : Series
         Series to be described.
-    percentiles : list-like of numbers
+    percentiles_ignored : list-like of numbers
         Ignored, but in place to unify interface.
     """
     names = ["count", "unique"]
@@ -342,15 +342,13 @@ def select_describe_func(
     datetime_is_numeric : bool
         Whether to treat datetime dtypes as numeric.
     """
-    describe_func: Callable
-
     if is_bool_dtype(data.dtype):
-        describe_func = describe_categorical_1d
+        return describe_categorical_1d
     elif is_numeric_dtype(data):
-        describe_func = describe_numeric_1d
+        return describe_numeric_1d
     elif is_datetime64_any_dtype(data.dtype):
         if datetime_is_numeric:
-            describe_func = describe_timestamp_1d
+            return describe_timestamp_1d
         else:
             warnings.warn(
                 "Treating datetime data as categorical rather than numeric in "
@@ -360,13 +358,11 @@ def select_describe_func(
                 FutureWarning,
                 stacklevel=5,
             )
-            describe_func = describe_timestamp_as_categorical_1d
+            return describe_timestamp_as_categorical_1d
     elif is_timedelta64_dtype(data.dtype):
-        describe_func = describe_numeric_1d
+        return describe_numeric_1d
     else:
-        describe_func = describe_categorical_1d
-
-    return describe_func
+        return describe_categorical_1d
 
 
 def refine_percentiles(percentiles: Optional[Sequence[float]]) -> Sequence[float]: