From 6db62fcb96c3a76caa49ce2eeaddeeb5036f2e72 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 24 Apr 2021 16:22:51 -0700
Subject: [PATCH 1/2] BUG: retain ordered Categorical dtype in SeriesGroupBy
 aggregations

---
 pandas/core/groupby/generic.py                | 41 ++++++++++++++-----
 .../tests/groupby/aggregate/test_aggregate.py |  9 ++++
 pandas/tests/groupby/test_categorical.py      | 29 ++++++++++++-
 3 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9d6d2d698dfe5..ebc0b8f9d23ef 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -360,9 +360,26 @@ def _cython_agg_general(
             if numeric_only and not is_numeric:
                 continue
 
-            result = self.grouper._cython_operation(
-                "aggregate", obj._values, how, axis=0, min_count=min_count
-            )
+            objvals = obj._values
+
+            if isinstance(objvals, Categorical):
+                if self.grouper.ngroups > 0:
+                    # without special-casing, we would raise, then in fallback
+                    # would eventually call agg_series but without re-casting
+                    # to Categorical
+                    # equiv: res_values, _ = self.grouper.agg_series(obj, alt)
+                    res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
+                else:
+                    # equiv: res_values = self._python_agg_general(alt)
+                    res_values = self._python_apply_general(alt, self._selected_obj)
+
+                result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
+
+            else:
+                result = self.grouper._cython_operation(
+                    "aggregate", obj._values, how, axis=0, min_count=min_count
+                )
+
             assert result.ndim == 1
             key = base.OutputKey(label=name, position=idx)
             output[key] = result
@@ -1138,13 +1155,7 @@ def cast_agg_result(
             # see if we can cast the values to the desired dtype
             # this may not be the original dtype
 
-            if isinstance(values, Categorical) and isinstance(result, np.ndarray):
-                # If the Categorical op didn't raise, it is dtype-preserving
-                # We get here with how="first", "last", "min", "max"
-                result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
-                # Note this will have result.dtype == dtype from above
-
-            elif (
+            if (
                 not using_array_manager
                 and isinstance(result, np.ndarray)
                 and result.ndim == 1
@@ -1184,7 +1195,15 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
             # Categoricals. This will done by later self._reindex_output()
             # Doing it here creates an error. See GH#34951
             sgb = get_groupby(obj, self.grouper, observed=True)
-            result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+            if sgb.ndim == 2:
+                # equiv: result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+                result = sgb._python_agg_general(lambda x: alt(x, axis=self.axis))
+            else:
+                # use _agg_general bc it will go through _cython_agg_general
+                #  which will correctly cast Categoricals.
+                result = sgb._agg_general(
+                    numeric_only=False, min_count=min_count, alias=how, npfunc=alt
+                )
 
             assert isinstance(result, (Series, DataFrame))  # for mypy
             # In the case of object dtype block, it may have been split
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 28344897a686f..c61360e67e505 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1105,6 +1105,11 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
 
     expected_df = DataFrame(data=exp_data, index=cat_index)
 
+    if "cat_ord" in expected_df:
+        # ordered categorical columns should be preserved
+        dtype = input_df["cat_ord"].dtype
+        expected_df["cat_ord"] = expected_df["cat_ord"].astype(dtype)
+
     tm.assert_frame_equal(result_df, expected_df)
 
 
@@ -1149,6 +1154,10 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
     multi_index = MultiIndex.from_tuples(tuple(multi_index_list))
 
     expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index)
+    for col in expected_df.columns:
+        if isinstance(col, tuple) and "cat_ord" in col:
+            # ordered categorical should be preserved
+            expected_df[col] = expected_df[col].astype(input_df["cat_ord"].dtype)
 
     tm.assert_frame_equal(result_df, expected_df)
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index da438826a939a..0509b3e165bcb 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -800,6 +800,12 @@ def test_preserve_on_ordered_ops(func, values):
     ).set_index("payload")
     tm.assert_frame_equal(result, expected)
 
+    # we should also preserve categorical for SeriesGroupBy
+    sgb = df.groupby("payload")["col"]
+    result = getattr(sgb, func)()
+    expected = expected["col"]
+    tm.assert_series_equal(result, expected)
+
 
 def test_categorical_no_compress():
     data = Series(np.random.randn(9))
@@ -1494,7 +1500,11 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func):
     df = DataFrame({"A": [1997], "B": Series(["b"], dtype="category").cat.as_ordered()})
     df_grouped = df.groupby("A")["B"]
     result = getattr(df_grouped, func)()
-    expected = Series(["b"], index=Index([1997], name="A"), name="B")
+
+    # ordered categorical dtype should be preserved
+    expected = Series(
+        ["b"], index=Index([1997], name="A"), name="B", dtype=df["B"].dtype
+    )
     tm.assert_series_equal(result, expected)
 
 
@@ -1561,7 +1571,15 @@ def test_agg_cython_category_not_implemented_fallback():
     df["col_cat"] = df["col_num"].astype("category")
 
     result = df.groupby("col_num").col_cat.first()
-    expected = Series([1, 2, 3], index=Index([1, 2, 3], name="col_num"), name="col_cat")
+
+    # ordered categorical dtype should definitely be preserved;
+    #  this is unordered, so is less-clear case (if anything, it should raise)
+    expected = Series(
+        [1, 2, 3],
+        index=Index([1, 2, 3], name="col_num"),
+        name="col_cat",
+        dtype=df["col_cat"].dtype,
+    )
     tm.assert_series_equal(result, expected)
 
     result = df.groupby("col_num").agg({"col_cat": "first"})
@@ -1576,6 +1594,10 @@ def test_aggregate_categorical_lost_index(func: str):
     df = DataFrame({"A": [1997], "B": ds})
     result = df.groupby("A").agg({"B": func})
     expected = DataFrame({"B": ["b"]}, index=Index([1997], name="A"))
+
+    # ordered categorical dtype should be preserved
+    expected["B"] = expected["B"].astype(ds.dtype)
+
     tm.assert_frame_equal(result, expected)
 
 
@@ -1653,6 +1675,9 @@ def test_categorical_transform():
 
     expected["status"] = expected["status"].astype(delivery_status_type)
 
+    # .transform(max) should preserve ordered categoricals
+    expected["last_status"] = expected["last_status"].astype(delivery_status_type)
+
     tm.assert_frame_equal(result, expected)
 
 

From 0addcb304c2afc266367b94d14a8120119b8523c Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 27 Apr 2021 14:05:35 -0700
Subject: [PATCH 2/2] whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 7189c6e68d53d..dff4d32969c18 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -850,6 +850,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`)
 - Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`)
 - Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`)
+- Bug in :meth:`SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`)
 
 Reshaping
 ^^^^^^^^^