From b021bb2cdc99d360bb59b608425892218596d198 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 27 Jan 2023 16:49:02 -0800
Subject: [PATCH 1/6] BUG: GroupBy.min/max with unordered Categorical and no
 groups

---
 doc/source/whatsnew/v2.0.0.rst       |  1 +
 pandas/core/groupby/ops.py           | 15 +++---
 pandas/tests/groupby/test_groupby.py | 81 +++++++++++++---------------
 3 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index e90717efc10b5..08890018804e2 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -958,6 +958,7 @@ Categorical
 - Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
 - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
 - Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
+- Bug in :meth:`SeriesGroupBy.min`, :meth:`SeriesGroupBy.max`, :meth:`DataFrameGroupBy.min`, and :meth:`DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`??`)
 -
 
 Datetimelike
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ced3190206f37..6aa52679c6526 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -227,9 +227,10 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
 
         Raises
         ------
+        TypeError
+            This is not a valid operation for this dtype.
         NotImplementedError
-            This is either not a valid function for this dtype, or
-            valid but not implemented in cython.
+            This may be a valid operation, but does not have a cython implementation.
         """
         how = self.how
 
@@ -238,16 +239,16 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
             return
 
         if isinstance(dtype, CategoricalDtype):
-            # NotImplementedError for methods that can fall back to a
-            #  non-cython implementation.
             if how in ["sum", "prod", "cumsum", "cumprod"]:
                 raise TypeError(f"{dtype} type does not support {how} operations")
+            if how in ["min", "max", "rank"] and not dtype.ordered:
+                # raise TypeError instead of NotImplementedError to ensure we
+                #  don't go down a group-by-group path, since in the empty-groups
+                #  case that would fail to raise
+                raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
             if how not in ["rank"]:
                 # only "rank" is implemented in cython
                 raise NotImplementedError(f"{dtype} dtype not supported")
-            if not dtype.ordered:
-                # TODO: TypeError?
-                raise NotImplementedError(f"{dtype} dtype not supported")
 
         elif is_sparse(dtype):
             raise NotImplementedError(f"{dtype} dtype not supported")
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e9b18836e003c..5a41b3bdbdc7b 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1901,25 +1901,12 @@ def test_empty_groupby(
             raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
         )
         request.node.add_marker(mark)
-    elif (
-        isinstance(values, Categorical)
-        and len(keys) == 1
-        and not isinstance(columns, list)
-    ):
-        mark = pytest.mark.xfail(
-            raises=TypeError, match="'Categorical' does not implement"
-        )
-        request.node.add_marker(mark)
     elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
         mark = pytest.mark.xfail(
             raises=AssertionError, match="(DataFrame|Series) are different"
         )
         request.node.add_marker(mark)
-    elif (
-        isinstance(values, Categorical)
-        and len(keys) == 2
-        and op in ["min", "max", "sum"]
-    ):
+    elif isinstance(values, Categorical) and len(keys) == 2 and op in ["sum"]:
         mark = pytest.mark.xfail(
             raises=AssertionError, match="(DataFrame|Series) are different"
         )
@@ -1949,6 +1936,31 @@ def get_result(**kwargs):
         else:
             return getattr(gb, method)(op, **kwargs)
 
+    if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:
+        msg = f"Cannot perform {op} with non-ordered Categorical"
+        with pytest.raises(TypeError, match=msg):
+            get_result()
+
+        if isinstance(columns, list):
+            # i.e. DataframeGroupBy, not SeriesGroupBy
+            result = get_result(numeric_only=True)
+
+            # Categorical is special without 'observed=True', we get an NaN entry
+            #  corresponding to the unobserved group. If we passed observed=True
+            #  to groupby, expected would just be 'df.set_index(keys)[columns]'
+            #  as below
+            lev = Categorical([0], dtype=values.dtype)
+            if len(keys) != 1:
+                idx = MultiIndex.from_product([lev, lev], names=keys)
+            else:
+                # all columns are dropped, but we end up with one row
+                # Categorical is special without 'observed=True'
+                idx = Index(lev, name=keys[0])
+
+            expected = DataFrame([], columns=[], index=idx)
+            tm.assert_equal(result, expected)
+        return
+
     if columns == "C":
         # i.e. SeriesGroupBy
         if op in ["prod", "sum", "skew"]:
@@ -2018,38 +2030,17 @@ def get_result(**kwargs):
                 tm.assert_equal(result, expected)
                 return
 
-        if (op in ["min", "max", "skew"] and isinstance(values, Categorical)) or (
-            op == "skew" and df.dtypes[0].kind == "M"
+        if op == "skew" and (
+            isinstance(values, Categorical) or df.dtypes[0].kind == "M"
         ):
-            if op == "skew" or len(keys) == 1:
-                msg = "|".join(
-                    [
-                        "Categorical is not ordered",
-                        "does not support reduction",
-                    ]
-                )
-                with pytest.raises(TypeError, match=msg):
-                    get_result()
-                return
-            # Categorical doesn't implement, so with numeric_only=True
-            #  these are dropped and we get an empty DataFrame back
-            result = get_result()
-
-            # with numeric_only=True, these are dropped, and we get
-            # an empty DataFrame back
-            if len(keys) != 1:
-                # Categorical is special without 'observed=True'
-                lev = Categorical([0], dtype=values.dtype)
-                mi = MultiIndex.from_product([lev, lev], names=keys)
-                expected = DataFrame([], columns=[], index=mi)
-            else:
-                # all columns are dropped, but we end up with one row
-                # Categorical is special without 'observed=True'
-                lev = Categorical([0], dtype=values.dtype)
-                ci = Index(lev, name=keys[0])
-                expected = DataFrame([], columns=[], index=ci)
-
-            tm.assert_equal(result, expected)
+            msg = "|".join(
+                [
+                    "Categorical is not ordered",
+                    "does not support reduction",
+                ]
+            )
+            with pytest.raises(TypeError, match=msg):
+                get_result()
             return
 
     result = get_result()

From 9225fc75525fe3d8b26673aec4edccdfceaa940a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 27 Jan 2023 16:50:38 -0800
Subject: [PATCH 2/6] GH ref

---
 doc/source/whatsnew/v2.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 08890018804e2..3e465e1d7c07b 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -958,7 +958,7 @@ Categorical
 - Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
 - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
 - Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
-- Bug in :meth:`SeriesGroupBy.min`, :meth:`SeriesGroupBy.max`, :meth:`DataFrameGroupBy.min`, and :meth:`DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`??`)
+- Bug in :meth:`SeriesGroupBy.min`, :meth:`SeriesGroupBy.max`, :meth:`DataFrameGroupBy.min`, and :meth:`DataFrameGroupBy.max` with unordered :class:`CategoricalDtype` with no groups failing to raise ``TypeError`` (:issue:`51034`)
 -
 
 Datetimelike

From 203431e6c0ec7bb81fcd53e7724d6db57f55563b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 27 Jan 2023 19:09:04 -0800
Subject: [PATCH 3/6] update test

---
 pandas/tests/groupby/test_function.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 1e16e353cc1a4..1ef97fed7ba05 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -250,6 +250,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                 [
                     "Categorical is not ordered",
                     "function is not implemented for this dtype",
+                    f"Cannot perform {method} with non-ordered Categorical",
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -276,6 +277,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "category type does not support",
                     "can't multiply sequence",
                     "function is not implemented for this dtype",
+                    f"Cannot perform {method} with non-ordered Categorical",
                 ]
             )
             with pytest.raises(exception, match=msg):

From 29377a5afd22cdda7652543071ecc6b5c0670f29 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 30 Jan 2023 12:33:05 -0800
Subject: [PATCH 4/6] test for non-ordered Categorical rank

---
 pandas/tests/groupby/test_rank.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 8bbe38d3379ac..d0b848a567346 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -13,6 +13,23 @@
 import pandas._testing as tm
 
 
+def test_rank_unordered_categorical_typeerror():
+    # GH#51034 should be TypeError, not NotImplementedError
+    cat = pd.Categorical([], ordered=False)
+    ser = Series(cat)
+    df = ser.to_frame()
+
+    msg = "Cannot perform rank with non-ordered Categorical"
+
+    gb = ser.groupby(cat)
+    with pytest.raises(TypeError, match=msg):
+        gb.rank()
+
+    gb2 = df.groupby(cat)
+    with pytest.raises(TypeError, match=msg):
+        gb2.rank()
+
+
 def test_rank_apply():
     lev1 = tm.rands_array(10, 100)
     lev2 = tm.rands_array(10, 130)

From 5598a7f1744f3819e097240f6b5c5a28f3b05cd0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 30 Jan 2023 13:11:08 -0800
Subject: [PATCH 5/6] test period case, fix most xfails

---
 pandas/tests/groupby/test_groupby.py | 112 ++++++++++-----------------
 1 file changed, 39 insertions(+), 73 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5a41b3bdbdc7b..8c00bff291fd2 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1858,6 +1858,7 @@ def test_pivot_table_values_key_error():
         Categorical([0]),
         [to_datetime(0)],
         date_range(0, 1, 1, tz="US/Eastern"),
+        pd.period_range("2016-01-01", periods=3, freq="D"),
         pd.array([0], dtype="Int64"),
         pd.array([0], dtype="Float64"),
         pd.array([False], dtype="boolean"),
@@ -1870,6 +1871,7 @@ def test_pivot_table_values_key_error():
         "cat",
         "dt64",
         "dt64tz",
+        "period",
         "Int64",
         "Float64",
         "boolean",
@@ -1886,13 +1888,6 @@ def test_empty_groupby(
     override_dtype = None
 
     if (
-        isinstance(values, Categorical)
-        and not isinstance(columns, list)
-        and op in ["sum", "prod", "skew"]
-    ):
-        # handled below GH#41291
-        pass
-    elif (
         isinstance(values, Categorical)
         and len(keys) == 1
         and op in ["idxmax", "idxmin"]
@@ -1901,18 +1896,8 @@ def test_empty_groupby(
             raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
         )
         request.node.add_marker(mark)
-    elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
-        mark = pytest.mark.xfail(
-            raises=AssertionError, match="(DataFrame|Series) are different"
-        )
-        request.node.add_marker(mark)
-    elif isinstance(values, Categorical) and len(keys) == 2 and op in ["sum"]:
-        mark = pytest.mark.xfail(
-            raises=AssertionError, match="(DataFrame|Series) are different"
-        )
-        request.node.add_marker(mark)
 
-    elif isinstance(values, BooleanArray) and op in ["sum", "prod"]:
+    if isinstance(values, BooleanArray) and op in ["sum", "prod"]:
         # We expect to get Int64 back for these
         override_dtype = "Int64"
 
@@ -1936,6 +1921,26 @@ def get_result(**kwargs):
         else:
             return getattr(gb, method)(op, **kwargs)
 
+    def get_categorical_invalid_expected():
+        # Categorical is special without 'observed=True', we get an NaN entry
+        #  corresponding to the unobserved group. If we passed observed=True
+        #  to groupby, expected would just be 'df.set_index(keys)[columns]'
+        #  as below
+        lev = Categorical([0], dtype=values.dtype)
+        if len(keys) != 1:
+            idx = MultiIndex.from_product([lev, lev], names=keys)
+        else:
+            # all columns are dropped, but we end up with one row
+            # Categorical is special without 'observed=True'
+            idx = Index(lev, name=keys[0])
+
+        expected = DataFrame([], columns=[], index=idx)
+        return expected
+
+    is_per = isinstance(df.dtypes[0], pd.PeriodDtype)
+    is_dt64 = df.dtypes[0].kind == "M"
+    is_cat = isinstance(values, Categorical)
+
     if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:
         msg = f"Cannot perform {op} with non-ordered Categorical"
         with pytest.raises(TypeError, match=msg):
@@ -1944,20 +1949,7 @@ def get_result(**kwargs):
         if isinstance(columns, list):
             # i.e. DataframeGroupBy, not SeriesGroupBy
             result = get_result(numeric_only=True)
-
-            # Categorical is special without 'observed=True', we get an NaN entry
-            #  corresponding to the unobserved group. If we passed observed=True
-            #  to groupby, expected would just be 'df.set_index(keys)[columns]'
-            #  as below
-            lev = Categorical([0], dtype=values.dtype)
-            if len(keys) != 1:
-                idx = MultiIndex.from_product([lev, lev], names=keys)
-            else:
-                # all columns are dropped, but we end up with one row
-                # Categorical is special without 'observed=True'
-                idx = Index(lev, name=keys[0])
-
-            expected = DataFrame([], columns=[], index=idx)
+            expected = get_categorical_invalid_expected()
             tm.assert_equal(result, expected)
         return
 
@@ -1965,22 +1957,15 @@ def get_result(**kwargs):
         # i.e. SeriesGroupBy
         if op in ["prod", "sum", "skew"]:
             # ops that require more than just ordered-ness
-            if df.dtypes[0].kind == "M":
+            if is_dt64 or is_cat or is_per:
                 # GH#41291
                 # datetime64 -> prod and sum are invalid
                 if op == "skew":
                     msg = "does not support reduction 'skew'"
-                else:
+                elif is_dt64:
                     msg = "datetime64 type does not support"
-                with pytest.raises(TypeError, match=msg):
-                    get_result()
-
-                return
-        if op in ["prod", "sum", "skew"]:
-            if isinstance(values, Categorical):
-                # GH#41291
-                if op == "skew":
-                    msg = f"does not support reduction '{op}'"
+                elif is_per:
+                    msg = "Period type does not support"
                 else:
                     msg = "category type does not support"
                 with pytest.raises(TypeError, match=msg):
@@ -1991,48 +1976,29 @@ def get_result(**kwargs):
         # ie. DataFrameGroupBy
         if op in ["prod", "sum"]:
             # ops that require more than just ordered-ness
-            if df.dtypes[0].kind == "M":
+            if is_dt64 or is_per or is_cat:
                 # GH#41291
                 # datetime64 -> prod and sum are invalid
-                with pytest.raises(TypeError, match="datetime64 type does not support"):
-                    get_result()
-                result = get_result(numeric_only=True)
-
-                # with numeric_only=True, these are dropped, and we get
-                # an empty DataFrame back
-                expected = df.set_index(keys)[[]]
-                tm.assert_equal(result, expected)
-                return
+                if is_dt64:
+                    msg = "datetime64 type does not support"
+                elif is_per:
+                    msg = "Period type does not support"
+                else:
+                    msg = "category type does not support"
 
-            elif isinstance(values, Categorical):
-                # GH#41291
-                # Categorical doesn't implement sum or prod
-                with pytest.raises(TypeError, match="category type does not support"):
+                with pytest.raises(TypeError, match=msg):
                     get_result()
                 result = get_result(numeric_only=True)
 
                 # with numeric_only=True, these are dropped, and we get
                 # an empty DataFrame back
                 expected = df.set_index(keys)[[]]
-                if len(keys) != 1 and op == "prod":
-                    # TODO: why just prod and not sum?
-                    # Categorical is special without 'observed=True'
-                    lev = Categorical([0], dtype=values.dtype)
-                    mi = MultiIndex.from_product([lev, lev], names=["A", "B"])
-                    expected = DataFrame([], columns=[], index=mi)
-
-                tm.assert_equal(result, expected)
-                return
-
-            elif df.dtypes[0] == object:
-                result = get_result()
-                expected = df.set_index(keys)[["C"]]
+                if is_cat:
+                    expected = get_categorical_invalid_expected()
                 tm.assert_equal(result, expected)
                 return
 
-        if op == "skew" and (
-            isinstance(values, Categorical) or df.dtypes[0].kind == "M"
-        ):
+        if op == "skew" and (is_cat or is_dt64 or is_per):
             msg = "|".join(
                 [
                     "Categorical is not ordered",

From 346dd91c433516f46925cf8390d724db2f571cd3 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 30 Jan 2023 13:15:10 -0800
Subject: [PATCH 6/6] simplify

---
 pandas/tests/groupby/test_groupby.py | 67 ++++++++++------------------
 1 file changed, 24 insertions(+), 43 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 8c00bff291fd2..2c6d386a36e78 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1953,41 +1953,33 @@ def get_categorical_invalid_expected():
             tm.assert_equal(result, expected)
         return
 
-    if columns == "C":
-        # i.e. SeriesGroupBy
-        if op in ["prod", "sum", "skew"]:
-            # ops that require more than just ordered-ness
-            if is_dt64 or is_cat or is_per:
-                # GH#41291
-                # datetime64 -> prod and sum are invalid
-                if op == "skew":
-                    msg = "does not support reduction 'skew'"
-                elif is_dt64:
-                    msg = "datetime64 type does not support"
-                elif is_per:
-                    msg = "Period type does not support"
-                else:
-                    msg = "category type does not support"
-                with pytest.raises(TypeError, match=msg):
-                    get_result()
+    if op in ["prod", "sum", "skew"]:
+        # ops that require more than just ordered-ness
+        if is_dt64 or is_cat or is_per:
+            # GH#41291
+            # datetime64 -> prod and sum are invalid
+            if op == "skew":
+                msg = "does not support reduction 'skew'"
+            elif is_dt64:
+                msg = "datetime64 type does not support"
+            elif is_per:
+                msg = "Period type does not support"
+            else:
+                msg = "category type does not support"
+            with pytest.raises(TypeError, match=msg):
+                get_result()
 
+            if not isinstance(columns, list):
+                # i.e. SeriesGroupBy
                 return
-    else:
-        # ie. DataFrameGroupBy
-        if op in ["prod", "sum"]:
-            # ops that require more than just ordered-ness
-            if is_dt64 or is_per or is_cat:
+            elif op == "skew":
+                # TODO: test the numeric_only=True case
+                return
+            else:
+                # i.e. op in ["prod", "sum"]:
+                # i.e. DataFrameGroupBy
+                # ops that require more than just ordered-ness
                 # GH#41291
-                # datetime64 -> prod and sum are invalid
-                if is_dt64:
-                    msg = "datetime64 type does not support"
-                elif is_per:
-                    msg = "Period type does not support"
-                else:
-                    msg = "category type does not support"
-
-                with pytest.raises(TypeError, match=msg):
-                    get_result()
                 result = get_result(numeric_only=True)
 
                 # with numeric_only=True, these are dropped, and we get
@@ -1998,17 +1990,6 @@ def get_categorical_invalid_expected():
                 tm.assert_equal(result, expected)
                 return
 
-        if op == "skew" and (is_cat or is_dt64 or is_per):
-            msg = "|".join(
-                [
-                    "Categorical is not ordered",
-                    "does not support reduction",
-                ]
-            )
-            with pytest.raises(TypeError, match=msg):
-                get_result()
-            return
-
     result = get_result()
     expected = df.set_index(keys)[columns]
     if override_dtype is not None: