diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d6e1f1d138e65..4861b7c90d1bb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1858,6 +1858,7 @@ def test_pivot_table_values_key_error(): Categorical([0]), [to_datetime(0)], date_range(0, 1, 1, tz="US/Eastern"), + pd.period_range("2016-01-01", periods=3, freq="D"), pd.array([0], dtype="Int64"), pd.array([0], dtype="Float64"), pd.array([False], dtype="boolean"), @@ -1870,6 +1871,7 @@ def test_pivot_table_values_key_error(): "cat", "dt64", "dt64tz", + "period", "Int64", "Float64", "boolean", @@ -1886,13 +1888,6 @@ def test_empty_groupby( override_dtype = None if ( - isinstance(values, Categorical) - and not isinstance(columns, list) - and op in ["sum", "prod", "skew"] - ): - # handled below GH#41291 - pass - elif ( isinstance(values, Categorical) and len(keys) == 1 and op in ["idxmax", "idxmin"] @@ -1901,18 +1896,8 @@ def test_empty_groupby( raises=ValueError, match="attempt to get arg(min|max) of an empty sequence" ) request.node.add_marker(mark) - elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]: - mark = pytest.mark.xfail( - raises=AssertionError, match="(DataFrame|Series) are different" - ) - request.node.add_marker(mark) - elif isinstance(values, Categorical) and len(keys) == 2 and op in ["sum"]: - mark = pytest.mark.xfail( - raises=AssertionError, match="(DataFrame|Series) are different" - ) - request.node.add_marker(mark) - elif isinstance(values, BooleanArray) and op in ["sum", "prod"]: + if isinstance(values, BooleanArray) and op in ["sum", "prod"]: # We expect to get Int64 back for these override_dtype = "Int64" @@ -1936,6 +1921,26 @@ def get_result(**kwargs): else: return getattr(gb, method)(op, **kwargs) + def get_categorical_invalid_expected(): + # Categorical is special without 'observed=True', we get an NaN entry + # corresponding to the unobserved group. If we passed observed=True + # to groupby, expected would just be 'df.set_index(keys)[columns]' + # as below + lev = Categorical([0], dtype=values.dtype) + if len(keys) != 1: + idx = MultiIndex.from_product([lev, lev], names=keys) + else: + # all columns are dropped, but we end up with one row + # Categorical is special without 'observed=True' + idx = Index(lev, name=keys[0]) + + expected = DataFrame([], columns=[], index=idx) + return expected + + is_per = isinstance(df.dtypes[0], pd.PeriodDtype) + is_dt64 = df.dtypes[0].kind == "M" + is_cat = isinstance(values, Categorical) + if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]: msg = f"Cannot perform {op} with non-ordered Categorical" with pytest.raises(TypeError, match=msg): @@ -1944,105 +1949,47 @@ def get_result(**kwargs): if isinstance(columns, list): # i.e. DataframeGroupBy, not SeriesGroupBy result = get_result(numeric_only=True) - - # Categorical is special without 'observed=True', we get an NaN entry - # corresponding to the unobserved group. If we passed observed=True - # to groupby, expected would just be 'df.set_index(keys)[columns]' - # as below - lev = Categorical([0], dtype=values.dtype) - if len(keys) != 1: - idx = MultiIndex.from_product([lev, lev], names=keys) - else: - # all columns are dropped, but we end up with one row - # Categorical is special without 'observed=True' - idx = Index(lev, name=keys[0]) - - expected = DataFrame([], columns=[], index=idx) + expected = get_categorical_invalid_expected() tm.assert_equal(result, expected) return - if columns == "C": - # i.e. SeriesGroupBy - if op in ["prod", "sum", "skew"]: - # ops that require more than just ordered-ness - if df.dtypes[0].kind == "M": - # GH#41291 - # datetime64 -> prod and sum are invalid - if op == "skew": - msg = "does not support reduction 'skew'" - else: - msg = "datetime64 type does not support" - with pytest.raises(TypeError, match=msg): - get_result() - - return - if op in ["prod", "sum", "skew"]: - if isinstance(values, Categorical): - # GH#41291 - if op == "skew": - msg = f"does not support reduction '{op}'" - else: - msg = "category type does not support" - with pytest.raises(TypeError, match=msg): - get_result() + if op in ["prod", "sum", "skew"]: + # ops that require more than just ordered-ness + if is_dt64 or is_cat or is_per: + # GH#41291 + # datetime64 -> prod and sum are invalid + if op == "skew": + msg = "does not support reduction 'skew'" + elif is_dt64: + msg = "datetime64 type does not support" + elif is_per: + msg = "Period type does not support" + else: + msg = "category type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + if not isinstance(columns, list): + # i.e. SeriesGroupBy return - else: - # ie. DataFrameGroupBy - if op in ["prod", "sum"]: - # ops that require more than just ordered-ness - if df.dtypes[0].kind == "M": - # GH#41291 - # datetime64 -> prod and sum are invalid - with pytest.raises(TypeError, match="datetime64 type does not support"): - get_result() - result = get_result(numeric_only=True) - - # with numeric_only=True, these are dropped, and we get - # an empty DataFrame back - expected = df.set_index(keys)[[]] - tm.assert_equal(result, expected) + elif op == "skew": + # TODO: test the numeric_only=True case return - - elif isinstance(values, Categorical): + else: + # i.e. op in ["prod", "sum"]: + # i.e. DataFrameGroupBy + # ops that require more than just ordered-ness # GH#41291 - # Categorical doesn't implement sum or prod - with pytest.raises(TypeError, match="category type does not support"): - get_result() result = get_result(numeric_only=True) # with numeric_only=True, these are dropped, and we get # an empty DataFrame back expected = df.set_index(keys)[[]] - if len(keys) != 1 and op == "prod": - # TODO: why just prod and not sum? - # Categorical is special without 'observed=True' - lev = Categorical([0], dtype=values.dtype) - mi = MultiIndex.from_product([lev, lev], names=["A", "B"]) - expected = DataFrame([], columns=[], index=mi) - - tm.assert_equal(result, expected) - return - - elif df.dtypes[0] == object: - result = get_result() - expected = df.set_index(keys)[["C"]] + if is_cat: + expected = get_categorical_invalid_expected() tm.assert_equal(result, expected) return - if op == "skew" and ( - isinstance(values, Categorical) or df.dtypes[0].kind == "M" - ): - msg = "|".join( - [ - "Categorical is not ordered", - "does not support reduction", - ] - ) - with pytest.raises(TypeError, match=msg): - get_result() - return - result = get_result() expected = df.set_index(keys)[columns] if override_dtype is not None: