Skip to content

Commit 5598a7f

Browse files
committed
test period case, fix most xfails
1 parent 29377a5 commit 5598a7f

File tree

1 file changed

+39
-73
lines changed

1 file changed

+39
-73
lines changed

pandas/tests/groupby/test_groupby.py

Lines changed: 39 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,7 @@ def test_pivot_table_values_key_error():
18581858
Categorical([0]),
18591859
[to_datetime(0)],
18601860
date_range(0, 1, 1, tz="US/Eastern"),
1861+
pd.period_range("2016-01-01", periods=3, freq="D"),
18611862
pd.array([0], dtype="Int64"),
18621863
pd.array([0], dtype="Float64"),
18631864
pd.array([False], dtype="boolean"),
@@ -1870,6 +1871,7 @@ def test_pivot_table_values_key_error():
18701871
"cat",
18711872
"dt64",
18721873
"dt64tz",
1874+
"period",
18731875
"Int64",
18741876
"Float64",
18751877
"boolean",
@@ -1886,13 +1888,6 @@ def test_empty_groupby(
18861888
override_dtype = None
18871889

18881890
if (
1889-
isinstance(values, Categorical)
1890-
and not isinstance(columns, list)
1891-
and op in ["sum", "prod", "skew"]
1892-
):
1893-
# handled below GH#41291
1894-
pass
1895-
elif (
18961891
isinstance(values, Categorical)
18971892
and len(keys) == 1
18981893
and op in ["idxmax", "idxmin"]
@@ -1901,18 +1896,8 @@ def test_empty_groupby(
19011896
raises=ValueError, match="attempt to get arg(min|max) of an empty sequence"
19021897
)
19031898
request.node.add_marker(mark)
1904-
elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
1905-
mark = pytest.mark.xfail(
1906-
raises=AssertionError, match="(DataFrame|Series) are different"
1907-
)
1908-
request.node.add_marker(mark)
1909-
elif isinstance(values, Categorical) and len(keys) == 2 and op in ["sum"]:
1910-
mark = pytest.mark.xfail(
1911-
raises=AssertionError, match="(DataFrame|Series) are different"
1912-
)
1913-
request.node.add_marker(mark)
19141899

1915-
elif isinstance(values, BooleanArray) and op in ["sum", "prod"]:
1900+
if isinstance(values, BooleanArray) and op in ["sum", "prod"]:
19161901
# We expect to get Int64 back for these
19171902
override_dtype = "Int64"
19181903

@@ -1936,6 +1921,26 @@ def get_result(**kwargs):
19361921
else:
19371922
return getattr(gb, method)(op, **kwargs)
19381923

1924+
def get_categorical_invalid_expected():
1925+
# Categorical is special without 'observed=True': we get a NaN entry
1926+
# corresponding to the unobserved group. If we passed observed=True
1927+
# to groupby, expected would just be 'df.set_index(keys)[columns]'
1928+
# as below
1929+
lev = Categorical([0], dtype=values.dtype)
1930+
if len(keys) != 1:
1931+
idx = MultiIndex.from_product([lev, lev], names=keys)
1932+
else:
1933+
# all columns are dropped, but we end up with one row
1934+
# Categorical is special without 'observed=True'
1935+
idx = Index(lev, name=keys[0])
1936+
1937+
expected = DataFrame([], columns=[], index=idx)
1938+
return expected
1939+
1940+
is_per = isinstance(df.dtypes[0], pd.PeriodDtype)
1941+
is_dt64 = df.dtypes[0].kind == "M"
1942+
is_cat = isinstance(values, Categorical)
1943+
19391944
if isinstance(values, Categorical) and not values.ordered and op in ["min", "max"]:
19401945
msg = f"Cannot perform {op} with non-ordered Categorical"
19411946
with pytest.raises(TypeError, match=msg):
@@ -1944,43 +1949,23 @@ def get_result(**kwargs):
19441949
if isinstance(columns, list):
19451950
# i.e. DataFrameGroupBy, not SeriesGroupBy
19461951
result = get_result(numeric_only=True)
1947-
1948-
# Categorical is special without 'observed=True', we get an NaN entry
1949-
# corresponding to the unobserved group. If we passed observed=True
1950-
# to groupby, expected would just be 'df.set_index(keys)[columns]'
1951-
# as below
1952-
lev = Categorical([0], dtype=values.dtype)
1953-
if len(keys) != 1:
1954-
idx = MultiIndex.from_product([lev, lev], names=keys)
1955-
else:
1956-
# all columns are dropped, but we end up with one row
1957-
# Categorical is special without 'observed=True'
1958-
idx = Index(lev, name=keys[0])
1959-
1960-
expected = DataFrame([], columns=[], index=idx)
1952+
expected = get_categorical_invalid_expected()
19611953
tm.assert_equal(result, expected)
19621954
return
19631955

19641956
if columns == "C":
19651957
# i.e. SeriesGroupBy
19661958
if op in ["prod", "sum", "skew"]:
19671959
# ops that require more than just ordered-ness
1968-
if df.dtypes[0].kind == "M":
1960+
if is_dt64 or is_cat or is_per:
19691961
# GH#41291
19701962
# datetime64, Categorical, and Period -> prod and sum are invalid
19711963
if op == "skew":
19721964
msg = "does not support reduction 'skew'"
1973-
else:
1965+
elif is_dt64:
19741966
msg = "datetime64 type does not support"
1975-
with pytest.raises(TypeError, match=msg):
1976-
get_result()
1977-
1978-
return
1979-
if op in ["prod", "sum", "skew"]:
1980-
if isinstance(values, Categorical):
1981-
# GH#41291
1982-
if op == "skew":
1983-
msg = f"does not support reduction '{op}'"
1967+
elif is_per:
1968+
msg = "Period type does not support"
19841969
else:
19851970
msg = "category type does not support"
19861971
with pytest.raises(TypeError, match=msg):
@@ -1991,48 +1976,29 @@ def get_result(**kwargs):
19911976
# i.e. DataFrameGroupBy
19921977
if op in ["prod", "sum"]:
19931978
# ops that require more than just ordered-ness
1994-
if df.dtypes[0].kind == "M":
1979+
if is_dt64 or is_per or is_cat:
19951980
# GH#41291
19961981
# datetime64, Period, and Categorical -> prod and sum are invalid
1997-
with pytest.raises(TypeError, match="datetime64 type does not support"):
1998-
get_result()
1999-
result = get_result(numeric_only=True)
2000-
2001-
# with numeric_only=True, these are dropped, and we get
2002-
# an empty DataFrame back
2003-
expected = df.set_index(keys)[[]]
2004-
tm.assert_equal(result, expected)
2005-
return
1982+
if is_dt64:
1983+
msg = "datetime64 type does not support"
1984+
elif is_per:
1985+
msg = "Period type does not support"
1986+
else:
1987+
msg = "category type does not support"
20061988

2007-
elif isinstance(values, Categorical):
2008-
# GH#41291
2009-
# Categorical doesn't implement sum or prod
2010-
with pytest.raises(TypeError, match="category type does not support"):
1989+
with pytest.raises(TypeError, match=msg):
20111990
get_result()
20121991
result = get_result(numeric_only=True)
20131992

20141993
# with numeric_only=True, these are dropped, and we get
20151994
# an empty DataFrame back
20161995
expected = df.set_index(keys)[[]]
2017-
if len(keys) != 1 and op == "prod":
2018-
# TODO: why just prod and not sum?
2019-
# Categorical is special without 'observed=True'
2020-
lev = Categorical([0], dtype=values.dtype)
2021-
mi = MultiIndex.from_product([lev, lev], names=["A", "B"])
2022-
expected = DataFrame([], columns=[], index=mi)
2023-
2024-
tm.assert_equal(result, expected)
2025-
return
2026-
2027-
elif df.dtypes[0] == object:
2028-
result = get_result()
2029-
expected = df.set_index(keys)[["C"]]
1996+
if is_cat:
1997+
expected = get_categorical_invalid_expected()
20301998
tm.assert_equal(result, expected)
20311999
return
20322000

2033-
if op == "skew" and (
2034-
isinstance(values, Categorical) or df.dtypes[0].kind == "M"
2035-
):
2001+
if op == "skew" and (is_cat or is_dt64 or is_per):
20362002
msg = "|".join(
20372003
[
20382004
"Categorical is not ordered",

0 commit comments

Comments
 (0)