@@ -1858,6 +1858,7 @@ def test_pivot_table_values_key_error():
1858
1858
Categorical ([0 ]),
1859
1859
[to_datetime (0 )],
1860
1860
date_range (0 , 1 , 1 , tz = "US/Eastern" ),
1861
+ pd .period_range ("2016-01-01" , periods = 3 , freq = "D" ),
1861
1862
pd .array ([0 ], dtype = "Int64" ),
1862
1863
pd .array ([0 ], dtype = "Float64" ),
1863
1864
pd .array ([False ], dtype = "boolean" ),
@@ -1870,6 +1871,7 @@ def test_pivot_table_values_key_error():
1870
1871
"cat" ,
1871
1872
"dt64" ,
1872
1873
"dt64tz" ,
1874
+ "period" ,
1873
1875
"Int64" ,
1874
1876
"Float64" ,
1875
1877
"boolean" ,
@@ -1886,13 +1888,6 @@ def test_empty_groupby(
1886
1888
override_dtype = None
1887
1889
1888
1890
if (
1889
- isinstance (values , Categorical )
1890
- and not isinstance (columns , list )
1891
- and op in ["sum" , "prod" , "skew" ]
1892
- ):
1893
- # handled below GH#41291
1894
- pass
1895
- elif (
1896
1891
isinstance (values , Categorical )
1897
1892
and len (keys ) == 1
1898
1893
and op in ["idxmax" , "idxmin" ]
@@ -1901,18 +1896,8 @@ def test_empty_groupby(
1901
1896
raises = ValueError , match = "attempt to get arg(min|max) of an empty sequence"
1902
1897
)
1903
1898
request .node .add_marker (mark )
1904
- elif isinstance (values , Categorical ) and len (keys ) == 1 and op in ["sum" , "prod" ]:
1905
- mark = pytest .mark .xfail (
1906
- raises = AssertionError , match = "(DataFrame|Series) are different"
1907
- )
1908
- request .node .add_marker (mark )
1909
- elif isinstance (values , Categorical ) and len (keys ) == 2 and op in ["sum" ]:
1910
- mark = pytest .mark .xfail (
1911
- raises = AssertionError , match = "(DataFrame|Series) are different"
1912
- )
1913
- request .node .add_marker (mark )
1914
1899
1915
- elif isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
1900
+ if isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
1916
1901
# We expect to get Int64 back for these
1917
1902
override_dtype = "Int64"
1918
1903
@@ -1936,6 +1921,26 @@ def get_result(**kwargs):
1936
1921
else :
1937
1922
return getattr (gb , method )(op , ** kwargs )
1938
1923
1924
+ def get_categorical_invalid_expected ():
1925
+ # Categorical is special without 'observed=True', we get an NaN entry
1926
+ # corresponding to the unobserved group. If we passed observed=True
1927
+ # to groupby, expected would just be 'df.set_index(keys)[columns]'
1928
+ # as below
1929
+ lev = Categorical ([0 ], dtype = values .dtype )
1930
+ if len (keys ) != 1 :
1931
+ idx = MultiIndex .from_product ([lev , lev ], names = keys )
1932
+ else :
1933
+ # all columns are dropped, but we end up with one row
1934
+ # Categorical is special without 'observed=True'
1935
+ idx = Index (lev , name = keys [0 ])
1936
+
1937
+ expected = DataFrame ([], columns = [], index = idx )
1938
+ return expected
1939
+
1940
+ is_per = isinstance (df .dtypes [0 ], pd .PeriodDtype )
1941
+ is_dt64 = df .dtypes [0 ].kind == "M"
1942
+ is_cat = isinstance (values , Categorical )
1943
+
1939
1944
if isinstance (values , Categorical ) and not values .ordered and op in ["min" , "max" ]:
1940
1945
msg = f"Cannot perform { op } with non-ordered Categorical"
1941
1946
with pytest .raises (TypeError , match = msg ):
@@ -1944,43 +1949,23 @@ def get_result(**kwargs):
1944
1949
if isinstance (columns , list ):
1945
1950
# i.e. DataframeGroupBy, not SeriesGroupBy
1946
1951
result = get_result (numeric_only = True )
1947
-
1948
- # Categorical is special without 'observed=True', we get an NaN entry
1949
- # corresponding to the unobserved group. If we passed observed=True
1950
- # to groupby, expected would just be 'df.set_index(keys)[columns]'
1951
- # as below
1952
- lev = Categorical ([0 ], dtype = values .dtype )
1953
- if len (keys ) != 1 :
1954
- idx = MultiIndex .from_product ([lev , lev ], names = keys )
1955
- else :
1956
- # all columns are dropped, but we end up with one row
1957
- # Categorical is special without 'observed=True'
1958
- idx = Index (lev , name = keys [0 ])
1959
-
1960
- expected = DataFrame ([], columns = [], index = idx )
1952
+ expected = get_categorical_invalid_expected ()
1961
1953
tm .assert_equal (result , expected )
1962
1954
return
1963
1955
1964
1956
if columns == "C" :
1965
1957
# i.e. SeriesGroupBy
1966
1958
if op in ["prod" , "sum" , "skew" ]:
1967
1959
# ops that require more than just ordered-ness
1968
- if df . dtypes [ 0 ]. kind == "M" :
1960
+ if is_dt64 or is_cat or is_per :
1969
1961
# GH#41291
1970
1962
# datetime64 -> prod and sum are invalid
1971
1963
if op == "skew" :
1972
1964
msg = "does not support reduction 'skew'"
1973
- else :
1965
+ elif is_dt64 :
1974
1966
msg = "datetime64 type does not support"
1975
- with pytest .raises (TypeError , match = msg ):
1976
- get_result ()
1977
-
1978
- return
1979
- if op in ["prod" , "sum" , "skew" ]:
1980
- if isinstance (values , Categorical ):
1981
- # GH#41291
1982
- if op == "skew" :
1983
- msg = f"does not support reduction '{ op } '"
1967
+ elif is_per :
1968
+ msg = "Period type does not support"
1984
1969
else :
1985
1970
msg = "category type does not support"
1986
1971
with pytest .raises (TypeError , match = msg ):
@@ -1991,48 +1976,29 @@ def get_result(**kwargs):
1991
1976
# ie. DataFrameGroupBy
1992
1977
if op in ["prod" , "sum" ]:
1993
1978
# ops that require more than just ordered-ness
1994
- if df . dtypes [ 0 ]. kind == "M" :
1979
+ if is_dt64 or is_per or is_cat :
1995
1980
# GH#41291
1996
1981
# datetime64 -> prod and sum are invalid
1997
- with pytest .raises (TypeError , match = "datetime64 type does not support" ):
1998
- get_result ()
1999
- result = get_result (numeric_only = True )
2000
-
2001
- # with numeric_only=True, these are dropped, and we get
2002
- # an empty DataFrame back
2003
- expected = df .set_index (keys )[[]]
2004
- tm .assert_equal (result , expected )
2005
- return
1982
+ if is_dt64 :
1983
+ msg = "datetime64 type does not support"
1984
+ elif is_per :
1985
+ msg = "Period type does not support"
1986
+ else :
1987
+ msg = "category type does not support"
2006
1988
2007
- elif isinstance (values , Categorical ):
2008
- # GH#41291
2009
- # Categorical doesn't implement sum or prod
2010
- with pytest .raises (TypeError , match = "category type does not support" ):
1989
+ with pytest .raises (TypeError , match = msg ):
2011
1990
get_result ()
2012
1991
result = get_result (numeric_only = True )
2013
1992
2014
1993
# with numeric_only=True, these are dropped, and we get
2015
1994
# an empty DataFrame back
2016
1995
expected = df .set_index (keys )[[]]
2017
- if len (keys ) != 1 and op == "prod" :
2018
- # TODO: why just prod and not sum?
2019
- # Categorical is special without 'observed=True'
2020
- lev = Categorical ([0 ], dtype = values .dtype )
2021
- mi = MultiIndex .from_product ([lev , lev ], names = ["A" , "B" ])
2022
- expected = DataFrame ([], columns = [], index = mi )
2023
-
2024
- tm .assert_equal (result , expected )
2025
- return
2026
-
2027
- elif df .dtypes [0 ] == object :
2028
- result = get_result ()
2029
- expected = df .set_index (keys )[["C" ]]
1996
+ if is_cat :
1997
+ expected = get_categorical_invalid_expected ()
2030
1998
tm .assert_equal (result , expected )
2031
1999
return
2032
2000
2033
- if op == "skew" and (
2034
- isinstance (values , Categorical ) or df .dtypes [0 ].kind == "M"
2035
- ):
2001
+ if op == "skew" and (is_cat or is_dt64 or is_per ):
2036
2002
msg = "|" .join (
2037
2003
[
2038
2004
"Categorical is not ordered" ,
0 commit comments