Skip to content

Commit fc3b43a

Browse files
CLN: move categorical tests from test_aggregate to test_categorical (#35052)
1 parent: 1706d83 · commit: fc3b43a

File tree

2 files changed: 53 additions and 53 deletions.

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 0 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -458,22 +458,6 @@ def test_agg_split_object_part_datetime():
458458
tm.assert_frame_equal(result, expected)
459459

460460

461-
def test_agg_cython_category_not_implemented_fallback():
462-
# https://github.com/pandas-dev/pandas/issues/31450
463-
df = pd.DataFrame({"col_num": [1, 1, 2, 3]})
464-
df["col_cat"] = df["col_num"].astype("category")
465-
466-
result = df.groupby("col_num").col_cat.first()
467-
expected = pd.Series(
468-
[1, 2, 3], index=pd.Index([1, 2, 3], name="col_num"), name="col_cat"
469-
)
470-
tm.assert_series_equal(result, expected)
471-
472-
result = df.groupby("col_num").agg({"col_cat": "first"})
473-
expected = expected.to_frame()
474-
tm.assert_frame_equal(result, expected)
475-
476-
477461
class TestNamedAggregationSeries:
478462
def test_series_named_agg(self):
479463
df = pd.Series([1, 2, 3, 4])
@@ -809,16 +793,6 @@ def test_aggregate_mixed_types():
809793
tm.assert_frame_equal(result, expected)
810794

811795

812-
@pytest.mark.parametrize("func", ["min", "max"])
813-
def test_aggregate_categorical_lost_index(func: str):
814-
# GH: 28641 groupby drops index, when grouping over categorical column with min/max
815-
ds = pd.Series(["b"], dtype="category").cat.as_ordered()
816-
df = pd.DataFrame({"A": [1997], "B": ds})
817-
result = df.groupby("A").agg({"B": func})
818-
expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A"))
819-
tm.assert_frame_equal(result, expected)
820-
821-
822796
@pytest.mark.xfail(reason="Not implemented;see GH 31256")
823797
def test_aggregate_udf_na_extension_type():
824798
# https://github.com/pandas-dev/pandas/pull/31359
@@ -994,30 +968,3 @@ def test_groupby_get_by_index():
994968
res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])})
995969
expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A")
996970
pd.testing.assert_frame_equal(res, expected)
997-
998-
999-
def test_aggregate_categorical_with_isnan():
1000-
# GH 29837
1001-
df = pd.DataFrame(
1002-
{
1003-
"A": [1, 1, 1, 1],
1004-
"B": [1, 2, 1, 2],
1005-
"numerical_col": [0.1, 0.2, np.nan, 0.3],
1006-
"object_col": ["foo", "bar", "foo", "fee"],
1007-
"categorical_col": ["foo", "bar", "foo", "fee"],
1008-
}
1009-
)
1010-
1011-
df = df.astype({"categorical_col": "category"})
1012-
1013-
result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum())
1014-
index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B"))
1015-
expected = pd.DataFrame(
1016-
data={
1017-
"numerical_col": [1.0, 0.0],
1018-
"object_col": [0, 0],
1019-
"categorical_col": [0, 0],
1020-
},
1021-
index=index,
1022-
)
1023-
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_categorical.py

Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1456,3 +1456,56 @@ def test_sorted_missing_category_values():
14561456
result = df.groupby(["bar", "foo"]).size().unstack()
14571457

14581458
tm.assert_frame_equal(result, expected)
1459+
1460+
1461+
def test_agg_cython_category_not_implemented_fallback():
1462+
# https://github.com/pandas-dev/pandas/issues/31450
1463+
df = pd.DataFrame({"col_num": [1, 1, 2, 3]})
1464+
df["col_cat"] = df["col_num"].astype("category")
1465+
1466+
result = df.groupby("col_num").col_cat.first()
1467+
expected = pd.Series(
1468+
[1, 2, 3], index=pd.Index([1, 2, 3], name="col_num"), name="col_cat"
1469+
)
1470+
tm.assert_series_equal(result, expected)
1471+
1472+
result = df.groupby("col_num").agg({"col_cat": "first"})
1473+
expected = expected.to_frame()
1474+
tm.assert_frame_equal(result, expected)
1475+
1476+
1477+
@pytest.mark.parametrize("func", ["min", "max"])
1478+
def test_aggregate_categorical_lost_index(func: str):
1479+
# GH: 28641 groupby drops index, when grouping over categorical column with min/max
1480+
ds = pd.Series(["b"], dtype="category").cat.as_ordered()
1481+
df = pd.DataFrame({"A": [1997], "B": ds})
1482+
result = df.groupby("A").agg({"B": func})
1483+
expected = pd.DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A"))
1484+
tm.assert_frame_equal(result, expected)
1485+
1486+
1487+
def test_aggregate_categorical_with_isnan():
1488+
# GH 29837
1489+
df = pd.DataFrame(
1490+
{
1491+
"A": [1, 1, 1, 1],
1492+
"B": [1, 2, 1, 2],
1493+
"numerical_col": [0.1, 0.2, np.nan, 0.3],
1494+
"object_col": ["foo", "bar", "foo", "fee"],
1495+
"categorical_col": ["foo", "bar", "foo", "fee"],
1496+
}
1497+
)
1498+
1499+
df = df.astype({"categorical_col": "category"})
1500+
1501+
result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum())
1502+
index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B"))
1503+
expected = pd.DataFrame(
1504+
data={
1505+
"numerical_col": [1.0, 0.0],
1506+
"object_col": [0, 0],
1507+
"categorical_col": [0, 0],
1508+
},
1509+
index=index,
1510+
)
1511+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments (0)