From ff73cec067a77b2d51d7173dbc09617ab159844e Mon Sep 17 00:00:00 2001
From: Neel Raman
Date: Tue, 20 Jul 2021 17:35:24 -0500
Subject: [PATCH 1/2] BUG: Fix bug in SeriesGroupBy.value_counts when DataFrame
 has one row (#42618)

---
 pandas/core/groupby/generic.py            |  2 +-
 pandas/tests/groupby/test_value_counts.py | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 88d1baae86467..a6be85bf2be2a 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -758,7 +758,7 @@ def apply_series_value_counts():
         # new values are where sorted labels change
         lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
         inc = np.r_[True, lchanges]
-        if not len(lchanges):
+        if not len(val):
             inc = lchanges
         inc[idx] = True  # group boundaries are also new values
         out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 8bb07b7163f2e..fc8c9d2ca1cb0 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -143,6 +143,25 @@ def test_series_groupby_value_counts_empty():
     tm.assert_series_equal(result, expected)
 
 
+def test_series_groupby_value_counts_one_row():
+    # GH42618
+    df = DataFrame([[1, 2]], columns=["A", "B"])
+    dfg = df.groupby("A")
+
+    result = dfg["B"].value_counts()
+    expected = df.value_counts()
+
+    tm.assert_series_equal(result, expected, check_names=False)
+
+    df = DataFrame([[1, 2, 3]], columns=["A", "B", "C"])
+    dfg = df.groupby(["A", "B"])
+
+    result = dfg["C"].value_counts()
+    expected = df.value_counts()
+
+    tm.assert_series_equal(result, expected, check_names=False)
+
+
 def test_series_groupby_value_counts_on_categorical():
     # GH38672
 

From 0a10eec630a787cdff5c1cf27e6a7fe92bbb73b0 Mon Sep 17 00:00:00 2001
From: Neel Raman
Date: Tue, 20 Jul 2021 17:44:14 -0500
Subject: [PATCH 2/2] add whatsnew entry (#42618)

---
 doc/source/whatsnew/v1.3.1.rst            |  1 +
 pandas/tests/groupby/test_value_counts.py | 43 ++++++++---------------
 2 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst
index 2e3c15eca972f..90ec2da934af5 100644
--- a/doc/source/whatsnew/v1.3.1.rst
+++ b/doc/source/whatsnew/v1.3.1.rst
@@ -25,6 +25,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`)
 - Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`)
 - Bug in :class:`Series` constructor not accepting a ``dask.Array`` (:issue:`38645`)
+- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index fc8c9d2ca1cb0..54f672cb69800 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -122,44 +122,29 @@ def test_series_groupby_value_counts_with_grouper():
     tm.assert_series_equal(result, expected)
 
 
-def test_series_groupby_value_counts_empty():
+@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
+def test_series_groupby_value_counts_empty(columns):
     # GH39172
-    df = DataFrame(columns=["A", "B"])
-    dfg = df.groupby("A")
+    df = DataFrame(columns=columns)
+    dfg = df.groupby(columns[:-1])
 
-    result = dfg["B"].value_counts()
-    expected = Series([], name="B", dtype=result.dtype)
-    expected.index = MultiIndex.from_arrays([[]] * 2, names=["A", "B"])
+    result = dfg[columns[-1]].value_counts()
+    expected = Series([], name=columns[-1], dtype=result.dtype)
+    expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns)
 
     tm.assert_series_equal(result, expected)
 
-    df = DataFrame(columns=["A", "B", "C"])
-    dfg = df.groupby(["A", "B"])
 
-    result = dfg["C"].value_counts()
-    expected = Series([], name="C", dtype=result.dtype)
-    expected.index = MultiIndex.from_arrays([[]] * 3, names=["A", "B", "C"])
-
-    tm.assert_series_equal(result, expected)
-
-
-def test_series_groupby_value_counts_one_row():
+@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
+def test_series_groupby_value_counts_one_row(columns):
     # GH42618
-    df = DataFrame([[1, 2]], columns=["A", "B"])
-    dfg = df.groupby("A")
+    df = DataFrame(data=[range(len(columns))], columns=columns)
+    dfg = df.groupby(columns[:-1])
 
-    result = dfg["B"].value_counts()
-    expected = df.value_counts()
+    result = dfg[columns[-1]].value_counts()
+    expected = df.value_counts().rename(columns[-1])
 
-    tm.assert_series_equal(result, expected, check_names=False)
-
-    df = DataFrame([[1, 2, 3]], columns=["A", "B", "C"])
-    dfg = df.groupby(["A", "B"])
-
-    result = dfg["C"].value_counts()
-    expected = df.value_counts()
-
-    tm.assert_series_equal(result, expected, check_names=False)
+    tm.assert_series_equal(result, expected)
 
 
 def test_series_groupby_value_counts_on_categorical():