From b961ff5e0ba3a66e3c4daeb58c646ce3f4aeb7e9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 7 Oct 2024 17:09:16 -0400 Subject: [PATCH 1/2] REGR: groupby.value_counts with all NA values --- pandas/core/groupby/ops.py | 4 +++- .../groupby/methods/test_value_counts.py | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index a82e77140d274..b275ef4c4c1b1 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -881,7 +881,9 @@ def _ob_index_and_ids( sorter = ob_index.argsort() ob_index = ob_index.take(sorter) _, index = np.unique(sorter, return_index=True) - ob_ids = np.where(ob_ids == -1, -1, index.take(ob_ids)) + na_ids = ob_ids == -1 + if not na_ids.all(): + ob_ids = np.where(na_ids, -1, index.take(ob_ids)) ob_ids = ensure_platform_int(ob_ids) return ob_index, ob_ids diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 8f3022fbe551c..8ca6593a19f20 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -1219,3 +1219,25 @@ def test_value_counts_sort_categorical(sort, vc_sort, normalize): expected = expected.take(taker) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("groupby_sort", [True, False]) +def test_value_counts_all_na(sort, dropna, groupby_sort): + # GH#59989 + df = DataFrame({"a": [2, 1, 1], "b": np.nan}) + gb = df.groupby("a", sort=groupby_sort) + result = gb.value_counts(sort=sort, dropna=dropna) + + kwargs = {"levels": [[1, 2], [np.nan]], "names": ["a", "b"]} + if dropna: + data = [] + index = MultiIndex(codes=[[], []], **kwargs) + elif not groupby_sort and not sort: + data = [1, 2] + index = MultiIndex(codes=[[1, 0], [0, 0]], **kwargs) + else: + data = [2, 1] + index = MultiIndex(codes=[[0, 1], [0, 0]], **kwargs) + expected = Series(data, index=index, dtype="int64", name="count") + + tm.assert_series_equal(result, expected) From 995d1c866b5673e555c7698d0e70cbe5dcfde2ed Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 7 Oct 2024 17:12:11 -0400 Subject: [PATCH 2/2] Better implementation --- pandas/core/groupby/ops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b275ef4c4c1b1..b32119a2ddbde 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -867,7 +867,7 @@ def _ob_index_and_ids( names=names, verify_integrity=False, ) - if not consistent_sorting: + if not consistent_sorting and len(ob_index) > 0: # Sort by the levels where the corresponding sort argument is True n_levels = len(sorts) drop_levels = [ @@ -881,9 +881,7 @@ def _ob_index_and_ids( sorter = ob_index.argsort() ob_index = ob_index.take(sorter) _, index = np.unique(sorter, return_index=True) - na_ids = ob_ids == -1 - if not na_ids.all(): - ob_ids = np.where(na_ids, -1, index.take(ob_ids)) + ob_ids = np.where(ob_ids == -1, -1, index.take(ob_ids)) ob_ids = ensure_platform_int(ob_ids) return ob_index, ob_ids