diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst index fa3ac27eba577..ed4a7ffc44441 100644 --- a/doc/source/whatsnew/v1.0.4.rst +++ b/doc/source/whatsnew/v1.0.4.rst @@ -15,7 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where ``None`` was not preserved in object dtype (:issue:`32800`) - .. _whatsnew_104.bug_fixes: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 3bfbd1a88650a..53c37c8cc8190 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -892,7 +892,9 @@ def group_last(rank_t[:, :] out, for j in range(K): val = values[i, j] - if not checknull(val): + # None should not be treated like other NA-like values + # so that it won't be converted to nan + if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 @@ -983,7 +985,9 @@ def group_nth(rank_t[:, :] out, for j in range(K): val = values[i, j] - if not checknull(val): + # None should not be treated like other NA-like values + # so that it won't be converted to nan + if not checknull(val) or val is None: # NB: use _treat_as_na here once # conditional-nogil is available. 
nobs[lab, j] += 1 diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index b1476f1059d84..7b2e3f7a7e06a 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -94,6 +94,17 @@ def test_nth_with_na_object(index, nulls_fixture): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + # None should be preserved as object dtype + df = pd.DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + def test_first_last_nth_dtypes(df_mixed_floats): df = df_mixed_floats.copy()