diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 659ca228bdcb0..9a813e866e8d0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3212,6 +3212,9 @@ def ngroup(self, ascending: bool = True): would be seen when iterating over the groupby object, not the order they are first observed. + Groups with missing keys (where `pd.isna()` is True) will be labeled with `NaN` + and will be excluded from the count. + Parameters ---------- ascending : bool, default True @@ -3228,38 +3231,38 @@ def ngroup(self, ascending: bool = True): Examples -------- - >>> df = pd.DataFrame({"A": list("aaabba")}) + >>> df = pd.DataFrame({"color": ["red", None, "red", "blue", "blue", "red"]}) >>> df - A - 0 a - 1 a - 2 a - 3 b - 4 b - 5 a - >>> df.groupby('A').ngroup() - 0 0 - 1 0 - 2 0 - 3 1 - 4 1 - 5 0 - dtype: int64 - >>> df.groupby('A').ngroup(ascending=False) + color + 0 red + 1 None + 2 red + 3 blue + 4 blue + 5 red + >>> df.groupby("color").ngroup() + 0 1.0 + 1 NaN + 2 1.0 + 3 0.0 + 4 0.0 + 5 1.0 + dtype: float64 + >>> df.groupby("color", dropna=False).ngroup() 0 1 - 1 1 + 1 2 2 1 3 0 4 0 5 1 dtype: int64 - >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() - 0 0 + >>> df.groupby("color", dropna=False).ngroup(ascending=False) + 0 1 1 0 2 1 - 3 3 + 3 2 4 2 - 5 0 + 5 1 dtype: int64 """ with self._group_selection_context():