@@ -46,9 +46,8 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
46
46
# In cases with c.ordered, this is equivalent to
47
47
# return c.remove_unused_categories(), c
48
48
49
- unique_codes = unique1d (c .codes ) # type: ignore[no-untyped-call]
49
+ take_codes = unique1d (c .codes [ c . codes != - 1 ] ) # type: ignore[no-untyped-call]
50
50
51
- take_codes = unique_codes [unique_codes != - 1 ]
52
51
if sort :
53
52
take_codes = np .sort (take_codes )
54
53
@@ -67,17 +66,18 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
67
66
68
67
# sort=False should order groups in as-encountered order (GH-8868)
69
68
70
- # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
71
- all_codes = np .arange (c .categories .nunique ())
69
+ # GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
72
70
# GH 38140: exclude nan from indexer for categories
73
71
unique_notnan_codes = unique1d (c .codes [c .codes != - 1 ]) # type: ignore[no-untyped-call]
74
72
if sort :
75
73
unique_notnan_codes = np .sort (unique_notnan_codes )
76
- if len (all_codes ) > len (unique_notnan_codes ):
74
+ if ( num_cat := len (c . categories ) ) > len (unique_notnan_codes ):
77
75
# GH 13179: All categories need to be present, even if missing from the data
78
- missing_codes = np .setdiff1d (all_codes , unique_notnan_codes , assume_unique = True )
76
+ missing_codes = np .setdiff1d (
77
+ np .arange (num_cat ), unique_notnan_codes , assume_unique = True
78
+ )
79
79
take_codes = np .concatenate ((unique_notnan_codes , missing_codes ))
80
80
else :
81
81
take_codes = unique_notnan_codes
82
82
83
- return Categorical (c , c .unique (). categories .take (take_codes ))
83
+ return Categorical (c , c .categories .take (take_codes ))
0 commit comments