Skip to content

Commit 15e9e7a

Browse files
authored
REF: Minimize operations in recode_for_groupby (#59618)
1 parent bb4ab4f commit 15e9e7a

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

pandas/core/groupby/categorical.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,8 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorical
4646
# In cases with c.ordered, this is equivalent to
4747
# return c.remove_unused_categories(), c
4848

49-
unique_codes = unique1d(c.codes) # type: ignore[no-untyped-call]
49+
take_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call]
5050

51-
take_codes = unique_codes[unique_codes != -1]
5251
if sort:
5352
take_codes = np.sort(take_codes)
5453

@@ -67,17 +66,18 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorical
6766

6867
# sort=False should order groups in as-encountered order (GH-8868)
6968

70-
# xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
71-
all_codes = np.arange(c.categories.nunique())
69+
# GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
7270
# GH 38140: exclude nan from indexer for categories
7371
unique_notnan_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call]
7472
if sort:
7573
unique_notnan_codes = np.sort(unique_notnan_codes)
76-
if len(all_codes) > len(unique_notnan_codes):
74+
if (num_cat := len(c.categories)) > len(unique_notnan_codes):
7775
# GH 13179: All categories need to be present, even if missing from the data
78-
missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
76+
missing_codes = np.setdiff1d(
77+
np.arange(num_cat), unique_notnan_codes, assume_unique=True
78+
)
7979
take_codes = np.concatenate((unique_notnan_codes, missing_codes))
8080
else:
8181
take_codes = unique_notnan_codes
8282

83-
return Categorical(c, c.unique().categories.take(take_codes))
83+
return Categorical(c, c.categories.take(take_codes))

0 commit comments

Comments (0)