Skip to content

Commit d1d8b66

Browse files
authored
REF: reverse dispatch in factorize (#49966)
1 parent 4d9145f commit d1d8b66

File tree

2 files changed

+18
-23
lines changed

2 files changed

+18
-23
lines changed

pandas/core/algorithms.py

Lines changed: 3 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,6 @@
7373
ABCExtensionArray,
7474
ABCIndex,
7575
ABCMultiIndex,
76-
ABCRangeIndex,
7776
ABCSeries,
7877
ABCTimedeltaArray,
7978
)
@@ -738,13 +737,11 @@ def factorize(
738737
# Step 2 is dispatched to extension types (like Categorical). They are
739738
# responsible only for factorization. All data coercion, sorting and boxing
740739
# should happen here.
741-
if isinstance(values, ABCRangeIndex):
742-
return values.factorize(sort=sort)
740+
if isinstance(values, (ABCIndex, ABCSeries)):
741+
return values.factorize(sort=sort, use_na_sentinel=use_na_sentinel)
743742

744743
values = _ensure_arraylike(values)
745744
original = values
746-
if not isinstance(values, ABCMultiIndex):
747-
values = extract_array(values, extract_numpy=True)
748745

749746
if (
750747
isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray))
@@ -753,7 +750,7 @@ def factorize(
753750
# The presence of 'freq' means we can fast-path sorting and know there
754751
# aren't NAs
755752
codes, uniques = values.factorize(sort=sort)
756-
return _re_wrap_factorize(original, uniques, codes)
753+
return codes, uniques
757754

758755
elif not isinstance(values.dtype, np.dtype):
759756
codes, uniques = values.factorize(use_na_sentinel=use_na_sentinel)
@@ -789,21 +786,6 @@ def factorize(
789786

790787
uniques = _reconstruct_data(uniques, original.dtype, original)
791788

792-
return _re_wrap_factorize(original, uniques, codes)
793-
794-
795-
def _re_wrap_factorize(original, uniques, codes: np.ndarray):
796-
"""
797-
Wrap factorize results in Series or Index depending on original type.
798-
"""
799-
if isinstance(original, ABCIndex):
800-
uniques = ensure_wrapped_if_datetimelike(uniques)
801-
uniques = original._shallow_copy(uniques, name=None)
802-
elif isinstance(original, ABCSeries):
803-
from pandas import Index
804-
805-
uniques = Index(uniques)
806-
807789
return codes, uniques
808790

809791

pandas/core/base.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979

8080
from pandas import (
8181
Categorical,
82+
Index,
8283
Series,
8384
)
8485

@@ -1134,8 +1135,20 @@ def factorize(
11341135
self,
11351136
sort: bool = False,
11361137
use_na_sentinel: bool = True,
1137-
):
1138-
return algorithms.factorize(self, sort=sort, use_na_sentinel=use_na_sentinel)
1138+
) -> tuple[npt.NDArray[np.intp], Index]:
1139+
1140+
codes, uniques = algorithms.factorize(
1141+
self._values, sort=sort, use_na_sentinel=use_na_sentinel
1142+
)
1143+
1144+
if isinstance(self, ABCIndex):
1145+
# preserve e.g. NumericIndex, preserve MultiIndex
1146+
uniques = self._constructor(uniques)
1147+
else:
1148+
from pandas import Index
1149+
1150+
uniques = Index(uniques)
1151+
return codes, uniques
11391152

11401153
_shared_docs[
11411154
"searchsorted"

0 commit comments

Comments
 (0)