Skip to content

Commit 609047b

Browse files
authored
BUG: ignoring sort in DTA.factorize (#53992)
1 parent c1c3f14 commit 609047b

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

pandas/core/arrays/datetimelike.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2211,7 +2211,15 @@ def factorize(
2211 2211
codes = codes[::-1]
2212 2212
uniques = uniques[::-1]
2213 2213
return codes, uniques
2214-
# FIXME: shouldn't get here; we are ignoring sort
2214+
2215+
if sort:
2216+
# algorithms.factorize only passes sort=True here when freq is
2217+
# not None, so this should not be reached.
2218+
raise NotImplementedError(
2219+
f"The 'sort' keyword in {type(self).__name__}.factorize is "
2220+
"ignored unless arr.freq is not None. To factorize with sort, "
2221+
"call pd.factorize(obj, sort=True) instead."
2222+
)
2215 2223
return super().factorize(use_na_sentinel=use_na_sentinel)
2216 2224

2217 2225
@classmethod

pandas/tests/arrays/test_datetimes.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -745,3 +745,16 @@ def test_iter_zoneinfo_fold(self, tz):
745 745
right2 = dta.astype(object)[2]
746 746
assert str(left) == str(right2)
747 747
assert left.utcoffset() == right2.utcoffset()
748+
749+
750+
def test_factorize_sort_without_freq():
751+
dta = DatetimeArray._from_sequence([0, 2, 1])
752+
753+
msg = r"call pd.factorize\(obj, sort=True\) instead"
754+
with pytest.raises(NotImplementedError, match=msg):
755+
dta.factorize(sort=True)
756+
757+
# Do TimedeltaArray while we're here
758+
tda = dta - dta[0]
759+
with pytest.raises(NotImplementedError, match=msg):
760+
tda.factorize(sort=True)

0 commit comments

Comments
 (0)