Skip to content

Commit 6d6b943

Browse files
committed
size_hint for hash tables already does the right thing, so there is no need for additional checks
1 parent 72dfbcf commit 6d6b943

File tree

2 files changed

+3
-5
lines changed

2 files changed

+3
-5
lines changed

pandas/core/frame.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5451,15 +5451,13 @@ def duplicated(
54515451
4 True
54525452
dtype: bool
54535453
"""
5454-
from pandas._libs.hashtable import SIZE_HINT_LIMIT, duplicated_int64
5454+
from pandas._libs.hashtable import duplicated_int64
54555455

54565456
if self.empty:
54575457
return self._constructor_sliced(dtype=bool)
54585458

54595459
def f(vals):
5460-
labels, shape = algorithms.factorize(
5461-
vals, size_hint=min(len(self), SIZE_HINT_LIMIT)
5462-
)
5460+
labels, shape = algorithms.factorize(vals, size_hint=len(self))
54635461
return labels.astype("i8", copy=False), len(shape)
54645462

54655463
if subset is None:

pandas/core/sorting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ def compress_group_index(group_index, sort: bool = True):
593593
space can be huge, so this function compresses it, by computing offsets
594594
(comp_ids) into the list of unique labels (obs_group_ids).
595595
"""
596-
size_hint = min(len(group_index), hashtable.SIZE_HINT_LIMIT)
596+
size_hint = len(group_index)
597597
table = hashtable.Int64HashTable(size_hint)
598598

599599
group_index = ensure_int64(group_index)

0 commit comments

Comments
 (0)