diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 2b60871cbf82b..b30ad9f7b4905 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -187,7 +187,9 @@ def time_remove_categories(self):
 class Rank:
     def setup(self):
         N = 10**5
-        ncats = 100
+        # makeCategoricalIndex draws 4 * ncats characters without replacement.
+        # NOTE: assumes RANDS_CHARS holds the 62 ASCII alphanumerics -- confirm.
+        ncats = 15
 
         self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
         self.s_str_cat = pd.Series(self.s_str, dtype="category")
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 1b236cdc99bed..fc48317114e23 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -305,7 +305,7 @@ def makeUnicodeIndex(k=10, name=None):
 
 def makeCategoricalIndex(k=10, n=3, name=None, **kwargs):
     """make a length k index or n categories"""
-    x = rands_array(nchars=4, size=n)
+    x = rands_array(nchars=4, size=n, replace=False)
     return CategoricalIndex(
         Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs
     )
diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py
index a646d7639a4e6..f90e18a2020c5 100644
--- a/pandas/_testing/_random.py
+++ b/pandas/_testing/_random.py
@@ -14,12 +14,17 @@ def randbool(size=(), p: float = 0.5):
 )
 
 
-def rands_array(nchars, size, dtype="O"):
+def rands_array(nchars, size, dtype="O", replace=True):
     """
     Generate an array of byte strings.
+
+    ``replace`` is forwarded to ``np.random.choice``. With ``replace=False``
+    the individual characters are drawn without replacement, so
+    ``nchars * np.prod(size)`` must not exceed ``len(RANDS_CHARS)`` or numpy
+    raises ``ValueError``.
     """
     retval = (
-        np.random.choice(RANDS_CHARS, size=nchars * np.prod(size))
+        np.random.choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace)
         .view((np.str_, nchars))
         .reshape(size)
     )