From a2540a36c4f5fca9b9f397d0cff25e0b7db6d623 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 17 Dec 2020 09:28:11 -0800 Subject: [PATCH 1/3] BENCH: Increase sample of CategoricalIndexIndexing.time_get_indexer_list benchmark --- asv_bench/benchmarks/indexing.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 38d1f64bd5f4e..39396d9d388c3 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -256,7 +256,17 @@ def setup(self, index): "non_monotonic": CategoricalIndex(list("abc" * N)), } self.data = indices[index] - self.data_unique = CategoricalIndex(list(string.printable)) + self.data_unique = CategoricalIndex( + [ + "".join( + np.random.choice( + list(string.printable), + np.random.randint(1, len(string.printable)), + ) + ) + for _ in range(N) + ] + ) self.int_scalar = 10000 self.int_list = list(range(10000)) From baeb2a50820c1c91a07e59a1abe0b687d6d5b378 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 17 Dec 2020 10:33:39 -0800 Subject: [PATCH 2/3] simplify --- asv_bench/benchmarks/indexing.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 39396d9d388c3..1628567cc792d 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -3,7 +3,6 @@ lower-level methods directly on Index and subclasses, see index_object.py, indexing_engine.py, and index_cached.py """ -import string import warnings import numpy as np @@ -256,17 +255,7 @@ def setup(self, index): "non_monotonic": CategoricalIndex(list("abc" * N)), } self.data = indices[index] - self.data_unique = CategoricalIndex( - [ - "".join( - np.random.choice( - list(string.printable), - np.random.randint(1, len(string.printable)), - ) - ) - for _ in range(N) - ] - ) + self.data_unique = CategoricalIndex(["a" * i for i in range(1, N + 1)]) 
self.int_scalar = 10000 self.int_list = list(range(10000)) From 0fb99bcede29694da7bd14efac27ecc8ecfaad0f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 17 Dec 2020 11:59:42 -0800 Subject: [PATCH 3/3] Don't make long strings --- asv_bench/benchmarks/indexing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 1628567cc792d..e95e5bec5849c 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -3,6 +3,8 @@ lower-level methods directly on Index and subclasses, see index_object.py, indexing_engine.py, and index_cached.py """ +import itertools +import string import warnings import numpy as np @@ -255,7 +257,9 @@ def setup(self, index): "non_monotonic": CategoricalIndex(list("abc" * N)), } self.data = indices[index] - self.data_unique = CategoricalIndex(["a" * i for i in range(1, N + 1)]) + self.data_unique = CategoricalIndex( + ["".join(perm) for perm in itertools.permutations(string.printable, 3)] + ) self.int_scalar = 10000 self.int_list = list(range(10000))