Skip to content

Commit 42ca9c3

Browse files
update algorithms.Factorize.time_factorize
1 parent 73c7de9 commit 42ca9c3

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

asv_bench/benchmarks/algorithms.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from pandas._libs import lib
66

77
import pandas as pd
8+
from pandas.core.arrays.string_arrow import ArrowStringDtype
89

910
from .pandas_vb_common import tm
1011

@@ -43,23 +44,34 @@ class Factorize:
4344
"datetime64[ns, tz]",
4445
"Int64",
4546
"boolean",
47+
"string_arrow",
4648
],
4749
]
4850
param_names = ["unique", "sort", "dtype"]
4951

5052
def setup(self, unique, sort, dtype):
5153
N = 10 ** 5
54+
string_index = tm.makeStringIndex(N)
55+
try:
56+
string_arrow = pd.array(string_index, dtype=ArrowStringDtype())
57+
except ImportError:
58+
string_arrow = None
59+
60+
if dtype == "string_arrow" and not string_arrow:
61+
raise NotImplementedError
62+
5263
data = {
5364
"int": pd.Int64Index(np.arange(N)),
5465
"uint": pd.UInt64Index(np.arange(N)),
5566
"float": pd.Float64Index(np.random.randn(N)),
56-
"string": tm.makeStringIndex(N),
67+
"string": string_index,
5768
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
5869
"datetime64[ns, tz]": pd.date_range(
5970
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
6071
),
6172
"Int64": pd.array(np.arange(N), dtype="Int64"),
6273
"boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
74+
"string_arrow": string_arrow,
6375
}[dtype]
6476
if not unique:
6577
data = data.repeat(5)

0 commit comments

Comments
 (0)