|
5 | 5 | from pandas._libs import lib
|
6 | 6 |
|
7 | 7 | import pandas as pd
|
| 8 | +from pandas.core.arrays.string_arrow import ArrowStringDtype |
8 | 9 |
|
9 | 10 | from .pandas_vb_common import tm
|
10 | 11 |
|
@@ -43,23 +44,34 @@ class Factorize:
|
43 | 44 | "datetime64[ns, tz]",
|
44 | 45 | "Int64",
|
45 | 46 | "boolean",
|
| 47 | + "string_arrow", |
46 | 48 | ],
|
47 | 49 | ]
|
48 | 50 | param_names = ["unique", "sort", "dtype"]
|
49 | 51 |
|
50 | 52 | def setup(self, unique, sort, dtype):
|
51 | 53 | N = 10 ** 5
|
| 54 | + string_index = tm.makeStringIndex(N) |
| 55 | + try: |
| 56 | + string_arrow = pd.array(string_index, dtype=ArrowStringDtype()) |
| 57 | + except ImportError: |
| 58 | + string_arrow = None |
| 59 | + |
| 60 | + if dtype == "string_arrow" and not string_arrow: |
| 61 | + raise NotImplementedError |
| 62 | + |
52 | 63 | data = {
|
53 | 64 | "int": pd.Int64Index(np.arange(N)),
|
54 | 65 | "uint": pd.UInt64Index(np.arange(N)),
|
55 | 66 | "float": pd.Float64Index(np.random.randn(N)),
|
56 |
| - "string": tm.makeStringIndex(N), |
| 67 | + "string": string_index, |
57 | 68 | "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
|
58 | 69 | "datetime64[ns, tz]": pd.date_range(
|
59 | 70 | "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
|
60 | 71 | ),
|
61 | 72 | "Int64": pd.array(np.arange(N), dtype="Int64"),
|
62 | 73 | "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
|
| 74 | + "string_arrow": string_arrow, |
63 | 75 | }[dtype]
|
64 | 76 | if not unique:
|
65 | 77 | data = data.repeat(5)
|
|
0 commit comments