Skip to content

Commit a251537

Browse files
test for arrays with 2 chunks
1 parent 42ca9c3 commit a251537

File tree

1 file changed

+38
-10
lines changed

1 file changed

+38
-10
lines changed

pandas/tests/extension/test_string.py

Lines changed: 38 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,29 @@
2626
from pandas.tests.extension import base
2727

2828

29+
def split_array(arr):
    """
    Return a copy of ``arr`` whose underlying data is laid out in two chunks.

    The calling test is skipped via ``pytest.skip`` when ``arr.dtype`` is not
    an ``ArrowStringDtype``.  Otherwise ``arr._data`` is cut at its midpoint
    (``len // 2``) and the chunks of both halves are reassembled with
    ``pyarrow.chunked_array``; the result is wrapped in ``type(arr)``.

    NOTE(review): ``arr._data`` is presumably a ``pyarrow.ChunkedArray``
    holding a single chunk -- the assertion below fails otherwise.
    """
    if not isinstance(arr.dtype, ArrowStringDtype):
        pytest.skip("chunked array n/a")

    # Imported lazily so pyarrow is only touched once the dtype check passed.
    import pyarrow as pa

    original = arr._data
    midpoint = len(original) // 2
    head = original[:midpoint]
    tail = original[midpoint:]

    rechunked = pa.chunked_array([*head.chunks, *tail.chunks])
    # Guard against inputs that do not end up as exactly two chunks.
    assert rechunked.num_chunks == 2

    return type(arr)(rechunked)
45+
46+
47+
@pytest.fixture(params=[True, False])
def chunked(request):
    """Parametrized flag taking the values ``True`` and ``False`` in turn."""
    use_chunks = request.param
    return use_chunks
50+
51+
2952
@pytest.fixture(
3053
params=[
3154
StringDtype,
@@ -39,28 +62,32 @@ def dtype(request):
3962

4063

4164
@pytest.fixture
def data(dtype, chunked):
    """
    Array of 100 random ASCII letters whose first two entries differ.

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    # Redraw until the first two elements are distinct.
    while True:
        strings = np.random.choice(list(string.ascii_letters), size=100)
        if strings[0] != strings[1]:
            break

    array_cls = dtype.construct_array_type()
    arr = array_cls._from_sequence(strings)
    if chunked:
        return split_array(arr)
    return arr
4872

4973

5074
@pytest.fixture
def data_missing(dtype, chunked):
    """Two-element array laid out as [NA, valid], split when ``chunked``."""
    array_cls = dtype.construct_array_type()
    arr = array_cls._from_sequence([pd.NA, "A"])
    if chunked:
        return split_array(arr)
    return arr
5479

5580

5681
@pytest.fixture
def data_for_sorting(dtype, chunked):
    """Three-element array ["B", "C", "A"], split when ``chunked``."""
    array_cls = dtype.construct_array_type()
    arr = array_cls._from_sequence(["B", "C", "A"])
    if chunked:
        return split_array(arr)
    return arr
5985

6086

6187
@pytest.fixture
def data_missing_for_sorting(dtype, chunked):
    """Three-element array ["B", NA, "A"], split when ``chunked``."""
    array_cls = dtype.construct_array_type()
    arr = array_cls._from_sequence(["B", pd.NA, "A"])
    if chunked:
        return split_array(arr)
    return arr
6491

6592

6693
@pytest.fixture
@@ -69,10 +96,11 @@ def na_value():
6996

7097

7198
@pytest.fixture
def data_for_grouping(dtype, chunked):
    """
    Eight-element array ["B", "B", NA, NA, "A", "A", "B", "C"].

    When ``chunked`` is true the array is passed through ``split_array``
    before being returned.
    """
    values = ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]
    array_cls = dtype.construct_array_type()
    arr = array_cls._from_sequence(values)
    if chunked:
        return split_array(arr)
    return arr
76104

77105

78106
class TestDtype(base.BaseDtypeTests):

0 commit comments

Comments
 (0)