Skip to content

Commit 51f1b1d

Browse files
fixup roundtrip tests
1 parent e57c850 commit 51f1b1d

File tree

3 files changed

+19
-10
lines changed

3 files changed

+19
-10
lines changed

pandas/conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,10 @@ def string_storage(request):
11521152
return request.param
11531153

11541154

1155+
# Alias of the string_storage fixture, so a single test can request both names and run over the cartesian product of storage options
1156+
string_storage2 = string_storage
1157+
1158+
11551159
@pytest.fixture(params=tm.BYTES_DTYPES)
11561160
def bytes_dtype(request):
11571161
"""

pandas/tests/arrays/string_/test_string.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -431,23 +431,25 @@ def test_arrow_array(dtype):
431431

432432

433433
@td.skip_if_no("pyarrow")
434-
def test_arrow_roundtrip(dtype):
434+
def test_arrow_roundtrip(dtype, string_storage2):
435435
# roundtrip is possible from pyarrow 1.0.0 onwards
436436
import pyarrow as pa
437437

438438
data = pd.array(["a", "b", None], dtype=dtype)
439439
df = pd.DataFrame({"a": data})
440440
table = pa.table(df)
441441
assert table.field("a").type == "string"
442-
result = table.to_pandas()
443-
assert isinstance(result["a"].dtype, type(dtype))
444-
tm.assert_frame_equal(result, df)
442+
with pd.option_context("string_storage", string_storage2):
443+
result = table.to_pandas()
444+
assert isinstance(result["a"].dtype, pd.StringDtype)
445+
expected = df.astype(f"string[{string_storage2}]")
446+
tm.assert_frame_equal(result, expected)
445447
# ensure the missing value is represented by NA and not np.nan or None
446448
assert result.loc[2, "a"] is pd.NA
447449

448450

449451
@td.skip_if_no("pyarrow")
450-
def test_arrow_load_from_zero_chunks(dtype):
452+
def test_arrow_load_from_zero_chunks(dtype, string_storage2):
451453
# GH-41040
452454
import pyarrow as pa
453455

@@ -457,9 +459,11 @@ def test_arrow_load_from_zero_chunks(dtype):
457459
assert table.field("a").type == "string"
458460
# Instantiate the same table with no chunks at all
459461
table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
460-
result = table.to_pandas()
461-
assert isinstance(result["a"].dtype, type(dtype))
462-
tm.assert_frame_equal(result, df)
462+
with pd.option_context("string_storage", string_storage2):
463+
result = table.to_pandas()
464+
assert isinstance(result["a"].dtype, pd.StringDtype)
465+
expected = df.astype(f"string[{string_storage2}]")
466+
tm.assert_frame_equal(result, expected)
463467

464468

465469
def test_value_counts_na(dtype):

pandas/tests/io/test_parquet.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -810,10 +810,11 @@ def test_additional_extension_arrays(self, pa):
810810
check_round_trip(df, pa)
811811

812812
@td.skip_if_no("pyarrow", min_version="1.0.0")
813-
def test_pyarrow_backed_string_array(self, pa):
813+
def test_pyarrow_backed_string_array(self, pa, string_storage):
814814
# test ArrowStringArray supported through the __arrow_array__ protocol
815815
df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")})
816-
check_round_trip(df, pa, expected=df)
816+
with pd.option_context("string_storage", string_storage):
817+
check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]"))
817818

818819
@td.skip_if_no("pyarrow")
819820
def test_additional_extension_types(self, pa):

0 commit comments

Comments
 (0)