Skip to content

Commit b837945

Browse files
authored
fix: large_list and large_string unit test for read_parquet_metadata (#3089)
* Adds unit test for pyarrow large lists and strings
* Fixes typo in test func name
1 parent 322ad04 commit b837945

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

tests/unit/test_s3_parquet.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,27 @@ def test_read_parquet_metadata_nonexistent_file(path):
6262
wr.s3.read_parquet_metadata(path + "non-existent-file.parquet")
6363

6464

def test_read_parquet_metadata_large_dtype(path):
    """Verify that columns stored with pyarrow's large_list/large_string
    dtypes are reported by ``read_parquet_metadata`` as the regular
    ``array<string>`` / ``string`` Athena types."""
    # Arrow schema using the 64-bit-offset ("large") variants of list/string.
    schema = pa.schema(
        [
            pa.field("c0", pa.large_list(pa.large_string())),
            pa.field("c1", pa.large_string()),
        ]
    )
    list_column = pa.array([["a", "b", "c"], ["d", "e", "f"], ["g", "h", "i"]])
    string_column = pa.array(["a", "b", "c"])
    df = pa.table([list_column, string_column], schema=schema)

    # use pyarrow-backed dataframe to simulate the large_list and large_string dtypes
    pandas_df = df.to_pandas(types_mapper=lambda pa_dtype: pd.ArrowDtype(pa_dtype))

    wr.s3.to_parquet(pandas_df, path)
    columns_types, _ = wr.s3.read_parquet_metadata(path)

    # One metadata entry per table column, each mapped to the non-"large" type.
    assert len(columns_types) == len(df.columns)
    assert columns_types.get("c0") == "array<string>"
    assert columns_types.get("c1") == "string"
6586
@pytest.mark.parametrize(
6687
"partition_cols",
6788
[

0 commit comments

Comments
 (0)