From 77a2a70e5d74d35145acc46fc7c872db4229ff12 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 21 Oct 2020 12:07:50 +0200 Subject: [PATCH] TST: correct parquet test expected partition column dtype for pyarrow 2.0 --- pandas/tests/io/test_parquet.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 67ee9348394dd..ebafc05a9a28e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -614,16 +614,20 @@ def test_s3_roundtrip_for_dir( # read_table uses the new Arrow Datasets API since pyarrow 1.0.0 # Previous behaviour was pyarrow partitioned columns become 'category' dtypes # These are added to back of dataframe on read. In new API category dtype is - # only used if partition field is string. - legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0") - if partition_col and legacy_read_table: - partition_col_type = "category" - else: - partition_col_type = "int32" - - expected_df[partition_col] = expected_df[partition_col].astype( - partition_col_type + # only used if partition field is string, but this changed again to use + # category dtype for all types (not only strings) in pyarrow 2.0.0 + pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and ( + LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0") ) + if partition_col: + if pa10: + partition_col_type = "int32" + else: + partition_col_type = "category" + + expected_df[partition_col] = expected_df[partition_col].astype( + partition_col_type + ) check_round_trip( df_compat,