Closed
Description
fastparquet was updated to 0.7, which is causing failures in the parquet I/O tests (dtype mismatches after round-tripping through fastparquet).
See, for example, the following logs from the database build.
___________________________ test_cross_engine_fp_pa ____________________________
[gw1] linux -- Python 3.8.10 /usr/share/miniconda/envs/pandas-dev/bin/python
request = <FixtureRequest for <Function test_cross_engine_fp_pa>>
df_cross_compat = a b d e f
0 a 1 4.0 True 2013-01-01
1 b 2 5.0 False 2013-01-02
2 c 3 6.0 True 2013-01-03
pa = 'pyarrow', fp = 'fastparquet'
def test_cross_engine_fp_pa(request, df_cross_compat, pa, fp):
# cross-compat with differing reading/writing engines
df = df_cross_compat
with tm.ensure_clean() as path:
df.to_parquet(path, engine=fp, compression=None)
with catch_warnings(record=True):
result = read_parquet(path, engine=pa)
> tm.assert_frame_equal(result, df)
E AssertionError: Attributes of DataFrame.iloc[:, 4] (column name="f") are different
E
E Attribute "dtype" are different
E [left]: datetime64[ns, UTC]
E [right]: datetime64[ns]
pandas/tests/io/test_parquet.py:337: AssertionError
______________________ TestParquetFastParquet.test_basic _______________________
[gw1] linux -- Python 3.8.10 /usr/share/miniconda/envs/pandas-dev/bin/python
self = <pandas.tests.io.test_parquet.TestParquetFastParquet object at 0x7fd39d1cc070>
fp = 'fastparquet'
df_full = string string_with_nan ... datetime_tz timedelta
0 a a ... 2013-01-01 00:00:00-05...01-02 00:00:00-05:00 2 days
2 c c ... 2013-01-03 00:00:00-05:00 3 days
[3 rows x 14 columns]
def test_basic(self, fp, df_full):
df = df_full
dti = pd.date_range("20130101", periods=3, tz="US/Eastern")
dti = dti._with_freq(None) # freq doesn't round-trip
df["datetime_tz"] = dti
df["timedelta"] = pd.timedelta_range("1 day", periods=3)
> check_round_trip(df, fp)
pandas/tests/io/test_parquet.py:915:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_parquet.py:220: in check_round_trip
compare(repeat)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
repeat = 2
def compare(repeat):
for _ in range(repeat):
df.to_parquet(path, **write_kwargs)
with catch_warnings(record=True):
actual = read_parquet(path, **read_kwargs)
> tm.assert_frame_equal(
expected,
actual,
check_names=check_names,
check_like=check_like,
check_dtype=check_dtype,
)
E AssertionError: Attributes of DataFrame.iloc[:, 6] (column name="uint") are different
E
E Attribute "dtype" are different
E [left]: uint8
E [right]: UInt8
pandas/tests/io/test_parquet.py:210: AssertionError
__________________ TestParquetFastParquet.test_bool_with_none __________________
[gw1] linux -- Python 3.8.10 /usr/share/miniconda/envs/pandas-dev/bin/python
self = <pandas.tests.io.test_parquet.TestParquetFastParquet object at 0x7fd39fd34af0>
fp = 'fastparquet'
def test_bool_with_none(self, fp):
df = pd.DataFrame({"a": [True, None, False]})
expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")
> check_round_trip(df, fp, expected=expected)
pandas/tests/io/test_parquet.py:928:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_parquet.py:220: in check_round_trip
compare(repeat)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
repeat = 2
def compare(repeat):
for _ in range(repeat):
df.to_parquet(path, **write_kwargs)
with catch_warnings(record=True):
actual = read_parquet(path, **read_kwargs)
> tm.assert_frame_equal(
expected,
actual,
check_names=check_names,
check_like=check_like,
check_dtype=check_dtype,
)
E AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="a") are different
E
E Attribute "dtype" are different
E [left]: float16
E [right]: boolean