Closed
Description
This failure is intermittent and so far only seems to occur in the "Database / Linux_py37_cov" CI job:
_______________ test_to_csv_compression_encoding_gcs[zip-cp1251] _______________
[gw1] linux -- Python 3.7.9 /usr/share/miniconda/envs/pandas-dev/bin/python
gcs_buffer = <_io.BytesIO object at 0x7f4959d0b650>, compression_only = 'zip'
encoding = 'cp1251'
@td.skip_if_no("gcsfs")
@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"])
def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding):
"""
Compression and encoding should work with GCS.
GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and
GH 32392 (read_csv, encoding)
"""
from fsspec import registry
registry.target.clear() # remove state
df = tm.makeDataFrame()
# reference of compressed and encoded file
compression = {"method": compression_only}
if compression_only == "gzip":
compression["mtime"] = 1 # be reproducible
buffer = BytesIO()
df.to_csv(buffer, compression=compression, encoding=encoding, mode="wb")
# write compressed file with explicit compression
path_gcs = "gs://test/test.csv"
df.to_csv(path_gcs, compression=compression, encoding=encoding)
> assert gcs_buffer.getvalue() == buffer.getvalue()
E AssertionError: assert b'PK\x03\x04\...0\x00\x00\x00' == b'PK\x03\x04\...0\x00\x00\x00'
E At index 10 diff: b'\xa6' != b'\xa5'
E Use -v to get the full diff