Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
df_test = pd.DataFrame([[1,1],[2,2]])
df_test.columns = ["c1", "c2"]
df_test.to_csv("./test.csv.tar.gz", index=False)
Issue Description
Executing the above code using pandas == 1.5.0 and then reading the saved table with an older pandas version via
pd.read_csv("./test_old.csv.tar.gz")
produces:
test.csv.tar.gz c2
0 1.0 1.0
1 2.0 2.0
2 NaN NaN
Conversely, executing the above code using pandas <= 1.4.4 and then reading the saved table using pandas == 1.5.0 raises the following ReadError:
~/.conda/envs/default/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
212
213 return cast(F, wrapper)
~/.conda/envs/default/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
315 stacklevel=find_stack_level(inspect.currentframe()),
316 )
--> 317 return func(*args, **kwargs)
318
319 return wrapper
~/.conda/envs/default/lib/python3.9/site-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
948 kwds.update(kwds_defaults)
949
--> 950 return _read(filepath_or_buffer, kwds)
951
952
~/.conda/envs/default/lib/python3.9/site-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
603
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
606
607 if chunksize or iterator:
~/.conda/envs/default/lib/python3.9/site-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
1440
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
1443
1444 def close(self) -> None:
~/.conda/envs/default/lib/python3.9/site-packages/pandas/io/parsers/readers.py in _make_engine(self, f, engine)
1727 is_text = False
1728 mode = "rb"
-> 1729 self.handles = get_handle(
1730 f,
1731 mode,
~/.conda/envs/default/lib/python3.9/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
798 compression_args.setdefault("mode", ioargs.mode)
799 if isinstance(handle, str):
--> 800 handle = _BytesTarFile(name=handle, **compression_args)
801 else:
802 # error: Argument "fileobj" to "_BytesTarFile" has incompatible
~/.conda/envs/default/lib/python3.9/site-packages/pandas/io/common.py in __init__(self, name, mode, fileobj, archive_name, **kwargs)
965 # type "Union[ReadBuffer[bytes], WriteBuffer[bytes], None]"; expected
966 # "Optional[IO[bytes]]"
--> 967 self.buffer = tarfile.TarFile.open(
968 name=name,
969 mode=self.extend_mode(mode),
~/.conda/envs/default/lib/python3.9/tarfile.py in open(cls, name, mode, fileobj, bufsize, **kwargs)
1614 fileobj.seek(saved_pos)
1615 continue
-> 1616 raise ReadError("file could not be opened successfully")
1617
1618 elif ":" in mode:
ReadError: file could not be opened successfully
Expected Behavior
The table should be read correctly as
c1 c2
0 1 1
1 2 2
Installed Versions
INSTALLED VERSIONS
commit : ca60aab
python : 3.9.10.final.0
python-bits : 64
OS : Linux
OS-release : 4.14.290-217.505.amzn2.x86_64
Version : #1 SMP Wed Aug 10 09:52:16 UTC 2022
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.4.4 # 1.5.0, 1.1.5 also used for testing
numpy : 1.22.3
pytz : 2022.1
dateutil : 2.8.2
setuptools : 60.9.3
pip : 21.2.4
Cython : 0.29.28
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.9.0
html5lib : 1.1
pymysql : None
psycopg2 : None
jinja2 : 3.0.3
IPython : 7.31.1
pandas_datareader: None
bs4 : 4.11.1
bottleneck : None
brotli : None
fastparquet : None
fsspec : 2022.3.0
gcsfs : None
markupsafe : 2.1.0
matplotlib : 3.5.2
numba : 0.55.2
numexpr : None
odfpy : None
openpyxl : 3.0.10
pandas_gbq : None
pyarrow : 8.0.0
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.8.0
snappy : None
sqlalchemy : None
tables : None
tabulate : 0.8.9
xarray : None
xlrd : None
xlwt : None
zstandard : None