Skip to content

Commit c6573ef

Browse files
authored
feat: ensure it respects .gz endings
1 parent 5f22df7 commit c6573ef

File tree

2 files changed

+42
-2
lines changed

2 files changed

+42
-2
lines changed

pandas/io/common.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -875,9 +875,25 @@ def __init__(
875875
super().__init__(name=name, mode=mode, fileobj=fileobj, **kwargs)
876876

877877
@classmethod
878-
def open(cls, mode="r", **kwargs):
878+
def open(cls, name=None, mode="r", **kwargs):
879879
mode = mode.replace("b", "")
880-
return super().open(mode=mode, **kwargs)
880+
return super().open(name=name, mode=cls.extend_mode(name, mode), **kwargs)
881+
882+
@classmethod
883+
def extend_mode(
884+
cls, name: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], mode: str
885+
) -> str:
886+
if mode != "w":
887+
return mode
888+
if isinstance(name, (os.PathLike, str)):
889+
filename = Path(name)
890+
if filename.suffix == ".gz":
891+
return mode + ":gz"
892+
elif filename.suffix == ".xz":
893+
return mode + ":xz"
894+
elif filename.suffix == ".bz2":
895+
return mode + ":bz2"
896+
return mode
881897

882898
def infer_filename(self):
883899
"""

pandas/tests/io/parser/test_compression.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import os
77
from pathlib import Path
8+
import tarfile
89
import zipfile
910

1011
import pytest
@@ -180,3 +181,26 @@ def test_ignore_compression_extension(all_parsers):
180181
Path(path_zip).write_text(Path(path_csv).read_text())
181182

182183
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
184+
185+
186+
@skip_pyarrow
187+
def test_writes_tar_gz(all_parsers):
188+
parser = all_parsers
189+
data = DataFrame(
190+
{
191+
"Country": ["Venezuela", "Venezuela"],
192+
"Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
193+
}
194+
)
195+
with tm.ensure_clean("test.tar.gz") as tar_path:
196+
data.to_csv(tar_path, index=False)
197+
198+
# test that read_csv infers .tar.gz to gzip:
199+
tm.assert_frame_equal(parser.read_csv(tar_path), data)
200+
201+
# test that file is indeed gzipped:
202+
with tarfile.open(tar_path, "r:gz") as tar:
203+
result = parser.read_csv(
204+
tar.extractfile(tar.getnames()[0]), compression="infer"
205+
)
206+
tm.assert_frame_equal(result, data)

0 commit comments

Comments
 (0)