Skip to content

Commit 9a85cba

Browse files
Skn0tt and Margarete01 authored
add support for .tar archives
Python's `tarfile` module transparently supports gzip, xz and bz2 compression, so we don't need to add any special cases for those. Co-authored-by: Margarete Dippel <margarete01@users.noreply.github.com>
1 parent c1823ef commit 9a85cba

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

pandas/io/common.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import mmap
1919
import os
2020
from pathlib import Path
21+
import tarfile
2122
import tempfile
2223
from typing import (
2324
IO,
@@ -520,6 +521,9 @@ def infer_compression(
520521
# Cannot infer compression of a buffer, assume no compression
521522
return None
522523

524+
if ".tar" in filepath_or_buffer:
525+
return "tar"
526+
523527
# Infer compression from the filename/URL extension
524528
for compression, extension in _compression_to_extension.items():
525529
if filepath_or_buffer.lower().endswith(extension):
@@ -747,6 +751,21 @@ def get_handle(
747751
f"Only one file per ZIP: {zip_names}"
748752
)
749753

754+
# TAR Encoding
755+
elif compression == "tar":
756+
tar = tarfile.open(handle, "r:*")
757+
handles.append(tar)
758+
files = tar.getnames()
759+
if len(files) == 1:
760+
handle = tar.extractfile(files[0])
761+
elif len(files) == 0:
762+
raise ValueError(f"Zero files found in TAR archive {path_or_buf}")
763+
else:
764+
raise ValueError(
765+
"Multiple files found in TAR archive. "
766+
f"Only one file per TAR archive: {files}"
767+
)
768+
750769
# XZ Compression
751770
elif compression == "xz":
752771
handle = get_lzma_file()(handle, ioargs.mode)

0 commit comments

Comments
 (0)