Skip to content

Commit 7262fd3

Browse files
Clarify ExcelFile's engine compatibility with file types in the docstring GH34237. Add check to ExcelFile that verifies it.
1 parent 1f48d3d commit 7262fd3

File tree

1 file changed

+51
-4
lines changed

1 file changed

+51
-4
lines changed

pandas/io/excel/_base.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,12 @@
103103
of dtype conversion.
104104
engine : str, default None
105105
If io is not a buffer or path, this must be set to identify io.
106-
Acceptable values are None, "xlrd", "openpyxl" or "odf".
106+
Acceptable values are None, "auto", "xlrd", "openpyxl", "odf", or "pyxlsb".
107+
File compatibility:
108+
"xlrd" engine supports .xls, and .xlsx files.
109+
"openpyxl" engine supports .xlsx, .xlsm, .xltx, and .xltm files.
110+
"odf" engine supports .odf files.
111+
"pyxlsb" engine supports .xlsb files.
107112
converters : dict, default None
108113
Dict of functions for converting values in certain columns. Keys can
109114
either be integers or column labels, values are functions that take one
@@ -790,11 +795,17 @@ class ExcelFile:
790795
----------
791796
io : str, path object (pathlib.Path or py._path.local.LocalPath),
792797
a file-like object, xlrd workbook or openpyxl workbook.
793-
If a string or path object, expected to be a path to xls, xlsx or odf file.
798+
If a string or path object, expected to be a path to a
799+
.xls, .xlsx, .xlsb, .xlsm, .xltx, .xltm or .odf file.
794800
engine : str, default None
795801
If io is not a buffer or path, this must be set to identify io.
796-
Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``.
797-
Note that ``odf`` reads tables out of OpenDocument formatted files.
802+
Acceptable values are None, ``auto``, ``xlrd``, ``openpyxl``,
803+
``odf``, or ``pyxlsb``.
804+
File compatibility:
805+
``xlrd`` engine supports .xls and .xlsx files.
806+
``openpyxl`` engine supports .xlsx, .xlsm, .xltx, and .xltm files.
807+
``odf`` engine supports .odf files.
808+
``pyxlsb`` engine supports .xlsb files.
798809
"""
799810

800811
from pandas.io.excel._odfreader import _ODFReader
@@ -809,6 +820,13 @@ class ExcelFile:
809820
"pyxlsb": _PyxlsbReader,
810821
}
811822

823+
# Maps each reader engine name to the file extensions (without the leading
# dot) that it is allowed to read; consulted by ``check_extension``.
_supported_engine_filetypes = {
    "xlrd": ["xls", "xlsx"],
    "openpyxl": ["xlsx", "xlsm", "xltx", "xltm"],
    # OpenDocument spreadsheets are normally saved as .ods, so listing only
    # "odf" would reject the most common input for this engine. .odt is
    # included as well since it is also an OpenDocument container.
    "odf": ["odf", "ods", "odt"],
    "pyxlsb": ["xlsb"],
}
829+
812830
def __init__(self, io, engine=None):
813831
if engine is None:
814832
engine = "xlrd"
@@ -820,6 +838,9 @@ def __init__(self, io, engine=None):
820838
self.io = io
821839
# Always a string
822840
self._io = stringify_path(io)
841+
# Check engine-extension compatibility
842+
ext = os.path.splitext(io)[-1][1:]
843+
self.check_extension(engine, ext)
823844

824845
self._reader = self._engines[engine](self._io)
825846

@@ -896,6 +917,32 @@ def book(self):
896917
def sheet_names(self):
897918
return self._reader.sheet_names
898919

920+
@classmethod
def check_extension(cls, engine, ext):
    """
    Check that a file extension is supported by the given reader engine.

    Parameters
    ----------
    engine : str
        Name of the reader engine; must be a key of
        ``cls._supported_engine_filetypes``.
    ext : str
        File extension, with or without the leading dot. The comparison
        is case-insensitive.

    Returns
    -------
    bool
        True when the engine supports the extension.

    Raises
    ------
    ValueError
        If ``engine`` is not a known engine, or if the engine does not
        support ``ext``. In the latter case the message names the engines
        that do support the extension, if there are any.
    """
    supported = cls._supported_engine_filetypes.get(engine)
    if supported is None:
        # ``.get`` yields None for an unregistered engine; fail with a clear
        # message instead of a TypeError from the membership test below.
        raise ValueError(f"Unknown engine: {engine}")

    if ext.startswith("."):
        ext = ext[1:]
    # Extensions such as ".XLSX" denote the same format as ".xlsx".
    ext = ext.lower()

    if ext in supported:
        return True

    supporting_engines = [
        name
        for name, filetypes in cls._supported_engine_filetypes.items()
        if ext in filetypes
    ]
    if supporting_engines:
        quoted = ", ".join(f"'{name}'" for name in supporting_engines)
        eng_info = f" Use engine(s) {quoted} instead."
    else:
        eng_info = " No engines currently support the provided file extension."
    raise ValueError(
        f"Unsupported extension for engine '{engine}': '.{ext}'.{eng_info}"
    )
945+
899946
def close(self):
900947
"""close io if necessary"""
901948
self._reader.close()

0 commit comments

Comments
 (0)