103
103
of dtype conversion.
104
104
engine : str, default None
105
105
If io is not a buffer or path, this must be set to identify io.
106
- Acceptable values are None, "xlrd", "openpyxl" or "odf".
106
+ Acceptable values are None, "auto", "xlrd", "openpyxl", "odf", or "pyxlsb".
107
+ File compatibility:
108
+ "xlrd" engine supports .xls, and .xlsx files.
109
+ "openpyxl" engine supports .xlsx, .xlsm, .xltx, and .xltm files.
110
+ "odf" engine supports .odf files.
111
+ "pyxlsb" engine supports .xlsb files.
107
112
converters : dict, default None
108
113
Dict of functions for converting values in certain columns. Keys can
109
114
either be integers or column labels, values are functions that take one
@@ -790,11 +795,17 @@ class ExcelFile:
790
795
----------
791
796
io : str, path object (pathlib.Path or py._path.local.LocalPath),
792
797
a file-like object, xlrd workbook or openpyxl workbook.
793
- If a string or path object, expected to be a path to xls, xlsx or odf file.
798
+ If a string or path object, expected to be a path to a
799
+ .xls, .xlsx, .xlsb, .xlsm, .xltx, .xltm or .odf file.
794
800
engine : str, default None
795
801
If io is not a buffer or path, this must be set to identify io.
796
- Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``.
797
- Note that ``odf`` reads tables out of OpenDocument formatted files.
802
+ Acceptable values are None, ``auto``, ``xlrd``, ``openpyxl``,
803
+ ``odf``, or ``pyxlsb``.
804
+ File compatibility:
805
+ ``xlrd`` engine supports .xls and .xlsx files.
806
+ ``openpyxl`` engine supports .xlsx, .xlsm, .xltx, and .xltm files.
807
+ ``odf`` engine supports .odf files.
808
+ ``pyxlsb`` engine supports .xlsb files.
798
809
"""
799
810
800
811
from pandas .io .excel ._odfreader import _ODFReader
@@ -809,6 +820,13 @@ class ExcelFile:
809
820
"pyxlsb" : _PyxlsbReader ,
810
821
}
811
822
823
# Maps each reader engine name to the file extensions (written without the
# leading dot) that ``check_extension`` accepts for that engine.
# The "odf" engine reads OpenDocument files in general, so the spreadsheet
# (.ods) and text (.odt) extensions are accepted alongside .odf.
_supported_engine_filetypes = {
    "xlrd": ["xls", "xlsx"],
    "openpyxl": ["xlsx", "xlsm", "xltx", "xltm"],
    "odf": ["odf", "ods", "odt"],
    "pyxlsb": ["xlsb"],
}
829
+
812
830
def __init__ (self , io , engine = None ):
813
831
if engine is None :
814
832
engine = "xlrd"
@@ -820,6 +838,9 @@ def __init__(self, io, engine=None):
820
838
self .io = io
821
839
# Always a string
822
840
self ._io = stringify_path (io )
841
+ # Check engine-extension compatibility
842
+ ext = os .path .splitext (io )[- 1 ][1 :]
843
+ self .check_extension (engine , ext )
823
844
824
845
self ._reader = self ._engines [engine ](self ._io )
825
846
@@ -896,6 +917,32 @@ def book(self):
896
917
def sheet_names (self ):
897
918
return self ._reader .sheet_names
898
919
920
@classmethod
def check_extension(cls, engine, ext):
    """
    Check that a file extension is supported by the given reader engine.

    Parameters
    ----------
    engine : str
        Name of the reader engine. Must be a key of
        ``cls._supported_engine_filetypes``.
    ext : str
        File extension, with or without a single leading dot. The
        comparison is case-insensitive.

    Returns
    -------
    bool
        True when the engine supports the extension.

    Raises
    ------
    ValueError
        If ``engine`` is not a known engine, or if the engine does not
        support ``ext``. In the latter case the message names the engines
        that do support the extension, if any.
    """
    supported = cls._supported_engine_filetypes.get(engine)
    if supported is None:
        # Without this guard the membership test below would fail with an
        # opaque "argument of type 'NoneType' is not iterable" TypeError.
        raise ValueError(f"Unknown engine: {engine}")

    # Extensions are compared case-insensitively so "FILE.XLSX" is accepted.
    ext = ext.lower()
    if ext.startswith("."):
        ext = ext[1:]

    if ext in supported:
        return True

    # Build a hint listing every engine that could read this extension.
    supporting_engines = [
        name
        for name, filetypes in cls._supported_engine_filetypes.items()
        if ext in filetypes
    ]
    if supporting_engines:
        names = ", ".join(f"'{name}'" for name in supporting_engines)
        eng_info = f" Use engine(s) {names} instead."
    else:
        eng_info = " No engines currently support the provided file extension."

    raise ValueError(
        f"Unsupported extension for engine '{engine}': '.{ext}'." + eng_info
    )
945
+
899
946
def close (self ):
900
947
"""close io if necessary"""
901
948
self ._reader .close ()
0 commit comments