Skip to content

Commit 6cbb84b

Browse files
Change ExcelFile's docstring and read_excel docs in order to add information about engine compatibility and also the support of pyxlsb engine.
1 parent fadb59b commit 6cbb84b

File tree

1 file changed

+17
-53
lines changed

1 file changed

+17
-53
lines changed

pandas/io/excel/_base.py

Lines changed: 17 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
"""
3535
Read an Excel file into a pandas DataFrame.
3636
37-
Supports `xls`, `xlsx`, `xlsm`, `xlsb`, and `odf` file extensions
37+
Supports `xls`, `xlsx`, `xlsm`, `xlsb`, `odf`, `ods` and `odt` file extensions
3838
read from a local filesystem or URL. Supports an option to read
3939
a single sheet or a list of sheets.
4040
@@ -103,12 +103,12 @@
103103
of dtype conversion.
104104
engine : str, default None
105105
If io is not a buffer or path, this must be set to identify io.
106-
Acceptable values are None, "auto", "xlrd", "openpyxl", "odf", or "pyxlsb".
107-
File compatibility:
108-
"xlrd" engine supports .xls, and .xlsx files.
109-
"openpyxl" engine supports .xlsx, .xlsm, .xltx, and .xltm files.
110-
"odf" engine supports .odf files.
111-
"pyxlsb" engine supports .xlsb files.
106+
Acceptable values are None, "xlrd", "openpyxl", "odf", or "pyxlsb".
107+
Engine compatibility:
108+
"xlrd" engine supports most old/new Excel file formats.
109+
"openpyxl" engine supports newer Excel file formats.
110+
"odf" engine supports OpenDocument file formats (.odf, .ods, .odt).
111+
"pyxlsb" engine supports Binary Excel files.
112112
converters : dict, default None
113113
Dict of functions for converting values in certain columns. Keys can
114114
either be integers or column labels, values are functions that take one
@@ -789,23 +789,22 @@ def close(self):
789789
class ExcelFile:
790790
"""
791791
Class for parsing tabular excel sheets into DataFrame objects.
792-
Uses xlrd. See read_excel for more documentation
792+
Uses xlrd by default. See read_excel for more documentation
793793
794794
Parameters
795795
----------
796796
io : str, path object (pathlib.Path or py._path.local.LocalPath),
797797
a file-like object, xlrd workbook or openpyxl workbook.
798798
If a string or path object, expected to be a path to a
799-
.xls, .xlsx, .xlsb, .xlsm, .xltx, .xltm or .odf file.
799+
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
800800
engine : str, default None
801801
If io is not a buffer or path, this must be set to identify io.
802-
Acceptable values are None, ``auto``, ``xlrd``, ``openpyxl``,
803-
``odf``, or ``pyxlsb``.
804-
File compatibility:
805-
``xlrd`` engine supports .xls, and .xlsx files.
806-
``openpyxl`` engine supports .xlsx, .xlsm, .xltx, and .xltm files.
807-
``odf`` engine supports .odf files.
808-
``pyxlsb`` engine supports .xlsb files.
802+
Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``.
803+
Engine compatibility:
804+
``xlrd`` engine supports most old/new Excel file formats.
805+
``openpyxl`` engine supports newer Excel file formats.
806+
``odf`` engine supports OpenDocument file formats (.odf, .ods, .odt).
807+
``pyxlsb`` engine supports Binary Excel files.
809808
"""
810809

811810
from pandas.io.excel._odfreader import _ODFReader
@@ -820,27 +819,18 @@ class ExcelFile:
820819
"pyxlsb": _PyxlsbReader,
821820
}
822821

823-
_supported_engine_filetypes = {
824-
"xlrd": ["xls", "xlsx"],
825-
"openpyxl": ["xlsx", "xlsm", "xltx", "xltm"],
826-
"odf": ["odf"],
827-
"pyxlsb": ["xlsb"],
828-
}
829-
830822
def __init__(self, io, engine=None):
831823
if engine is None:
832824
engine = "xlrd"
833825
if engine not in self._engines:
834826
raise ValueError(f"Unknown engine: {engine}")
835827

836828
self.engine = engine
837-
# could be a str, ExcelFile, Book, etc.
829+
830+
# Could be a str, ExcelFile, Book, etc.
838831
self.io = io
839832
# Always a string
840833
self._io = stringify_path(io)
841-
# Check engine-extension compatibility
842-
ext = os.path.splitext(io)[-1][1:]
843-
self.check_extension(engine, ext)
844834

845835
self._reader = self._engines[engine](self._io)
846836

@@ -917,32 +907,6 @@ def book(self):
917907
def sheet_names(self):
918908
return self._reader.sheet_names
919909

920-
@classmethod
921-
def check_extension(cls, engine, ext):
922-
"""
923-
checks that the provided path's extension is supported by the reader engine.
924-
If it isn't supported, raises a ValueError.
925-
"""
926-
if ext.startswith("."):
927-
ext = ext[1:]
928-
if ext not in cls._supported_engine_filetypes.get(engine):
929-
supporting_engines = [
930-
k for k, v in cls._supported_engine_filetypes.items() if ext in v
931-
]
932-
if not supporting_engines:
933-
eng_info = " No engines currently support the provided file extension."
934-
else:
935-
eng_info = (
936-
" Use engine(s) "
937-
+ ", ".join("'{0}'".format(e) for e in supporting_engines)
938-
+ " instead."
939-
)
940-
raise ValueError(
941-
f"Unsupported extension for engine '{engine}': '.{ext}'." + eng_info
942-
)
943-
else:
944-
return True
945-
946910
def close(self):
947911
"""close io if necessary"""
948912
self._reader.close()

0 commit comments

Comments
 (0)