diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index d6934a3ca2a6c..67c74f9a04618 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2853,14 +2853,12 @@ See the :ref:`cookbook` for some advanced strategies. The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading old-style ``.xls`` files. - Before pandas 1.2.0, the default argument ``engine=None`` to :func:`~pandas.read_excel` + Before pandas 1.3.0, the default argument ``engine=None`` to :func:`~pandas.read_excel` would result in using the ``xlrd`` engine in many cases, including new - Excel 2007+ (``.xlsx``) files. - If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ is installed, - many of these cases will now default to using the ``openpyxl`` engine. - See the :func:`read_excel` documentation for more details. + Excel 2007+ (``.xlsx``) files. pandas will now default to using the + `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ engine. - Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + It is strongly encouraged to install ``openpyxl`` to read Excel 2007+ (``.xlsx``) files. **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** This is no longer supported, switch to using ``openpyxl`` instead. diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 799bc88ffff4e..8e3978ed9fe1a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -8,6 +8,16 @@ including other versions of pandas. {{ header }} +.. warning:: + + When reading new Excel 2007+ (``.xlsx``) files, the default argument + ``engine=None`` to :func:`~pandas.read_excel` will now result in using the + `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`_ engine in all cases + when the option :attr:`io.excel.xlsx.reader` is set to ``"auto"``. + Previously, some cases would use the + `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ engine instead. See + :ref:`What's new 1.2.0 <whatsnew_120>` for background on this change. + .. 
--------------------------------------------------------------------------- Enhancements diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f12a530ea6c34..f95e87b7a2b12 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -129,11 +129,9 @@ ``pyxlsb`` will be used. .. versionadded:: 1.3.0 - - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed, - then ``openpyxl`` will be used. - - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised. - - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. This - case will raise a ``ValueError`` in a future version of pandas. + - Otherwise ``openpyxl`` will be used. + + .. versionchanged:: 1.3.0 converters : dict, default None Dict of functions for converting values in certain columns. Keys can @@ -997,7 +995,7 @@ class ExcelFile: Parameters ---------- path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath), - a file-like object, xlrd workbook or openpypl workbook. + a file-like object, xlrd workbook or openpyxl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. engine : str, default None @@ -1111,9 +1109,7 @@ def __init__( stacklevel = 2 warnings.warn( f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " - f"only the xls format is supported. As a result, the " - f"openpyxl engine will be used if it is installed and the " - f"engine argument is not specified. Install " + f"only the xls format is supported. 
Install " f"openpyxl instead.", FutureWarning, stacklevel=stacklevel, diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 01ccc9d15a6a3..4e1d572aae569 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -62,13 +62,6 @@ def get_default_engine(ext, mode="reader"): _default_writers["xlsx"] = "xlsxwriter" return _default_writers[ext] else: - if ( - import_optional_dependency("openpyxl", errors="ignore") is None - and import_optional_dependency("xlrd", errors="ignore") is not None - ): - # if no openpyxl but xlrd installed, return xlrd - # the version is handled elsewhere - _default_readers["xlsx"] = "xlrd" return _default_readers[ext] diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index a594718bd62d9..71abb11d2616d 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -117,6 +117,30 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "read_excel", func) + def test_engine_used(self, read_ext, engine, monkeypatch): + # GH 38884 + def parser(self, *args, **kwargs): + return self.engine + + monkeypatch.setattr(pd.ExcelFile, "parse", parser) + + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + def test_usecols_int(self, read_ext, df_ref): df_ref = df_ref.reindex(columns=["A", "B", "C"]) @@ -1164,6 +1188,24 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "ExcelFile", func) + def test_engine_used(self, read_ext, engine, monkeypatch): + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + 
"xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with pd.ExcelFile("test1" + read_ext) as excel: + result = excel.engine + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + def test_excel_passes_na(self, read_ext): with pd.ExcelFile("test4" + read_ext) as excel: parsed = pd.read_excel(