From e0fc50d10593d5247593f06a02a7a92e2888871a Mon Sep 17 00:00:00 2001 From: Miguel Marques Date: Tue, 19 May 2020 07:55:04 +0100 Subject: [PATCH 1/3] Change ExcelFile's docstring and read_excel docs in order to add information about engine compatibility and also the support of pyxlsb engine GH34237. --- pandas/io/excel/_base.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d1139f640cef4..923e48e03fcb0 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -34,7 +34,7 @@ """ Read an Excel file into a pandas DataFrame. -Supports `xls`, `xlsx`, `xlsm`, `xlsb`, and `odf` file extensions +Supports `xls`, `xlsx`, `xlsm`, `xlsb`, `odf`, `ods` and `odt` file extensions read from a local filesystem or URL. Supports an option to read a single sheet or a list of sheets. @@ -103,7 +103,12 @@ of dtype conversion. engine : str, default None If io is not a buffer or path, this must be set to identify io. - Acceptable values are None, "xlrd", "openpyxl" or "odf". + Acceptable values are None, "xlrd", "openpyxl", "odf", or "pyxlsb". + Engine compatibility: + "xlrd" supports most old/new Excel file formats. + "openpyxl" engine supports newer Excel file formats. + "odf" engine supports OpenDocument file formats (.odf, .ods, .odt). + "pyxlsb" engine supports Binary Excel files. converters : dict, default None Dict of functions for converting values in certain columns. Keys can either be integers or column labels, values are functions that take one @@ -784,17 +789,22 @@ def close(self): class ExcelFile: """ Class for parsing tabular excel sheets into DataFrame objects. - Uses xlrd. See read_excel for more documentation + Uses xlrd by default. See read_excel for more documentation Parameters ---------- io : str, path object (pathlib.Path or py._path.local.LocalPath), a file-like object, xlrd workbook or openpypl workbook. 
- If a string or path object, expected to be a path to xls, xlsx or odf file. + If a string or path object, expected to be a path to a + .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. engine : str, default None If io is not a buffer or path, this must be set to identify io. - Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``. - Note that ``odf`` reads tables out of OpenDocument formatted files. + Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``. + Engine compatibility: + ``xlrd`` supports most old/new Excel file formats. + ``openpyxl`` engine supports newer Excel file formats. + ``odf`` engine supports OpenDocument file formats (.odf, .ods, .odt). + ``pyxlsb`` engine supports Binary Excel files. """ from pandas.io.excel._odfreader import _ODFReader @@ -816,7 +826,8 @@ def __init__(self, io, engine=None): raise ValueError(f"Unknown engine: {engine}") self.engine = engine - # could be a str, ExcelFile, Book, etc. + + # Could be a str, ExcelFile, Book, etc. self.io = io # Always a string self._io = stringify_path(io) From cab0c144c0f0b74e8cfbd61b330117a2a8da2a2f Mon Sep 17 00:00:00 2001 From: Miguel Marques Date: Fri, 29 May 2020 20:16:29 +0100 Subject: [PATCH 2/3] Doc-string clarify in read_excel and class ExcelFile --- pandas/io/excel/_base.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 87c316db852a6..1f4b9f1fdf6bc 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -103,12 +103,12 @@ of dtype conversion. engine : str, default None If io is not a buffer or path, this must be set to identify io. - Acceptable values are None, "xlrd", "openpyxl", "odf", or "pyxlsb". + Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd". Engine compatibility: - "xlrd" supports most old/new Excel file formats. - "openpyxl" engine supports newer Excel file formats. 
- "odf" engine supports OpenDocument file formats (.odf, .ods, .odt). - "pyxlsb" engine supports Binary Excel files. + - "xlrd" supports most old/new Excel file formats. + - "openpyxl" supports newer Excel file formats. + - "odf" supports OpenDocument file formats (.odf, .ods, .odt). + - "pyxlsb" supports Binary Excel files. converters : dict, default None Dict of functions for converting values in certain columns. Keys can either be integers or column labels, values are functions that take one @@ -800,12 +800,13 @@ class ExcelFile: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. engine : str, default None If io is not a buffer or path, this must be set to identify io. - Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``. + Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``, + default ``xlrd``. Engine compatibility: - ``xlrd`` supports most old/new Excel file formats. - ``openpyxl`` engine supports newer Excel file formats. - ``odf`` engine supports OpenDocument file formats (.odf, .ods, .odt). - ``pyxlsb`` engine supports Binary Excel files. + - ``xlrd`` supports most old/new Excel file formats. + - ``openpyxl`` supports newer Excel file formats. + - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). + - ``pyxlsb`` supports Binary Excel files. """ from pandas.io.excel._odfreader import _ODFReader From 0fda069b8a960a28c75f16a6d42d727a5382562f Mon Sep 17 00:00:00 2001 From: Miguel Marques Date: Fri, 29 May 2020 21:46:52 +0100 Subject: [PATCH 3/3] Fix read_excel docstring indentation --- pandas/io/excel/_base.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 1f4b9f1fdf6bc..6cb46d17eb358 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -104,11 +104,11 @@ engine : str, default None If io is not a buffer or path, this must be set to identify io. 
Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd". - Engine compatibility: - - "xlrd" supports most old/new Excel file formats. - - "openpyxl" supports newer Excel file formats. - - "odf" supports OpenDocument file formats (.odf, .ods, .odt). - - "pyxlsb" supports Binary Excel files. + Engine compatibility : + - "xlrd" supports most old/new Excel file formats. + - "openpyxl" supports newer Excel file formats. + - "odf" supports OpenDocument file formats (.odf, .ods, .odt). + - "pyxlsb" supports Binary Excel files. converters : dict, default None Dict of functions for converting values in certain columns. Keys can either be integers or column labels, values are functions that take one @@ -790,23 +790,24 @@ def close(self): class ExcelFile: """ Class for parsing tabular excel sheets into DataFrame objects. - Uses xlrd by default. See read_excel for more documentation + + Uses xlrd engine by default. See read_excel for more documentation Parameters ---------- io : str, path object (pathlib.Path or py._path.local.LocalPath), - a file-like object, xlrd workbook or openpypl workbook. + a file-like object, xlrd workbook or openpyxl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. engine : str, default None If io is not a buffer or path, this must be set to identify io. Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``, - default ``xlrd``. - Engine compatibility: - - ``xlrd`` supports most old/new Excel file formats. - - ``openpyxl`` supports newer Excel file formats. - - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). - - ``pyxlsb`` supports Binary Excel files. + default ``xlrd``. + Engine compatibility : + - ``xlrd`` supports most old/new Excel file formats. + - ``openpyxl`` supports newer Excel file formats. + - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). + - ``pyxlsb`` supports Binary Excel files. 
""" from pandas.io.excel._odfreader import _ODFReader