pandas-dev · onesandzeroes · Mar 26, 2016 · jreback · Mar 30, 2016 · onesandzeroes
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -169,3 +169,4 @@ Bug Fixes
 - Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
 - Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`)
 - Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
+- ``read_excel`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -13,7 +13,7 @@
 from pandas.core.frame import DataFrame
 from pandas.io.parsers import TextParser
 from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
-                              get_filepath_or_buffer, _is_s3_url)
+                              get_filepath_or_buffer)
 from pandas.tseries.period import Period
 from pandas import json
 from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -82,7 +82,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
 
     Parameters
     ----------
-    io : string, file-like object, pandas ExcelFile, or xlrd workbook.
+    io : string, path object (pathlib.Path or py._path.local.LocalPath),
+        file-like object, pandas ExcelFile, or xlrd workbook.
         The string could be a URL. Valid URL schemes include http, ftp, s3,
         and file. For file URLs, a host is expected. For instance, a local
         file could be file://localhost/path/to/workbook.xlsx
@@ -184,8 +185,9 @@ class ExcelFile(object):
 
     Parameters
     ----------
-    io : string, file-like object or xlrd workbook
-        If a string, expected to be a path to xls or xlsx file
+    io : string, path object (pathlib.Path or py._path.local.LocalPath),
+        file-like object or xlrd workbook
+        If a string or path object, expected to be a path to xls or xlsx file
     engine: string, default None
         If io is not a buffer or path, this must be set to identify io.
         Acceptable values are None or xlrd
@@ -207,21 +209,22 @@ def __init__(self, io, **kwds):
         if engine is not None and engine != 'xlrd':
             raise ValueError("Unknown engine: %s" % engine)
 
-        if isinstance(io, compat.string_types):
-            if _is_s3_url(io):
-                buffer, _, _ = get_filepath_or_buffer(io)
-                self.book = xlrd.open_workbook(file_contents=buffer.read())
-            elif _is_url(io):
-                data = _urlopen(io).read()
-                self.book = xlrd.open_workbook(file_contents=data)
-            else:
-                self.book = xlrd.open_workbook(io)
-        elif engine == 'xlrd' and isinstance(io, xlrd.Book):
+        # If io is a url, we want to keep the data as bytes, so we can't
+        # pass it to get_filepath_or_buffer()
+        if _is_url(io):
+            io = _urlopen(io)
+        # Handle S3 urls, path objects, etc.; these are converted to a
+        # buffer or a path string
+        io, _, _ = get_filepath_or_buffer(io)
+
+        if engine == 'xlrd' and isinstance(io, xlrd.Book):
             self.book = io
         elif not isinstance(io, xlrd.Book) and hasattr(io, "read"):
             # N.B. xlrd.Book has a read attribute too
             data = io.read()
             self.book = xlrd.open_workbook(file_contents=data)
+        elif isinstance(io, compat.string_types):
+            self.book = xlrd.open_workbook(io)
         else:
             raise ValueError('Must explicitly set engine if not passing in'
                              ' buffer or path for io.')

diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -528,6 +528,33 @@ def test_read_from_file_url(self):
 
         tm.assert_frame_equal(url_table, local_table)
 
+    def test_read_from_pathlib_path(self):
+        tm._skip_if_no_pathlib()
+
+        from pathlib import Path
+
+        str_path = os.path.join(self.dirpath, 'test1' + self.ext)
+        expected = read_excel(str_path, 'Sheet1', index_col=0)
+
+        path_obj = Path(self.dirpath, 'test1' + self.ext)
+        actual = read_excel(path_obj, 'Sheet1', index_col=0)
+
+        tm.assert_frame_equal(expected, actual)
+
+    def test_read_from_py_localpath(self):
+        tm._skip_if_no_localpath()
+
+        from py.path import local as LocalPath
+
+        str_path = os.path.join(self.dirpath, 'test1' + self.ext)
+        expected = read_excel(str_path, 'Sheet1', index_col=0)
+
+        abs_dir = os.path.abspath(self.dirpath)
+        path_obj = LocalPath(abs_dir).join('test1' + self.ext)
+        actual = read_excel(path_obj, 'Sheet1', index_col=0)
+
+        tm.assert_frame_equal(expected, actual)
+
     def test_reader_closes_file(self):
 
         pth = os.path.join(self.dirpath, 'test1' + self.ext)