def _skip_if_no_boto():
    """Skip the calling test when ``boto`` cannot be imported.

    Raises
    ------
    nose.SkipTest
        If ``import boto`` raises ``ImportError``.
    """
    try:
        # Imported only to probe availability; the name itself is unused.
        import boto  # NOQA
    except ImportError:
        skip_reason = 'boto not installed, skipping'
        raise nose.SkipTest(skip_reason)
@tm.network(check_before_test=True)
def test_read_from_s3_url(self):
    """Check that ``read_excel`` on an ``s3://`` URL matches ``test1``.

    The frame read from ``s3://pandas-test/test1<ext>`` is compared with
    the frame returned by ``self.get_exceldf('test1')``. The test is
    skipped when ``boto`` is not importable.
    """
    _skip_if_no_boto()

    s3_location = 's3://pandas-test/test1' + self.ext
    from_s3 = read_excel(s3_location)
    expected = self.get_exceldf('test1')
    tm.assert_frame_equal(from_s3, expected)