From d16c631248aaf57771beff1b565d7168069ba680 Mon Sep 17 00:00:00 2001
From: Patrick O'Brien <pobrien@monetate.com>
Date: Tue, 30 Apr 2013 22:37:43 -0400
Subject: [PATCH 1/3] Support for s3 file handling

---
 pandas/io/parsers.py | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 161e7a521b997..d8f99c083e832 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -188,6 +188,12 @@ def _is_url(url):
     except:
         return False
 
+def _is_s3_url(url):
+    """Return True if `url` parses with the 's3' scheme, else False."""
+    try:
+        return urlparse.urlparse(url).scheme == 's3'
+    except Exception:  # unparseable input (e.g. not a string) is not s3
+        return False
 
 def _read(filepath_or_buffer, kwds):
     "Generic reader of line files."
@@ -196,17 +202,32 @@ def _read(filepath_or_buffer, kwds):
     if skipfooter is not None:
         kwds['skip_footer'] = skipfooter
 
-    if isinstance(filepath_or_buffer, basestring) and _is_url(filepath_or_buffer):
-        from urllib2 import urlopen
-        filepath_or_buffer = urlopen(filepath_or_buffer)
-        if py3compat.PY3:  # pragma: no cover
-            if encoding:
-                errors = 'strict'
-            else:
-                errors = 'replace'
-                encoding = 'utf-8'
-            bytes = filepath_or_buffer.read()
-            filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
+    if isinstance(filepath_or_buffer, basestring):
+        if _is_url(filepath_or_buffer):
+            from urllib2 import urlopen
+            filepath_or_buffer = urlopen(filepath_or_buffer)
+            if py3compat.PY3:  # pragma: no cover
+                if encoding:
+                    errors = 'strict'
+                else:
+                    errors = 'replace'
+                    encoding = 'utf-8'
+                bytes = filepath_or_buffer.read()
+                filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
+
+        if _is_s3_url(filepath_or_buffer):
+            try:
+                import boto
+            except:
+                raise ImportError("boto is required to handle s3 files")
+            # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
+            # are environment variables
+            parsed_url = urlparse.urlparse(filepath_or_buffer)
+            conn = boto.connect_s3()
+            b = conn.get_bucket(parsed_url.netloc)
+            k = boto.s3.key.Key(b)
+            k.key = parsed_url.path
+            filepath_or_buffer = StringIO(k.get_contents_as_string())
 
     if kwds.get('date_parser', None) is not None:
         if isinstance(kwds['parse_dates'], bool):

From e93838601d6ae106702b4b5f90ddf30ea0c03d39 Mon Sep 17 00:00:00 2001
From: Patrick O'Brien <pobrien@monetate.com>
Date: Mon, 6 May 2013 15:10:27 -0400
Subject: [PATCH 2/3] Update README and RELEASE for new S3 support

---
 README.rst  | 1 +
 RELEASE.rst | 1 +
 2 files changed, 2 insertions(+)

diff --git a/README.rst b/README.rst
index c9b70f07b0862..ea713006c7189 100644
--- a/README.rst
+++ b/README.rst
@@ -90,6 +90,7 @@ Optional dependencies
      * openpyxl version 1.6.1 or higher, for writing .xlsx files
      * xlrd >= 0.9.0
      * Needed for Excel I/O
+  * `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
 
 
 Installation from sources
diff --git a/RELEASE.rst b/RELEASE.rst
index 77e8e85db6a76..0f52babf26ff0 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -32,6 +32,7 @@ pandas 0.11.1
 
   - pd.read_html() can now parse HTML string, files or urls and return dataframes
     courtesy of @cpcloud. (GH3477_)
+  - Support for reading Amazon S3 files. (GH3504_)
 
 **Improvements to existing features**
 

From f06b43c5c2039ea150cc18603b3ff833ea1e1d25 Mon Sep 17 00:00:00 2001
From: Patrick O'Brien <pobrien@monetate.com>
Date: Fri, 10 May 2013 10:05:08 -0400
Subject: [PATCH 3/3] Update docs noting handling of s3 locations.

---
 doc/source/io.rst    | 5 +++--
 pandas/io/parsers.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 9001ae393d552..8da3d422c50be 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -40,8 +40,9 @@ for some advanced strategies
 
 They can take a number of arguments:
 
-  - ``filepath_or_buffer``: Either a string path to a file, or any object with a
-    ``read`` method (such as an open file or ``StringIO``).
+  - ``filepath_or_buffer``: Either a string path to a file, a URL
+    (including http, ftp, and s3 locations), or any object with a ``read``
+    method (such as an open file or ``StringIO``).
   - ``sep`` or ``delimiter``: A delimiter / separator to split fields
     on. `read_csv` is capable of inferring the delimiter automatically in some
     cases by "sniffing." The separator may be specified as a regular
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index d8f99c083e832..1430843998843 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -34,7 +34,7 @@ class DateConversionError(Exception):
 Parameters
 ----------
 filepath_or_buffer : string or file handle / StringIO. The string could be
-    a URL. Valid URL schemes include http, ftp, and file. For file URLs, a host
+    a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
     is expected. For instance, a local file could be
     file ://localhost/path/to/table.csv
 %s