Skip to content

Commit d16c631

Browse files
author
Patrick O'Brien
committed
Support for s3 file handling
1 parent 6518c79 commit d16c631

File tree

1 file changed

+32
-11
lines changed

1 file changed

+32
-11
lines changed

pandas/io/parsers.py

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,12 @@ def _is_url(url):
188188
except:
189189
return False
190190

191+
def _is_s3_url(url):
192+
""" Check for an s3 url """
193+
try:
194+
return urlparse.urlparse(url).scheme == 's3'
195+
except:
196+
return False
191197

192198
def _read(filepath_or_buffer, kwds):
193199
"Generic reader of line files."
@@ -196,17 +202,32 @@ def _read(filepath_or_buffer, kwds):
196202
if skipfooter is not None:
197203
kwds['skip_footer'] = skipfooter
198204

199-
if isinstance(filepath_or_buffer, basestring) and _is_url(filepath_or_buffer):
200-
from urllib2 import urlopen
201-
filepath_or_buffer = urlopen(filepath_or_buffer)
202-
if py3compat.PY3: # pragma: no cover
203-
if encoding:
204-
errors = 'strict'
205-
else:
206-
errors = 'replace'
207-
encoding = 'utf-8'
208-
bytes = filepath_or_buffer.read()
209-
filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
205+
if isinstance(filepath_or_buffer, basestring):
206+
if _is_url(filepath_or_buffer):
207+
from urllib2 import urlopen
208+
filepath_or_buffer = urlopen(filepath_or_buffer)
209+
if py3compat.PY3: # pragma: no cover
210+
if encoding:
211+
errors = 'strict'
212+
else:
213+
errors = 'replace'
214+
encoding = 'utf-8'
215+
bytes = filepath_or_buffer.read()
216+
filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
217+
218+
if _is_s3_url(filepath_or_buffer):
219+
try:
220+
import boto
221+
except:
222+
raise ImportError("boto is required to handle s3 files")
223+
# Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
224+
# are environment variables
225+
parsed_url = urlparse.urlparse(filepath_or_buffer)
226+
conn = boto.connect_s3()
227+
b = conn.get_bucket(parsed_url.netloc)
228+
k = boto.s3.key.Key(b)
229+
k.key = parsed_url.path
230+
filepath_or_buffer = StringIO(k.get_contents_as_string())
210231

211232
if kwds.get('date_parser', None) is not None:
212233
if isinstance(kwds['parse_dates'], bool):

0 commit comments

Comments
 (0)