Skip to content

Commit 43827f0

Browse files
committed
wrap urlopen with requests
1 parent fa47b8d commit 43827f0

File tree

2 files changed

+42
-15
lines changed

2 files changed

+42
-15
lines changed

pandas/io/common.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,13 @@
3030

3131
if compat.PY3:
3232
from urllib.request import urlopen, pathname2url
33-
_urlopen = urlopen
3433
from urllib.parse import urlparse as parse_url
3534
from urllib.parse import (uses_relative, uses_netloc, uses_params,
3635
urlencode, urljoin)
3736
from urllib.error import URLError
3837
from http.client import HTTPException # noqa
3938
else:
40-
from urllib2 import urlopen as _urlopen
39+
from urllib2 import urlopen as urlopen2
4140
from urllib import urlencode, pathname2url # noqa
4241
from urlparse import urlparse as parse_url
4342
from urlparse import uses_relative, uses_netloc, uses_params, urljoin
@@ -46,10 +45,10 @@
4645
from contextlib import contextmanager, closing # noqa
4746
from functools import wraps # noqa
4847

49-
# @wraps(_urlopen)
48+
# @wraps(urlopen2)
5049
@contextmanager
5150
def urlopen(*args, **kwargs):
52-
with closing(_urlopen(*args, **kwargs)) as f:
51+
with closing(urlopen2(*args, **kwargs)) as f:
5352
yield f
5453

5554

@@ -91,6 +90,34 @@ def _is_url(url):
9190
return False
9291

9392

93+
def _urlopen(url, session=None):
94+
compression = None
95+
content_encoding = None
96+
try:
97+
import requests
98+
if session:
99+
if not isinstance(session, requests.sessions.Session):
100+
raise ValueError(
101+
'Expected a requests.sessions.Session object, '
102+
'got {!r}'.format(session)
103+
)
104+
r = session.get(url)
105+
else:
106+
r = requests.get(url)
107+
r.raise_for_status
108+
content = r.content
109+
except ImportError:
110+
r = urlopen(url)
111+
content = r.read()
112+
content_encoding = r.headers.get('Content-Encoding', None)
113+
r.close()
114+
if content_encoding == 'gzip':
115+
# Override compression based on Content-Encoding header.
116+
compression = 'gzip'
117+
reader = BytesIO(content)
118+
return reader, compression
119+
120+
94121
def _expand_user(filepath_or_buffer):
95122
"""Return the argument with an initial component of ~ or ~user
96123
replaced by that user's home directory.
@@ -177,7 +204,7 @@ def is_gcs_url(url):
177204

178205

179206
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
180-
compression=None, mode=None):
207+
compression=None, mode=None, session=None):
181208
"""
182209
If the filepath_or_buffer is a url, translate and return the buffer.
183210
Otherwise passthrough.
@@ -199,13 +226,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
199226
filepath_or_buffer = _stringify_path(filepath_or_buffer)
200227

201228
if _is_url(filepath_or_buffer):
202-
req = _urlopen(filepath_or_buffer)
203-
content_encoding = req.headers.get('Content-Encoding', None)
204-
if content_encoding == 'gzip':
205-
# Override compression based on Content-Encoding header
206-
compression = 'gzip'
207-
reader = BytesIO(req.read())
208-
req.close()
229+
reader, compression = _urlopen(filepath_or_buffer, session=session)
209230
return reader, encoding, compression, True
210231

211232
if is_s3_url(filepath_or_buffer):

pandas/io/parsers.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,9 @@
302302
If a filepath is provided for `filepath_or_buffer`, map the file object
303303
directly onto memory and access the data directly from there. Using this
304304
option can improve performance because there is no longer any I/O overhead.
305+
session : requests.Session
306+
Session object carrying the requests configuration used to fetch a remote file
307+
(requires the requests library)
305308
306309
Returns
307310
-------
@@ -427,10 +430,11 @@ def _read(filepath_or_buffer, kwds):
427430
encoding = re.sub('_', '-', encoding).lower()
428431
kwds['encoding'] = encoding
429432

433+
session = kwds.get('session', None)
430434
compression = kwds.get('compression')
431435
compression = _infer_compression(filepath_or_buffer, compression)
432436
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
433-
filepath_or_buffer, encoding, compression)
437+
filepath_or_buffer, encoding, compression, session=session)
434438
kwds['compression'] = compression
435439

436440
if kwds.get('date_parser', None) is not None:
@@ -617,7 +621,8 @@ def parser_f(filepath_or_buffer,
617621
delim_whitespace=False,
618622
low_memory=_c_parser_defaults['low_memory'],
619623
memory_map=False,
620-
float_precision=None):
624+
float_precision=None,
625+
session=None):
621626

622627
# deprecate read_table GH21948
623628
if name == "read_table":
@@ -699,7 +704,8 @@ def parser_f(filepath_or_buffer,
699704
mangle_dupe_cols=mangle_dupe_cols,
700705
tupleize_cols=tupleize_cols,
701706
infer_datetime_format=infer_datetime_format,
702-
skip_blank_lines=skip_blank_lines)
707+
skip_blank_lines=skip_blank_lines,
708+
session=session)
703709

704710
return _read(filepath_or_buffer, kwds)
705711

0 commit comments

Comments
 (0)