-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
use requests when it is installed #28874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
2031cfe
95e3b75
ac39c2e
03959aa
02a2365
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,13 +183,46 @@ def is_gcs_url(url) -> bool: | |
return parse_url(url).scheme in ["gcs", "gs"] | ||
|
||
|
||
def _urlopen(*args, **kwargs):
    """
    Download a URL and return its whole body as an in-memory buffer.

    ``requests`` is used when it can be imported; otherwise the stdlib-based
    ``urlopen`` wrapper is used as a fallback.

    Parameters
    ----------
    *args
        The first positional argument is the URL. On the stdlib fallback all
        positional arguments are forwarded to ``urlopen``.
    **kwargs
        ``session`` (optional) is a ``requests.sessions.Session`` used to
        perform the request when ``requests`` is importable. On the stdlib
        fallback every keyword argument is forwarded to ``urlopen``.

    Returns
    -------
    reader : BytesIO
        Buffer holding the downloaded bytes.
    compression : str or None
        ``"gzip"`` when the stdlib fallback saw a ``Content-Encoding: gzip``
        header, otherwise ``None``. The header is only inspected on the
        stdlib path, so the requests path always reports ``None``.

    Raises
    ------
    ValueError
        If ``session`` is given, ``requests`` is importable, and ``session``
        is not a ``requests.sessions.Session``.
    """
    session = kwargs.pop("session", None)

    # NOTE(review): reviewers asked for
    # pandas.compat._optional.import_optional_dependency here instead of a
    # bare import (author's reply: "Not sure if I got it right.").
    # Only the import is guarded, so an ImportError raised while performing
    # the request is no longer mistaken for "requests is not installed".
    try:
        import requests
    except ImportError:
        requests = None

    if requests is None:
        # Pass ``session`` explicitly so the wrapper always finds the key.
        r = urlopen(*args, session=session, **kwargs)
        try:
            content = r.read()
            content_encoding = r.headers.get("Content-Encoding", None)
        finally:
            # Release the connection even if reading fails.
            r.close()
        # NOTE(review): a reviewer asked for this check to live with the
        # stdlib fallback, the only path that sets content_encoding.
        # Override compression based on Content-Encoding header.
        compression = "gzip" if content_encoding == "gzip" else None
        return BytesIO(content), compression

    url = args[0]
    if session is not None:
        if not isinstance(session, requests.sessions.Session):
            raise ValueError(
                "Expected a requests.sessions.Session object, "
                "got {!r}".format(session)
            )
        r = session.get(url)
    else:
        r = requests.get(url)
    try:
        r.raise_for_status()
        content = r.content
    finally:
        # Close the response even when raise_for_status() raises.
        r.close()
    return BytesIO(content), None
|
||
|
||
def urlopen(*args, **kwargs):
    """
    Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
    the stdlib.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to ``urllib.request.urlopen``. An optional ``session``
        keyword is accepted and discarded, because the stdlib opener has no
        such parameter.

    Returns
    -------
    The response object returned by ``urllib.request.urlopen``.
    """
    import urllib.request

    # Drop ``session`` if present; the default keeps calls that never
    # passed it from failing with KeyError.
    kwargs.pop("session", None)

    return urllib.request.urlopen(*args, **kwargs)
|
||
|
||
|
@@ -198,6 +231,7 @@ def get_filepath_or_buffer( | |
encoding: Optional[str] = None, | ||
compression: Optional[str] = None, | ||
mode: Optional[str] = None, | ||
session=None, | ||
): | ||
""" | ||
If the filepath_or_buffer is a url, translate and return the buffer. | ||
|
@@ -221,13 +255,7 @@ def get_filepath_or_buffer( | |
filepath_or_buffer = _stringify_path(filepath_or_buffer) | ||
|
||
if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): | ||
req = urlopen(filepath_or_buffer) | ||
content_encoding = req.headers.get("Content-Encoding", None) | ||
if content_encoding == "gzip": | ||
# Override compression based on Content-Encoding header | ||
compression = "gzip" | ||
reader = BytesIO(req.read()) | ||
req.close() | ||
reader, compression = _urlopen(filepath_or_buffer, session=session) | ||
return reader, encoding, compression, True | ||
|
||
if is_s3_url(filepath_or_buffer): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ | |
_stringify_path, | ||
_validate_header_arg, | ||
get_filepath_or_buffer, | ||
urlopen, | ||
_urlopen, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: can you use the non-private version? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: Done. |
||
) | ||
from pandas.io.excel._util import ( | ||
_fill_mi_header, | ||
|
@@ -336,10 +336,10 @@ def read_excel( | |
|
||
|
||
class _BaseExcelReader(metaclass=abc.ABCMeta): | ||
def __init__(self, filepath_or_buffer): | ||
def __init__(self, filepath_or_buffer, session=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a bit lost on how to pass the |
||
# If filepath_or_buffer is a url, load the data into a BytesIO | ||
if _is_url(filepath_or_buffer): | ||
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) | ||
filepath_or_buffer, _ = _urlopen(filepath_or_buffer, session=session) | ||
elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): | ||
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -342,6 +342,9 @@ | |
values. The options are `None` for the ordinary converter, | ||
`high` for the high-precision converter, and `round_trip` for the | ||
round-trip converter. | ||
session : requests.Session | ||
object with the requests session configuration for a remote file. | ||
(requires the requests library) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: add a "versionadded" tag. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: Not sure what that means. |
||
|
||
Returns | ||
------- | ||
|
@@ -423,6 +426,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): | |
encoding = re.sub("_", "-", encoding).lower() | ||
kwds["encoding"] = encoding | ||
|
||
session = kwds.get("session", None) | ||
compression = kwds.get("compression", "infer") | ||
compression = _infer_compression(filepath_or_buffer, compression) | ||
|
||
|
@@ -431,7 +435,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): | |
# though mypy handling of conditional imports is difficult. | ||
# See https://github.com/python/mypy/issues/1297 | ||
fp_or_buf, _, compression, should_close = get_filepath_or_buffer( | ||
filepath_or_buffer, encoding, compression | ||
filepath_or_buffer, encoding, compression, session=session | ||
) | ||
kwds["compression"] = compression | ||
|
||
|
@@ -588,6 +592,7 @@ def parser_f( | |
low_memory=_c_parser_defaults["low_memory"], | ||
memory_map=False, | ||
float_precision=None, | ||
session=None, | ||
): | ||
|
||
# gh-23761 | ||
|
@@ -674,6 +679,7 @@ def parser_f( | |
mangle_dupe_cols=mangle_dupe_cols, | ||
infer_datetime_format=infer_datetime_format, | ||
skip_blank_lines=skip_blank_lines, | ||
session=session, | ||
) | ||
|
||
return _read(filepath_or_buffer, kwds) | ||
|
Uh oh!
There was an error while loading. Please reload this page.