
Commit 1f2cb49

Merge pull request #5100 from mindw/win32_tests_fix_master
TST: win32 paths cannot be turned into URLs by prefixing them with "file://" v2
2 parents: 1695320 + d20961a
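
In short: a native Windows path such as C:\data\banklist.html contains backslashes and a drive-letter colon, so simply prepending "file://" does not yield a valid URL, while building the URL via pathname2url() and urljoin() does. A minimal sketch of the difference (not part of the diff; the commented results are what the calls produce on Windows, and the path is only an example):

    try:                                     # Python 3
        from urllib.request import pathname2url
        from urllib.parse import urljoin
    except ImportError:                      # Python 2
        from urllib import pathname2url
        from urlparse import urljoin

    path = r"C:\data\banklist.html"          # example native Windows path

    # Naive prefixing keeps the backslashes and the drive-letter colon,
    # so the result is not a usable file URL:
    "file://" + path                         # 'file://C:\\data\\banklist.html'

    # Going through pathname2url + urljoin yields a well-formed URL (on Windows):
    urljoin("file:", pathname2url(path))     # 'file:///C:/data/banklist.html'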

4 files changed: +28 / -10 lines


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -38,3 +38,4 @@ pandas/io/*.json
 .pydevproject
 .settings
 .idea
+*.pdb

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
@@ -755,6 +755,8 @@ Bug Fixes
 - Bug when renaming then set_index on a DataFrame (:issue:`5344`)
 - Test suite no longer leaves around temporary files when testing graphics. (:issue:`5347`)
   (thanks for catching this @yarikoptic!)
+- Fixed html tests on win32. (:issue:`4580`)
+

 pandas 0.12.0
 -------------

pandas/io/common.py

Lines changed: 19 additions & 4 deletions
@@ -9,18 +9,18 @@


 if compat.PY3:
-    from urllib.request import urlopen
+    from urllib.request import urlopen, pathname2url
     _urlopen = urlopen
     from urllib.parse import urlparse as parse_url
     import urllib.parse as compat_parse
-    from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode
+    from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode, urljoin
     from urllib.error import URLError
     from http.client import HTTPException
 else:
     from urllib2 import urlopen as _urlopen
-    from urllib import urlencode
+    from urllib import urlencode, pathname2url
     from urlparse import urlparse as parse_url
-    from urlparse import uses_relative, uses_netloc, uses_params
+    from urlparse import uses_relative, uses_netloc, uses_params, urljoin
     from urllib2 import URLError
     from httplib import HTTPException
 from contextlib import contextmanager, closing

@@ -134,6 +134,21 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
     return filepath_or_buffer, None


+def file_path_to_url(path):
+    """
+    converts an absolute native path to a FILE URL.
+
+    Parameters
+    ----------
+    path : a path in native format
+
+    Returns
+    -------
+    a valid FILE URL
+    """
+    return urljoin('file:', pathname2url(path))
+
+
 # ZipFile is not a context manager for <= 2.6
 # must be tuple index here since 2.6 doesn't use namedtuple for version_info
 if sys.version_info[1] <= 6:
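
A quick usage sketch of the new helper (the paths are illustrative; the exact URL returned depends on the operating system, since pathname2url is platform-specific):

    from pandas.io.common import file_path_to_url

    # On a POSIX system:
    file_path_to_url("/home/user/data/banklist.html")
    # -> 'file:///home/user/data/banklist.html'

    # On Windows the backslashes and the drive letter are normalised as well:
    # file_path_to_url(r"C:\data\banklist.html") -> 'file:///C:/data/banklist.html'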

pandas/io/tests/test_html.py

Lines changed: 6 additions & 6 deletions
@@ -21,7 +21,7 @@
 from pandas import (DataFrame, MultiIndex, read_csv, Timestamp, Index,
                     date_range, Series)
 from pandas.compat import map, zip, StringIO, string_types
-from pandas.io.common import URLError, urlopen
+from pandas.io.common import URLError, urlopen, file_path_to_url
 from pandas.io.html import read_html

 import pandas.util.testing as tm

@@ -311,7 +311,7 @@ def test_invalid_url(self):
     @slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.read_html('file://' + url, 'First', attrs={'id': 'table'})
+        dfs = self.read_html(file_path_to_url(url), 'First', attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)
         for df in dfs:
             tm.assert_isinstance(df, DataFrame)

@@ -362,7 +362,7 @@ def test_multiindex_header_index_skiprows(self):
     @slow
     def test_regex_idempotency(self):
         url = self.banklist_data
-        dfs = self.read_html('file://' + url,
+        dfs = self.read_html(file_path_to_url(url),
                              match=re.compile(re.compile('Florida')),
                              attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)

@@ -637,9 +637,9 @@ def test_invalid_flavor():
                              flavor='not a* valid**++ flaver')


-def get_elements_from_url(url, element='table', base_url="file://"):
+def get_elements_from_file(url, element='table'):
     _skip_if_none_of(('bs4', 'html5lib'))
-    url = "".join([base_url, url])
+    url = file_path_to_url(url)
     from bs4 import BeautifulSoup
     with urlopen(url) as f:
         soup = BeautifulSoup(f, features='html5lib')

@@ -651,7 +651,7 @@ def test_bs4_finds_tables():
     filepath = os.path.join(DATA_PATH, "spam.html")
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore')
-        assert get_elements_from_url(filepath, 'table')
+        assert get_elements_from_file(filepath, 'table')


 def get_lxml_elements(url, element):
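
For context, the renamed test helper relies on the fact that a URL built by file_path_to_url can be opened like any other URL. A rough stand-alone equivalent, assuming bs4 and html5lib are installed and that the spam.html fixture path below (taken from the test suite's data directory) exists locally:

    import os
    from bs4 import BeautifulSoup
    from pandas.io.common import urlopen, file_path_to_url

    # assumed location of the fixture inside a pandas checkout
    filepath = os.path.abspath(os.path.join("pandas", "io", "tests", "data", "spam.html"))

    with urlopen(file_path_to_url(filepath)) as f:
        soup = BeautifulSoup(f, features='html5lib')

    print(len(soup.find_all('table')))   # number of <table> elements found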
