
Commit 1f2cb49

Merge pull request #5100 from mindw/win32_tests_fix_master
TST: win32 paths cannot be turned into URLs by prefixing them with "file://" v2
2 parents: 1695320 + d20961a
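
In short: a native Windows path such as C:\data\banklist.html contains backslashes and a drive-letter colon, so simply prepending "file://" does not yield a valid URL, while building the URL via pathname2url() and urljoin() does. A minimal sketch of the difference (not part of the diff; the commented results are what the calls produce on Windows, and the path is only an example):

    try:                                     # Python 3
        from urllib.request import pathname2url
        from urllib.parse import urljoin
    except ImportError:                      # Python 2
        from urllib import pathname2url
        from urlparse import urljoin

    path = r"C:\data\banklist.html"          # example native Windows path

    # Naive prefixing keeps the backslashes and the drive-letter colon,
    # so the result is not a usable file URL:
    "file://" + path                         # 'file://C:\\data\\banklist.html'

    # Going through pathname2url + urljoin yields a well-formed URL (on Windows):
    urljoin("file:", pathname2url(path))     # 'file:///C:/data/banklist.html'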

4 files changed: +28 / -10 lines


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -38,3 +38,4 @@ pandas/io/*.json
 .pydevproject
 .settings
 .idea
+*.pdb

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
@@ -755,6 +755,8 @@ Bug Fixes
 - Bug when renaming then set_index on a DataFrame (:issue:`5344`)
 - Test suite no longer leaves around temporary files when testing graphics. (:issue:`5347`)
   (thanks for catching this @yarikoptic!)
+- Fixed html tests on win32. (:issue:`4580`)
+

 pandas 0.12.0
 -------------

pandas/io/common.py

Lines changed: 19 additions & 4 deletions
@@ -9,18 +9,18 @@


 if compat.PY3:
-    from urllib.request import urlopen
+    from urllib.request import urlopen, pathname2url
     _urlopen = urlopen
     from urllib.parse import urlparse as parse_url
     import urllib.parse as compat_parse
-    from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode
+    from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode, urljoin
     from urllib.error import URLError
     from http.client import HTTPException
 else:
     from urllib2 import urlopen as _urlopen
-    from urllib import urlencode
+    from urllib import urlencode, pathname2url
     from urlparse import urlparse as parse_url
-    from urlparse import uses_relative, uses_netloc, uses_params
+    from urlparse import uses_relative, uses_netloc, uses_params, urljoin
     from urllib2 import URLError
     from httplib import HTTPException
 from contextlib import contextmanager, closing

@@ -134,6 +134,21 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
     return filepath_or_buffer, None


+def file_path_to_url(path):
+    """
+    converts an absolute native path to a FILE URL.
+
+    Parameters
+    ----------
+    path : a path in native format
+
+    Returns
+    -------
+    a valid FILE URL
+    """
+    return urljoin('file:', pathname2url(path))
+
+
 # ZipFile is not a context manager for <= 2.6
 # must be tuple index here since 2.6 doesn't use namedtuple for version_info
 if sys.version_info[1] <= 6:
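
A quick usage sketch of the new helper (the paths are illustrative; the exact URL returned depends on the operating system, since pathname2url is platform-specific):

    from pandas.io.common import file_path_to_url

    # On a POSIX system:
    file_path_to_url("/home/user/data/banklist.html")
    # -> 'file:///home/user/data/banklist.html'

    # On Windows the backslashes and the drive letter are normalised as well:
    # file_path_to_url(r"C:\data\banklist.html") -> 'file:///C:/data/banklist.html'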

pandas/io/tests/test_html.py

Lines changed: 6 additions & 6 deletions
@@ -21,7 +21,7 @@
 from pandas import (DataFrame, MultiIndex, read_csv, Timestamp, Index,
                     date_range, Series)
 from pandas.compat import map, zip, StringIO, string_types
-from pandas.io.common import URLError, urlopen
+from pandas.io.common import URLError, urlopen, file_path_to_url
 from pandas.io.html import read_html

 import pandas.util.testing as tm

@@ -311,7 +311,7 @@ def test_invalid_url(self):
     @slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.read_html('file://' + url, 'First', attrs={'id': 'table'})
+        dfs = self.read_html(file_path_to_url(url), 'First', attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)
         for df in dfs:
             tm.assert_isinstance(df, DataFrame)

@@ -362,7 +362,7 @@ def test_multiindex_header_index_skiprows(self):
     @slow
     def test_regex_idempotency(self):
         url = self.banklist_data
-        dfs = self.read_html('file://' + url,
+        dfs = self.read_html(file_path_to_url(url),
                              match=re.compile(re.compile('Florida')),
                              attrs={'id': 'table'})
         tm.assert_isinstance(dfs, list)

@@ -637,9 +637,9 @@ def test_invalid_flavor():
                              flavor='not a* valid**++ flaver')


-def get_elements_from_url(url, element='table', base_url="file://"):
+def get_elements_from_file(url, element='table'):
     _skip_if_none_of(('bs4', 'html5lib'))
-    url = "".join([base_url, url])
+    url = file_path_to_url(url)
     from bs4 import BeautifulSoup
     with urlopen(url) as f:
         soup = BeautifulSoup(f, features='html5lib')

@@ -651,7 +651,7 @@ def test_bs4_finds_tables():
     filepath = os.path.join(DATA_PATH, "spam.html")
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore')
-        assert get_elements_from_url(filepath, 'table')
+        assert get_elements_from_file(filepath, 'table')


 def get_lxml_elements(url, element):
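
For context, the renamed test helper relies on the fact that a URL built by file_path_to_url can be opened like any other URL. A rough stand-alone equivalent, assuming bs4 and html5lib are installed and that the spam.html fixture path below (taken from the test suite's data directory) exists locally:

    import os
    from bs4 import BeautifulSoup
    from pandas.io.common import urlopen, file_path_to_url

    # assumed location of the fixture inside a pandas checkout
    filepath = os.path.abspath(os.path.join("pandas", "io", "tests", "data", "spam.html"))

    with urlopen(file_path_to_url(filepath)) as f:
        soup = BeautifulSoup(f, features='html5lib')

    print(len(soup.find_all('table')))   # number of <table> elements found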
