Skip to content

TST: win32 paths cannot be turned into URLs by prefixing them with "file://" #4580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pandas 0.13

**Bug Fixes**

- Fixed html tests on win32. (:issue:`4580`)

pandas 0.12
===========

Expand Down
34 changes: 30 additions & 4 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
"""Common IO api utilities"""

import sys
import urlparse
import urllib2
import zipfile
from contextlib import contextmanager, closing
from StringIO import StringIO


from pandas.util import py3compat

if py3compat.PY3: # pragma: no cover
import urllib.parse as urlparse
from urllib.parse import urljoin
from urllib.request import pathname2url
from io import StringIO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should import StringIO from compat. only io specific things should come from here.

else:
import urlparse
from urlparse import urljoin
from urllib import pathname2url
from StringIO import StringIO


_VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc +
urlparse.uses_params)
_VALID_URLS.discard('')
Expand Down Expand Up @@ -68,8 +79,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
else:
errors = 'replace'
encoding = 'utf-8'
bytes = filepath_or_buffer.read().decode(encoding, errors)
filepath_or_buffer = StringIO(bytes)
raw_bytes = filepath_or_buffer.read().decode(encoding, errors)
filepath_or_buffer = StringIO(raw_bytes)
return filepath_or_buffer, encoding
return filepath_or_buffer, None

Expand All @@ -91,6 +102,21 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
return filepath_or_buffer, None


def path_to_url(path):
    """
    Convert an absolute native (OS-specific) path into a ``file:`` URL.

    Parameters
    ----------
    path : str
        An absolute path in the platform's native format.

    Returns
    -------
    str
        A valid ``file:`` URL pointing at *path*.
    """
    # pathname2url handles the platform quirks (drive letters and
    # backslashes on win32, percent-quoting of unsafe characters), so a
    # plain 'file://' + path concatenation is not needed here.
    quoted = pathname2url(path)
    return urljoin('file:', quoted)


# ----------------------
# Prevent double closing
if py3compat.PY3:
Expand Down
20 changes: 13 additions & 7 deletions pandas/io/tests/test_html.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import os
import re
from cStringIO import StringIO
from unittest import TestCase
import warnings
from distutils.version import LooseVersion

from pandas.util import py3compat

if py3compat.PY3:
from io import StringIO
else:
from cStringIO import StringIO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto.


import nose
from nose.tools import assert_raises

Expand All @@ -19,14 +25,14 @@

from pandas.io.html import read_html
from pandas.io.common import urlopen
from pandas.io.common import path_to_url
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please combine these imports


from pandas import DataFrame, MultiIndex, read_csv, Timestamp
from pandas.util.testing import (assert_frame_equal, network,
get_data_path)

from pandas.util.testing import makeCustomDataframe as mkdf


def _have_module(module_name):
try:
import_module(module_name)
Expand Down Expand Up @@ -292,7 +298,7 @@ def test_bad_url_protocol(self):
@slow
def test_file_url(self):
url = self.banklist_data
dfs = self.run_read_html('file://' + url, 'First',
dfs = self.run_read_html(path_to_url(url), 'First',
attrs={'id': 'table'})
self.assertIsInstance(dfs, list)
for df in dfs:
Expand Down Expand Up @@ -338,7 +344,7 @@ def test_multiindex_header_index_skiprows(self):
@slow
def test_regex_idempotency(self):
url = self.banklist_data
dfs = self.run_read_html('file://' + url,
dfs = self.run_read_html(path_to_url(url),
match=re.compile(re.compile('Florida')),
attrs={'id': 'table'})
self.assertIsInstance(dfs, list)
Expand Down Expand Up @@ -462,9 +468,9 @@ def test_invalid_flavor():
flavor='not a* valid**++ flaver')


def get_elements_from_url(url, element='table', base_url="file://"):
def get_elements_from_file(url, element='table'):
_skip_if_none_of(('bs4', 'html5lib'))
url = "".join([base_url, url])
url = path_to_url(url)
from bs4 import BeautifulSoup
with urlopen(url) as f:
soup = BeautifulSoup(f, features='html5lib')
Expand All @@ -476,7 +482,7 @@ def test_bs4_finds_tables():
filepath = os.path.join(DATA_PATH, "spam.html")
with warnings.catch_warnings():
warnings.filterwarnings('ignore')
assert get_elements_from_url(filepath, 'table')
assert get_elements_from_file(filepath, 'table')


def get_lxml_elements(url, element):
Expand Down