From d64a0476c0d39ce173da97ffd4417484a413bf29 Mon Sep 17 00:00:00 2001
From: Gabi Davar
Date: Fri, 16 Aug 2013 15:17:21 +0300
Subject: [PATCH 1/3] win32 paths cannot be turned into URLs by prefixing them
 with "file://"

see http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
---
 pandas/io/tests/test_html.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index a83d85b89846e..ea7c1aa47171b 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -27,6 +27,12 @@
 from pandas.util.testing import makeCustomDataframe as mkdf


+import urlparse, urllib
+
+def path2url(path):
+    return urlparse.urljoin(
+        'file:', urllib.pathname2url(path))
+
 def _have_module(module_name):
     try:
         import_module(module_name)
@@ -292,7 +298,7 @@ def test_bad_url_protocol(self):
     @slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.run_read_html('file://' + url, 'First',
+        dfs = self.run_read_html(path2url(url), 'First',
                                  attrs={'id': 'table'})
         self.assertIsInstance(dfs, list)
         for df in dfs:
@@ -338,7 +344,7 @@ def test_multiindex_header_index_skiprows(self):
     @slow
     def test_regex_idempotency(self):
         url = self.banklist_data
-        dfs = self.run_read_html('file://' + url,
+        dfs = self.run_read_html(path2url(url),
                                  match=re.compile(re.compile('Florida')),
                                  attrs={'id': 'table'})
         self.assertIsInstance(dfs, list)
@@ -464,7 +470,7 @@ def test_invalid_flavor():

 def get_elements_from_url(url, element='table', base_url="file://"):
     _skip_if_none_of(('bs4', 'html5lib'))
-    url = "".join([base_url, url])
+    url = path2url(url) if base_url == "file://" else "".join([base_url, url])
     from bs4 import BeautifulSoup
     with urlopen(url) as f:
         soup = BeautifulSoup(f, features='html5lib')

From ebc23c816cc84a7f62c077a7c40a455a0bcdbe71 Mon Sep 17 00:00:00 2001
From: Gabi Davar
Date: Sat, 17 Aug 2013 12:53:07 +0300
Subject: [PATCH 2/3] make fix less patchy

---
 doc/source/release.rst       |  2 ++
 pandas/io/common.py          | 17 +++++++++++++++++
 pandas/io/tests/test_html.py | 10 ++++------
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index cd8a62664fac1..e586f29816aac 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -39,6 +39,8 @@ pandas 0.13

 **Bug Fixes**

+  - Fixed html tests on win32. (:issue:`4580`)
+
 pandas 0.12
 ===========

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 33958ade2bcd6..e447c2a15d802 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -3,6 +3,7 @@
 import sys
 import urlparse
 import urllib2
+import urllib
 import zipfile
 from contextlib import contextmanager, closing
 from StringIO import StringIO
@@ -91,6 +92,22 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
     return filepath_or_buffer, None


+def path_to_url(path):
+    """
+    converts an absolute native path to a FILE URL.
+
+    Parameters
+    ----------
+    path : a path in native format
+
+    Returns
+    -------
+    a valid FILE URL
+    """
+    return urlparse.urljoin(
+        'file:', urllib.pathname2url(path))
+
+
 # ----------------------
 # Prevent double closing
 if py3compat.PY3:
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index ea7c1aa47171b..61a7159c9468e 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -19,6 +19,7 @@

 from pandas.io.html import read_html
 from pandas.io.common import urlopen
+from pandas.io.common import path_to_url

 from pandas import DataFrame, MultiIndex, read_csv, Timestamp
 from pandas.util.testing import (assert_frame_equal, network,
@@ -26,9 +27,6 @@
 from pandas.util.testing import makeCustomDataframe as mkdf

-
-import urlparse, urllib
-
 def path2url(path):
     return urlparse.urljoin(
         'file:', urllib.pathname2url(path))
@@ -298,7 +296,7 @@ def test_bad_url_protocol(self):
     @slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.run_read_html(path2url(url), 'First',
+        dfs = self.run_read_html(path_to_url(url), 'First',
                                  attrs={'id': 'table'})
         self.assertIsInstance(dfs, list)
         for df in dfs:
@@ -344,7 +342,7 @@ def test_multiindex_header_index_skiprows(self):
     @slow
     def test_regex_idempotency(self):
         url = self.banklist_data
-        dfs = self.run_read_html(path2url(url),
+        dfs = self.run_read_html(path_to_url(url),
                                  match=re.compile(re.compile('Florida')),
                                  attrs={'id': 'table'})
         self.assertIsInstance(dfs, list)
@@ -470,7 +468,7 @@ def test_invalid_flavor():

 def get_elements_from_url(url, element='table', base_url="file://"):
     _skip_if_none_of(('bs4', 'html5lib'))
-    url = path2url(url) if base_url == "file://" else "".join([base_url, url])
+    url = path_to_url(url) if base_url == "file://" else "".join([base_url, url])
     from bs4 import BeautifulSoup
     with urlopen(url) as f:
         soup = BeautifulSoup(f, features='html5lib')

From 3236647c14047a7c71d30e6c2ae912787bd8b6f6 Mon Sep 17 00:00:00 2001
From: Gabi Davar
Date: Fri, 23 Aug 2013 11:44:08 +0300
Subject: [PATCH 3/3] make path_to_url python3 friendly

---
 pandas/io/common.py          | 23 ++++++++++++++++-------
 pandas/io/tests/test_html.py | 18 ++++++++++--------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index e447c2a15d802..bcc447a88e04d 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -1,15 +1,25 @@
 """Common IO api utilities"""

 import sys
-import urlparse
 import urllib2
-import urllib
 import zipfile
 from contextlib import contextmanager, closing
-from StringIO import StringIO
+

 from pandas.util import py3compat

+if py3compat.PY3:  # pragma: no cover
+    import urllib.parse as urlparse
+    from urllib.parse import urljoin
+    from urllib.request import pathname2url
+    from io import StringIO
+else:
+    import urlparse
+    from urlparse import urljoin
+    from urllib import pathname2url
+    from StringIO import StringIO
+
+
 _VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc +
                   urlparse.uses_params)
 _VALID_URLS.discard('')
@@ -69,8 +79,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
         else:
             errors = 'replace'
             encoding = 'utf-8'
-        bytes = filepath_or_buffer.read().decode(encoding, errors)
-        filepath_or_buffer = StringIO(bytes)
+        raw_bytes = filepath_or_buffer.read().decode(encoding, errors)
+        filepath_or_buffer = StringIO(raw_bytes)
         return filepath_or_buffer, encoding
     return filepath_or_buffer, None

@@ -104,8 +114,7 @@ def path_to_url(path):
     -------
     a valid FILE URL
     """
-    return urlparse.urljoin(
-        'file:', urllib.pathname2url(path))
+    return urljoin('file:', pathname2url(path))


 # ----------------------
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index 61a7159c9468e..fa905c0154d64 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -1,10 +1,16 @@
 import os
 import re
-from cStringIO import StringIO
 from unittest import TestCase
 import warnings
 from distutils.version import LooseVersion

+from pandas.util import py3compat
+
+if py3compat.PY3:
+    from io import StringIO
+else:
+    from cStringIO import StringIO
+
 import nose
 from nose.tools import assert_raises

@@ -27,10 +33,6 @@
 from pandas.util.testing import makeCustomDataframe as mkdf


-def path2url(path):
-    return urlparse.urljoin(
-        'file:', urllib.pathname2url(path))
-
 def _have_module(module_name):
     try:
         import_module(module_name)
@@ -466,9 +468,9 @@ def test_invalid_flavor():
                              flavor='not a* valid**++ flaver')


-def get_elements_from_url(url, element='table', base_url="file://"):
+def get_elements_from_file(url, element='table'):
     _skip_if_none_of(('bs4', 'html5lib'))
-    url = path_to_url(url) if base_url == "file://" else "".join([base_url, url])
+    url = path_to_url(url)
     from bs4 import BeautifulSoup
     with urlopen(url) as f:
         soup = BeautifulSoup(f, features='html5lib')
@@ -480,7 +482,7 @@ def test_bs4_finds_tables():
     filepath = os.path.join(DATA_PATH, "spam.html")
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore')
-        assert get_elements_from_url(filepath, 'table')
+        assert get_elements_from_file(filepath, 'table')


 def get_lxml_elements(url, element):
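
For reference, the helper this series converges on amounts to the standalone
sketch below. It uses the Python 3 names that the last patch imports
(urllib.parse.urljoin and urllib.request.pathname2url); the example paths are
only illustrations, not files used by the tests.

    from urllib.parse import urljoin
    from urllib.request import pathname2url

    def path_to_url(path):
        # Let the stdlib quote the path and normalise separators instead of
        # gluing 'file://' onto a native path by hand.
        return urljoin('file:', pathname2url(path))

    # POSIX:   path_to_url('/tmp/banklist.html')
    #          -> 'file:///tmp/banklist.html'
    # Windows: pathname2url(r'C:\data\banklist.html') gives
    #          '///C:/data/banklist.html', so the join yields
    #          'file:///C:/data/banklist.html'.
    #
    # Naively prefixing the same Windows path with 'file://' keeps the
    # backslashes and the bare drive colon, which urlopen() generally fails
    # to parse -- the problem the first commit message links to.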