diff --git a/doc/source/release.rst b/doc/source/release.rst index cd8a62664fac1..e586f29816aac 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -39,6 +39,8 @@ pandas 0.13 **Bug Fixes** + - Fixed html tests on win32. (:issue:`4580`) + pandas 0.12 =========== diff --git a/pandas/io/common.py b/pandas/io/common.py index 33958ade2bcd6..bcc447a88e04d 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -1,14 +1,25 @@ """Common IO api utilities""" import sys -import urlparse import urllib2 import zipfile from contextlib import contextmanager, closing -from StringIO import StringIO + from pandas.util import py3compat +if py3compat.PY3: # pragma: no cover + import urllib.parse as urlparse + from urllib.parse import urljoin + from urllib.request import pathname2url + from io import StringIO +else: + import urlparse + from urlparse import urljoin + from urllib import pathname2url + from StringIO import StringIO + + _VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc + urlparse.uses_params) _VALID_URLS.discard('') @@ -68,8 +79,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): else: errors = 'replace' encoding = 'utf-8' - bytes = filepath_or_buffer.read().decode(encoding, errors) - filepath_or_buffer = StringIO(bytes) + raw_bytes = filepath_or_buffer.read().decode(encoding, errors) + filepath_or_buffer = StringIO(raw_bytes) return filepath_or_buffer, encoding return filepath_or_buffer, None @@ -91,6 +102,21 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): return filepath_or_buffer, None +def path_to_url(path): + """ + Convert an absolute native path to a FILE URL. 
+ + Parameters + ---------- + path : a path in native format + + Returns + ------- + a valid FILE URL + """ + return urljoin('file:', pathname2url(path)) + + # ---------------------- # Prevent double closing if py3compat.PY3: diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index a83d85b89846e..fa905c0154d64 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -1,10 +1,16 @@ import os import re -from cStringIO import StringIO from unittest import TestCase import warnings from distutils.version import LooseVersion +from pandas.util import py3compat + +if py3compat.PY3: + from io import StringIO +else: + from cStringIO import StringIO + import nose from nose.tools import assert_raises @@ -19,6 +25,7 @@ from pandas.io.html import read_html from pandas.io.common import urlopen +from pandas.io.common import path_to_url from pandas import DataFrame, MultiIndex, read_csv, Timestamp from pandas.util.testing import (assert_frame_equal, network, @@ -26,7 +33,6 @@ from pandas.util.testing import makeCustomDataframe as mkdf - def _have_module(module_name): try: import_module(module_name) @@ -292,7 +298,7 @@ def test_bad_url_protocol(self): @slow def test_file_url(self): url = self.banklist_data - dfs = self.run_read_html('file://' + url, 'First', + dfs = self.run_read_html(path_to_url(url), 'First', attrs={'id': 'table'}) self.assertIsInstance(dfs, list) for df in dfs: @@ -338,7 +344,7 @@ def test_multiindex_header_index_skiprows(self): @slow def test_regex_idempotency(self): url = self.banklist_data - dfs = self.run_read_html('file://' + url, + dfs = self.run_read_html(path_to_url(url), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) self.assertIsInstance(dfs, list) @@ -462,9 +468,9 @@ def test_invalid_flavor(): flavor='not a* valid**++ flaver') -def get_elements_from_url(url, element='table', base_url="file://"): +def get_elements_from_file(url, element='table'): _skip_if_none_of(('bs4', 'html5lib')) - url = 
"".join([base_url, url]) + url = path_to_url(url) from bs4 import BeautifulSoup with urlopen(url) as f: soup = BeautifulSoup(f, features='html5lib') @@ -476,7 +482,7 @@ def test_bs4_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") with warnings.catch_warnings(): warnings.filterwarnings('ignore') - assert get_elements_from_url(filepath, 'table') + assert get_elements_from_file(filepath, 'table') def get_lxml_elements(url, element):