From fe47bf0c00c4afdd9d109220f02525021819de7a Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 20 Jul 2013 13:04:10 -0400 Subject: [PATCH] CLN: remove try suite in _read A skip will now occur if a call to urlopen or a read from the resulting object throws a urllib2.URLError exception --- doc/source/release.rst | 2 ++ doc/source/v0.12.0.txt | 3 +++ pandas/io/html.py | 7 ++----- pandas/io/tests/test_html.py | 13 ++++++++++--- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index f85f98a96fa1e..91fd854d54e85 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -111,6 +111,8 @@ pandas 0.12 of the default datetime.min and datetime.max (respectively), thanks @SleepingPills - ``read_html`` now raises when no tables are found and BeautifulSoup==4.2.0 is detected (:issue:`4214`) + - ``read_html`` now raises a ``URLError`` instead of catching and raising a + ``ValueError`` (:issue:`4303`, :issue:`4305`) **API Changes** diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt index 1b691b33f4d85..11be8a37d6c9a 100644 --- a/doc/source/v0.12.0.txt +++ b/doc/source/v0.12.0.txt @@ -348,6 +348,9 @@ Other Enhancements - ``read_html`` now raises when no tables are found and BeautifulSoup==4.2.0 is detected (:issue:`4214`) + - ``read_html`` now raises a ``URLError`` instead of catching and raising a + ``ValueError`` (:issue:`4303`, :issue:`4305`) + Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/io/html.py b/pandas/io/html.py index 64fba1cadc6c2..651a3eb507618 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -113,11 +113,8 @@ def _read(io): raw_text : str """ if _is_url(io): - try: - with urlopen(io) as url: - raw_text = url.read() - except urllib2.URLError: - raise ValueError('Invalid URL: "{0}"'.format(io)) + with urlopen(io) as url: + raw_text = url.read() elif hasattr(io, 'read'): raw_text = io.read() elif os.path.isfile(io): diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index a83d85b89846e..1fcedcfda6854 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -4,6 +4,7 @@ from unittest import TestCase import warnings from distutils.version import LooseVersion +import urllib2 import nose from nose.tools import assert_raises @@ -24,7 +25,7 @@ from pandas.util.testing import (assert_frame_equal, network, get_data_path) -from pandas.util.testing import makeCustomDataframe as mkdf +from pandas.util.testing import makeCustomDataframe as mkdf, rands def _have_module(module_name): @@ -285,9 +286,15 @@ def test_file_like(self): assert_framelist_equal(df1, df2) + @network def test_bad_url_protocol(self): - self.assertRaises(ValueError, self.run_read_html, 'git://github.com', - '.*Water.*') + self.assertRaises(urllib2.URLError, self.run_read_html, + 'git://github.com', '.*Water.*') + + @network + def test_invalid_url(self): + self.assertRaises(urllib2.URLError, self.run_read_html, + 'http://www.a23950sdfa908sd.com') @slow def test_file_url(self):