
TST: Change test_html to use stored data + mark other #4009


Merged (1 commit) on Jul 3, 2013
23 changes: 9 additions & 14 deletions pandas/io/tests/test_html.py
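The diff below replaces three live-URL fixtures with the stored pandas/io/tests/data/spam.html file and tidies the @network/@slow markers on the tests that still hit the web. The enabling trick is that urlopen also accepts file:// URLs, so one helper can read a remote page or a stored copy through the same code path. A minimal, self-contained sketch of that pattern (Python 3 stdlib spelling here, which postdates this PR; the throwaway fixture stands in for the real spam.html):

import os
import tempfile
from contextlib import closing
from urllib.request import urlopen  # urllib2.urlopen in the Python 2 era of this PR

def read_source(path_or_url, base_url="file://"):
    # Joining "file://" onto an absolute POSIX path produces a URL that
    # urlopen accepts, so one helper serves both remote pages and stored
    # fixtures such as DATA_PATH/spam.html.
    url = "".join([base_url, path_or_url])
    with closing(urlopen(url)) as f:
        return f.read()

# Self-contained demo with a throwaway fixture instead of the real spam.html.
with tempfile.TemporaryDirectory() as d:
    fixture = os.path.join(d, "spam.html")
    with open(fixture, "w") as f:
        f.write("<table><tr><td>spam</td></tr></table>")
    assert b"<table>" in read_source(fixture)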
@@ -91,7 +91,6 @@ def test_to_html_compat(self):
         assert_frame_equal(res, df)
 
     @network
-    @slow
     def test_banklist_url(self):
         url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'
         df1 = self.run_read_html(url, 'First Federal Bank of Florida',
@@ -101,7 +100,6 @@ def test_banklist_url(self):
         assert_framelist_equal(df1, df2)
 
     @network
-    @slow
     def test_spam_url(self):
         url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
                'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
@@ -332,15 +330,14 @@ def test_negative_skiprows_banklist(self):
         self.assertRaises(AssertionError, self.run_read_html, url, 'Florida',
                           skiprows=-1)
 
-    @slow
+    @network
     def test_multiple_matches(self):
         url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
         dfs = self.run_read_html(url, match='Python',
                                  attrs={'class': 'wikitable'})
         self.assertGreater(len(dfs), 1)
 
     @network
-    @slow
     def test_pythonxy_plugins_table(self):
         url = 'http://code.google.com/p/pythonxy/wiki/StandardPlugins'
         dfs = self.run_read_html(url, match='Python',
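A note on the marker shuffle above: test_multiple_matches gains @network because it fetches a live Google Code page, and pandas's network decorator (pandas.util.testing.network at the time) downgraded connectivity failures to skips rather than test failures. A simplified, illustrative stand-in for that idea, not the actual pandas implementation:

import functools
import unittest
from urllib.error import URLError

def network(test):
    # Illustrative only: run the test, but report a connectivity
    # failure as a skip instead of a red test.
    @functools.wraps(test)
    def wrapper(*args, **kwargs):
        try:
            return test(*args, **kwargs)
        except (IOError, URLError) as exc:
            raise unittest.SkipTest("network unavailable: %s" % exc)
    return wrapper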
@@ -438,8 +435,9 @@ def test_invalid_flavor():
                   flavor='not a* valid**++ flaver')
 
 
-def get_elements_from_url(url, element='table'):
+def get_elements_from_url(url, element='table', base_url="file://"):
     _skip_if_none_of(('bs4', 'html5lib'))
+    url = "".join([base_url, url])
     from bs4 import BeautifulSoup, SoupStrainer
     strainer = SoupStrainer(element)
     with closing(urlopen(url)) as f:
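The new base_url parameter means callers now hand this helper a bare filesystem path; "".join simply concatenates, so an absolute POSIX path becomes a valid file URL (and, reading the signature, a caller wanting a real HTTP URL could pass base_url='', though nothing in this diff does). A worked example with a hypothetical DATA_PATH:

import os

DATA_PATH = "/home/user/pandas/pandas/io/tests/data"  # hypothetical location
filepath = os.path.join(DATA_PATH, "spam.html")
url = "".join(["file://", filepath])
# url == "file:///home/user/pandas/pandas/io/tests/data/spam.html"

Note that the concatenation only yields a well-formed URL for POSIX-style absolute paths; urllib's pathname2url would be the portable spelling on Windows.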
@@ -449,11 +447,10 @@ def get_elements_from_url(url, element='table'):
 
 @slow
 def test_bs4_finds_tables():
-    url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
-           'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
+    filepath = os.path.join(DATA_PATH, "spam.html")
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore')
-        assert get_elements_from_url(url, 'table')
+        assert get_elements_from_url(filepath, 'table')
 
 
 def get_lxml_elements(url, element):
@@ -465,13 +462,11 @@ def get_lxml_elements(url, element):
 
 @slow
 def test_lxml_finds_tables():
-    url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
-           'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
-    assert get_lxml_elements(url, 'table')
+    filepath = os.path.join(DATA_PATH, "spam.html")
+    assert get_lxml_elements(filepath, 'table')
 
 
 @slow
 def test_lxml_finds_tbody():
-    url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
-           'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
-    assert get_lxml_elements(url, 'tbody')
+    filepath = os.path.join(DATA_PATH, "spam.html")
+    assert get_lxml_elements(filepath, 'tbody')
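The lxml tests, by contrast, pass the bare path with no file:// prefix, which works because get_lxml_elements (its body sits outside this diff) evidently hands the string to lxml, and lxml.html.parse accepts a filename, URL, or file-like object directly. A minimal sketch of an equivalent helper under that assumption:

from lxml.html import parse

def get_lxml_elements(url, element):
    # parse() takes a filename, URL, or file object, so the stored
    # spam.html path can be passed straight through.
    doc = parse(url)
    return doc.xpath('//' + element)

# e.g. assert get_lxml_elements(filepath, 'table') for a local fixture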