Skip to content

Commit 3ed01a0

Browse files
committed
TST: Refactor test_parsers.py
Refactored tests in test_parsers.py to increase coverage of the different types of parsers and remove nearly duplicate testing in some cases.
1 parent 84725fa commit 3ed01a0

31 files changed

+5328
-5060
lines changed

pandas/io/tests/parser/__init__.py

Whitespace-only changes.

pandas/io/tests/parser/c_parser_only.py

Lines changed: 521 additions & 0 deletions
Large diffs are not rendered by default.

pandas/io/tests/parser/comment.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
Tests that comments are properly handled during parsing
5+
for all of the parsers defined in parsers.py
6+
"""
7+
8+
import numpy as np
9+
import pandas.util.testing as tm
10+
11+
from pandas import DataFrame
12+
from pandas.compat import StringIO
13+
14+
15+
class CommentTests(object):
    """Mixin with tests for the ``comment`` option of the CSV parsers.

    The methods call ``self.read_csv``, ``self.read_table`` and read
    ``self.engine``; none of these are defined here, so they are
    presumably supplied by the concrete test class that mixes this in.
    """

    def test_comment(self):
        """A trailing ``#`` comment on a data row is dropped."""
        data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
        expected = [[1., 2., 4.],
                    [5., np.nan, 10.]]

        result = self.read_csv(StringIO(data), comment='#')
        tm.assert_almost_equal(result.values, expected)

        # same input through read_table, with an explicit NaN marker
        result = self.read_table(StringIO(data), sep=',', comment='#',
                                 na_values=['NaN'])
        tm.assert_almost_equal(result.values, expected)

    def test_line_comment(self):
        """Lines consisting only of a comment are skipped entirely."""
        data = """# empty
A,B,C
1,2.,4.#hello world
#ignore this line
5.,NaN,10.0
"""
        expected = [[1., 2., 4.],
                    [5., np.nan, 10.]]

        result = self.read_csv(StringIO(data), comment='#')
        tm.assert_almost_equal(result.values, expected)

        # same content, but whitespace-delimited instead of comma-delimited
        spaced = data.replace(',', ' ')
        result = self.read_csv(StringIO(spaced), comment='#',
                               delim_whitespace=True)
        tm.assert_almost_equal(result.values, expected)

        # a custom line terminator is not supported
        # by the Python parser yet, so only the C engine goes further
        if self.engine != 'c':
            return

        starred = data.replace('\n', '*')
        result = self.read_csv(StringIO(starred), comment='#',
                               lineterminator='*')
        tm.assert_almost_equal(result.values, expected)

    def test_comment_skiprows(self):
        """``skiprows`` counts comment lines among the skipped rows."""
        data = """# empty
random line
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
        # the first four lines (comments included) should be ignored
        expected = [[1., 2., 4.], [5., np.nan, 10.]]

        result = self.read_csv(StringIO(data), comment='#', skiprows=4)
        tm.assert_almost_equal(result.values, expected)

    def test_comment_header(self):
        """``header`` is counted over non-comment lines only."""
        data = """# empty
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
        # the header should be the second non-comment line
        expected = [[1., 2., 4.], [5., np.nan, 10.]]

        result = self.read_csv(StringIO(data), comment='#', header=1)
        tm.assert_almost_equal(result.values, expected)

    def test_comment_skiprows_header(self):
        """``skiprows`` and ``header`` combine as expected with comments."""
        data = """# empty
# second empty line
# third empty line
X,Y,Z
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
        # skiprows should drop the first 4 lines (comments included);
        # header should then be the second non-commented line counting
        # from line 5
        expected = [[1., 2., 4.], [5., np.nan, 10.]]

        result = self.read_csv(StringIO(data), comment='#',
                               skiprows=4, header=1)
        tm.assert_almost_equal(result.values, expected)

    def test_custom_comment_char(self):
        """Comments are stripped even when the last line has no newline."""
        data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
        expected = DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})

        result = self.read_csv(StringIO(data), comment='#')
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)