pandas-dev · gfyoung · Apr 23, 2016
diff --git a/pandas/io/tests/parser/__init__.py b/pandas/io/tests/parser/__init__.py
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
diff --git a/pandas/io/tests/parser/comment.py b/pandas/io/tests/parser/comment.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+"""
+Tests that comments are properly handled during parsing
+for all of the parsers defined in parsers.py
+"""
+
+import numpy as np
+import pandas.util.testing as tm
+
+from pandas import DataFrame
+from pandas.compat import StringIO
+
+
+class CommentTests(object):
+
+    def test_comment(self):
+        data = """A,B,C
+1,2.,4.#hello world
+5.,NaN,10.0
+"""
+        expected = [[1., 2., 4.],
+                    [5., np.nan, 10.]]
+        df = self.read_csv(StringIO(data), comment='#')
+        tm.assert_almost_equal(df.values, expected)
+
+        df = self.read_table(StringIO(data), sep=',', comment='#',
+                             na_values=['NaN'])
+        tm.assert_almost_equal(df.values, expected)
+
+    def test_line_comment(self):
+        data = """# empty
+A,B,C
+1,2.,4.#hello world
+#ignore this line
+5.,NaN,10.0
+"""
+        expected = [[1., 2., 4.],
+                    [5., np.nan, 10.]]
+        df = self.read_csv(StringIO(data), comment='#')
+        tm.assert_almost_equal(df.values, expected)
+
+        # check with delim_whitespace=True
+        df = self.read_csv(StringIO(data.replace(',', ' ')), comment='#',
+                           delim_whitespace=True)
+        tm.assert_almost_equal(df.values, expected)
+
+        # custom line terminator is not supported
+        # with the Python parser yet
+        if self.engine == 'c':
+            expected = [[1., 2., 4.],
+                        [5., np.nan, 10.]]
+            df = self.read_csv(StringIO(data.replace('\n', '*')),
+                               comment='#', lineterminator='*')
+            tm.assert_almost_equal(df.values, expected)
+
+    def test_comment_skiprows(self):
+        data = """# empty
+random line
+# second empty line
+1,2,3
+A,B,C
+1,2.,4.
+5.,NaN,10.0
+"""
+        # this should ignore the first four lines (including comments)
+        expected = [[1., 2., 4.], [5., np.nan, 10.]]
+        df = self.read_csv(StringIO(data), comment='#', skiprows=4)
+        tm.assert_almost_equal(df.values, expected)
+
+    def test_comment_header(self):
+        data = """# empty
+# second empty line
+1,2,3
+A,B,C
+1,2.,4.
+5.,NaN,10.0
+"""
+        # header should begin at the second non-comment line
+        expected = [[1., 2., 4.], [5., np.nan, 10.]]
+        df = self.read_csv(StringIO(data), comment='#', header=1)
+        tm.assert_almost_equal(df.values, expected)
+
+    def test_comment_skiprows_header(self):
+        data = """# empty
+# second empty line
+# third empty line
+X,Y,Z
+1,2,3
+A,B,C
+1,2.,4.
+5.,NaN,10.0
+"""
+        # skiprows should skip the first 4 lines (including comments), while
+        # header should start from the second non-commented line starting
+        # with line 5
+        expected = [[1., 2., 4.], [5., np.nan, 10.]]
+        df = self.read_csv(StringIO(data), comment='#', skiprows=4, header=1)
+        tm.assert_almost_equal(df.values, expected)
+
+    def test_custom_comment_char(self):
+        data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
+
+        result = self.read_csv(StringIO(data), comment='#')
+        expected = DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})
+        tm.assert_frame_equal(result, expected)