pandas-dev · jreback · Dec 13, 2020 · Dec 6, 2020 · Dec 7, 2020 · Dec 9, 2020
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -217,6 +217,7 @@ MultiIndex
 I/O
 ^^^
 
+- Bug in :func:`read_csv` interpreting an ``NA`` value as a comment when the ``NA`` value contains the comment string, fixed for ``engine="python"`` (:issue:`34002`)
 - Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
 - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
 - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -2983,7 +2983,11 @@ def _check_comments(self, lines):
         for line in lines:
             rl = []
             for x in line:
-                if not isinstance(x, str) or self.comment not in x:
+                if (
+                    not isinstance(x, str)
+                    or self.comment not in x
+                    or x in self.na_values
+                ):
                     rl.append(x)
                 else:
                     x = x[: x.find(self.comment)]

diff --git a/pandas/tests/io/parser/test_comment.py b/pandas/tests/io/parser/test_comment.py
@@ -134,3 +134,30 @@ def test_comment_first_line(all_parsers, header):
 
     result = parser.read_csv(StringIO(data), comment="#", header=header)
     tm.assert_frame_equal(result, expected)
+
+
+def test_comment_char_in_default_value(all_parsers, request):
+    # GH#34002: a field equal to an na_values entry must not be cut at the comment char
+    if all_parsers.engine == "c":
+        reason = "see gh-34002: works on the python engine but not the c engine"
+        # the c engine is expected to fail the frame comparison below, so mark it xfail
+        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=AssertionError))
+    parser = all_parsers
+
+    data = (
+        "# this is a comment\n"  # full-line comment: contributes no row
+        "col1,col2,col3,col4\n"
+        "1,2,3,4#inline comment\n"  # text from "#" onwards is dropped, col4 == 4
+        "4,5#,6,10\n"  # "#" ends the row early: col3 and col4 are NaN
+        "7,8,#N/A,11\n"  # "#N/A" is the NA value: col3 is NaN, col4 is kept
+    )
+    result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
+    expected = DataFrame(
+        {
+            "col1": [1, 4, 7],
+            "col2": [2, 5, 8],
+            "col3": [3.0, np.nan, np.nan],
+            "col4": [4.0, np.nan, 11.0],
+        }
+    )
+    tm.assert_frame_equal(result, expected)