Skip to content

Commit 861df91

Browse files
authored
BUG: fixes bug when using sep=None and comment keyword for read_csv (#31667)
1 parent fcadff3 commit 861df91

File tree

3 files changed

+29
-7
lines changed

3 files changed

+29
-7
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,10 @@ I/O
297297
- Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for
298298
``coerce_timestamps``; following pyarrow's default allows writing nanosecond
299299
timestamps with ``version="2.0"`` (:issue:`31652`).
300+
- Bug in :meth:`read_csv` was raising ``TypeError`` when ``sep=None`` was used in combination with the ``comment`` keyword (:issue:`31396`)
300301
- Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)
301302

303+
302304
Plotting
303305
^^^^^^^^
304306

pandas/io/parsers.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2379,19 +2379,21 @@ class MyDialect(csv.Dialect):
23792379

23802380
dia = MyDialect
23812381

2382-
sniff_sep = True
2383-
23842382
if sep is not None:
2385-
sniff_sep = False
23862383
dia.delimiter = sep
2387-
# attempt to sniff the delimiter
2388-
if sniff_sep:
2384+
else:
2385+
# attempt to sniff the delimiter from the first valid line,
2386+
# i.e. a line that is neither a comment line nor listed in skiprows
23892387
line = f.readline()
2390-
while self.skipfunc(self.pos):
2388+
lines = self._check_comments([[line]])[0]
2389+
while self.skipfunc(self.pos) or not lines:
23912390
self.pos += 1
23922391
line = f.readline()
2392+
lines = self._check_comments([[line]])[0]
23932393

2394-
line = self._check_comments([line])[0]
2394+
# since `line` was a string, lines will be a list containing
2395+
# only a single string
2396+
line = lines[0]
23952397

23962398
self.pos += 1
23972399
self.line_pos += 1

pandas/tests/io/parser/test_python_parser_only.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,24 @@ def test_sniff_delimiter(python_parser_only, kwargs):
6666
tm.assert_frame_equal(result, expected)
6767

6868

69+
def test_sniff_delimiter_comment(python_parser_only):
70+
data = """# comment line
71+
index|A|B|C
72+
# comment line
73+
foo|1|2|3 # ignore | this
74+
bar|4|5|6
75+
baz|7|8|9
76+
"""
77+
parser = python_parser_only
78+
result = parser.read_csv(StringIO(data), index_col=0, sep=None, comment="#")
79+
expected = DataFrame(
80+
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
81+
columns=["A", "B", "C"],
82+
index=Index(["foo", "bar", "baz"], name="index"),
83+
)
84+
tm.assert_frame_equal(result, expected)
85+
86+
6987
@pytest.mark.parametrize("encoding", [None, "utf-8"])
7088
def test_sniff_delimiter_encoding(python_parser_only, encoding):
7189
parser = python_parser_only

0 commit comments

Comments
 (0)