diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6923b42d3340b..6534172a41384 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -318,6 +318,7 @@ I/O - :meth:`to_csv` did not support zip compression for binary file object not having a filename (:issue:`35058`) - :meth:`to_csv` and :meth:`read_csv` did not honor `compression` and `encoding` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, and :issue:`32392`) - :meth:`to_picke` and :meth:`read_pickle` did not support compression for file-objects (:issue:`26237`, :issue:`29054`, and :issue:`29570`) +- Bug in :meth:`read_csv` with `engine='python'` truncating data if multiple items present in first row and first element started with BOM (:issue:`36343`) Plotting ^^^^^^^^ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 43ffbe6bdd66c..bc622ab8c1f18 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2886,14 +2886,12 @@ def _check_for_bom(self, first_row): # quotation mark. if len(first_row_bom) > end + 1: new_row += first_row_bom[end + 1 :] - return [new_row] + first_row[1:] - elif len(first_row_bom) > 1: - return [first_row_bom[1:]] else: - # First row is just the BOM, so we - # return an empty string. - return [""] + + # No quotation so just remove BOM from first element + new_row = first_row_bom[1:] + return [new_row] + first_row[1:] def _is_line_empty(self, line): """ diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 1d8d5a29686a4..7f53b14ab77a4 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2128,6 +2128,16 @@ def test_first_row_bom(all_parsers): tm.assert_frame_equal(result, expected) +def test_first_row_bom_unquoted(all_parsers): + # see gh-36343 + parser = all_parsers + data = """\ufeffHead1 Head2 Head3""" + + result = parser.read_csv(StringIO(data), delimiter="\t") + expected = DataFrame(columns=["Head1", "Head2", "Head3"]) + tm.assert_frame_equal(result, expected) + + def test_integer_precision(all_parsers): # Gh 7072 s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765