From 3afb8d56fabf0e2d301d06c3d27fa07e04c22f3b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 31 Jan 2020 12:30:37 -0600 Subject: [PATCH 1/4] REGR: Fixed truncation with na_rep --- doc/source/whatsnew/v1.0.1.rst | 1 + pandas/core/internals/blocks.py | 5 ++++- pandas/tests/frame/test_to_csv.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index ff8433c7cafd9..1a5a420ab8b55 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -86,6 +86,7 @@ MultiIndex I/O ^^^ +- Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) - - diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 22901051ec345..40560e87191b5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -684,7 +684,10 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): itemsize = writers.word_len(na_rep) if not self.is_object and not quoting and itemsize: - values = values.astype(f" Date: Fri, 31 Jan 2020 12:31:40 -0600 Subject: [PATCH 2/4] fixup --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 40560e87191b5..9e31ccebd0f1b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -685,7 +685,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): if not self.is_object and not quoting and itemsize: values = values.astype(str) - if values.dtype.itemsize < np.dtype("U1").itemsize: + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: # enlarge for the na_rep values = values.astype(f" Date: Fri, 31 Jan 2020 12:32:27 -0600 Subject: [PATCH 3/4] move --- pandas/tests/frame/test_to_csv.py | 11 ----------- pandas/tests/io/formats/test_to_csv.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index dd432ffad1399..aeff92971b42a 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1356,14 +1356,3 @@ def test_gz_lineend(self): result = f.read().decode("utf-8") assert result == expected - - def test_na_rep_truncated(self): - # https://github.com/pandas-dev/pandas/issues/31447 - result = pd.Series(range(8, 12)).to_csv(na_rep="-") - assert result == ",0\n0,8\n1,9\n2,10\n3,11\n" - - result = pd.Series([True, False]).to_csv(na_rep="nan") - assert result == ",0\n0,True\n1,False\n" - - result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") - assert result == ",0\n0,1.1\n1,2.2\n" diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index a211ac11cf725..f7bb245cbf3d0 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -583,3 +583,14 @@ def test_to_csv_timedelta_precision(self): ] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected + + def test_na_rep_truncated(self): + # https://github.com/pandas-dev/pandas/issues/31447 + result = pd.Series(range(8, 12)).to_csv(na_rep="-") + assert result == ",0\n0,8\n1,9\n2,10\n3,11\n" + + result = pd.Series([True, False]).to_csv(na_rep="nan") + assert result == ",0\n0,True\n1,False\n" + + result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") + assert result == ",0\n0,1.1\n1,2.2\n" From 8d22308d1a967ab5a97ae560fd0ef2971227dd0e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 31 Jan 2020 13:44:39 -0600 Subject: [PATCH 4/4] fixup --- pandas/tests/io/formats/test_to_csv.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index f7bb245cbf3d0..b3ee8da52dece 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -587,10 +587,13 @@ def test_to_csv_timedelta_precision(self): def test_na_rep_truncated(self): # https://github.com/pandas-dev/pandas/issues/31447 result = pd.Series(range(8, 12)).to_csv(na_rep="-") - assert result == ",0\n0,8\n1,9\n2,10\n3,11\n" + expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"]) + assert result == expected result = pd.Series([True, False]).to_csv(na_rep="nan") - assert result == ",0\n0,True\n1,False\n" + expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"]) + assert result == expected result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") - assert result == ",0\n0,1.1\n1,2.2\n" + expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) + assert result == expected