From 72f1583fc2aa7451a1728fb25a4fbb565aa47609 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 29 Aug 2019 11:33:42 -0600 Subject: [PATCH 1/2] REGR: Fix to_csv with IntervalIndex --- doc/source/whatsnew/v0.25.2.rst | 2 +- pandas/core/indexes/interval.py | 8 +--- pandas/tests/frame/test_to_csv.py | 9 +++++ .../tests/indexes/interval/test_interval.py | 40 +++++++++++++++++++ pandas/tests/series/test_io.py | 9 +++++ 5 files changed, 61 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 6974c7521a237..8d8a39139cf84 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -62,7 +62,7 @@ Missing I/O ^^^ -- +- Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) - - diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6b0081c6a2ff5..7c581a12764b1 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1096,12 +1096,8 @@ def _format_with_header(self, header, **kwargs): return header + list(self._format_native_types(**kwargs)) def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): - """ actually format my specific types """ - from pandas.io.formats.format import ExtensionArrayFormatter - - return ExtensionArrayFormatter( - values=self, na_rep=na_rep, justify="all", leading_space=False - ).get_result() + # GH 28210: use base method but with different default na_rep + return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) def _format_data(self, name=None): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index e2e4a82ff581c..5c1c1d3b74e24 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -695,6 +695,15 @@ def _make_frame(names=None): tm.assert_index_equal(recons.columns, exp.columns) assert len(recons) == 0 + def test_to_csv_interval_index(self): + # GH 28210 + df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) + + # can't roundtrip interval index via read_csv so check string output (GH 23595) + result = df.to_csv(path_or_buf=None) + expected = ',A,B\n"(0, 1]",a,0\n"(1, 2]",b,1\n"(2, 3]",c,2\n' + assert result == expected + def test_to_csv_float32_nanrep(self): df = DataFrame(np.random.randn(1, 4).astype(np.float32)) df[1] = np.nan diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index c1a21e6a7f152..eeb0f43f4b900 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -417,6 +417,46 @@ def test_repr_missing(self, constructor, expected): result = repr(obj) assert result == expected + @pytest.mark.parametrize( + "tuples, closed, expected_data", + [ + ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]), + ( + [(0.5, 1.0), np.nan, (2.0, 3.0)], + "right", + ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"], + ), + ( + [ + (Timestamp("20180101"), Timestamp("20180102")), + np.nan, + ((Timestamp("20180102"), Timestamp("20180103"))), + ], + "both", + ["[2018-01-01, 2018-01-02]", "NaN", "[2018-01-02, 2018-01-03]"], + ), + ( + [ + (Timedelta("0 days"), Timedelta("1 days")), + (Timedelta("1 days"), Timedelta("2 days")), + np.nan, + ], + "neither", + [ + "(0 days 00:00:00, 1 days 00:00:00)", + "(1 days 00:00:00, 2 days 00:00:00)", + "NaN", + ], + ), + ], + ) + def test_to_native_types(self, tuples, closed, expected_data): + # GH 28210 + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.to_native_types() + expected = np.array(expected_data) + tm.assert_numpy_array_equal(result, expected) + def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 0686b397cbd81..47774e61fa645 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -191,6 +191,15 @@ def test_to_csv_compression(self, s, encoding, compression): s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding) ) + def test_to_csv_interval_index(self): + # GH 28210 + s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3)) + + # can't roundtrip interval index via read_csv so check string output (GH 23595) + result = s.to_csv(path_or_buf=None, header=False) + expected = '"(0, 1]",foo\n"(1, 2]",bar\n"(2, 3]",baz\n' + assert result == expected + class TestSeriesIO: def test_to_frame(self, datetime_series): From a29ede224929e5b16261d8e61541b502c6963c2e Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Thu, 29 Aug 2019 19:17:35 -0600 Subject: [PATCH 2/2] roundtrip csv --- pandas/tests/frame/test_to_csv.py | 13 +++++++++---- pandas/tests/series/test_io.py | 13 +++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 5c1c1d3b74e24..8fb028a0f0326 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -699,10 +699,15 @@ def test_to_csv_interval_index(self): # GH 28210 df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) - # can't roundtrip interval index via read_csv so check string output (GH 23595) - result = df.to_csv(path_or_buf=None) - expected = ',A,B\n"(0, 1]",a,0\n"(1, 2]",b,1\n"(2, 3]",c,2\n' - assert result == expected + with ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + df.to_csv(path) + result = self.read_csv(path, index_col=0) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = df.copy() + expected.index = expected.index.astype(str) + + assert_frame_equal(result, expected) def test_to_csv_float32_nanrep(self): df = DataFrame(np.random.randn(1, 4).astype(np.float32)) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 47774e61fa645..0ddf1dfcabb59 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -195,10 +195,15 @@ def test_to_csv_interval_index(self): # GH 28210 s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3)) - # can't roundtrip interval index via read_csv so check string output (GH 23595) - result = s.to_csv(path_or_buf=None, header=False) - expected = '"(0, 1]",foo\n"(1, 2]",bar\n"(2, 3]",baz\n' - assert result == expected + with ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + s.to_csv(path, header=False) + result = self.read_csv(path, index_col=0, squeeze=True) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = s.copy() + expected.index = expected.index.astype(str) + + assert_series_equal(result, expected) class TestSeriesIO: