From ad3771561a5e1bf86d43c307683b15fde12f6c3b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 15:51:56 -0700 Subject: [PATCH 1/7] REF: misplaced repr tests --- .../development/contributing_codebase.rst | 6 + pandas/tests/frame/methods/test_info.py | 24 ++++ pandas/tests/frame/test_repr.py | 36 +++++ pandas/tests/io/formats/test_format.py | 26 +--- .../tests/io/formats/test_ipython_compat.py | 90 +++++++++++++ pandas/tests/io/formats/test_printing.py | 126 +----------------- 6 files changed, 160 insertions(+), 148 deletions(-) create mode 100644 pandas/tests/io/formats/test_ipython_compat.py diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 888cfc24aea5b..be8249cd3a287 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -456,6 +456,12 @@ be located. - tests.io + .. note:: + + This includes ``to_string`` but excludes ``__repr__``, which is + tested in ``tests.frame.test_repr`` and ``tests.series.test_repr``. + Other classes often have a ``test_formats`` file. + C) Otherwise This test likely belongs in one of: diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 7d7f436e3cfe6..7d9e0fe90f44c 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -539,3 +539,27 @@ def test_info_compute_numba(): df.info() expected = buf.getvalue() assert result == expected + + +@pytest.mark.parametrize( + "row, columns, show_counts, result", + [ + [20, 20, None, True], + [20, 20, True, True], + [20, 20, False, False], + [5, 5, None, False], + [5, 5, True, False], + [5, 5, False, False], + ], +) +def test_info_show_counts(row, columns, show_counts, result): + # Explicit cast to float to avoid implicit cast when setting nan + df = DataFrame(1, columns=range(10), index=range(10)).astype({1: "float"}) + df.iloc[1, 1] = np.nan + + with option_context( + "display.max_info_rows", row, "display.max_info_columns", columns + ): + with StringIO() as buf: + df.info(buf=buf, show_counts=show_counts) + assert ("non-null" in buf.getvalue()) is result diff --git a/pandas/tests/frame/test_repr.py b/pandas/tests/frame/test_repr.py index cccb4216a7941..74dd4dd2ec07b 100644 --- a/pandas/tests/frame/test_repr.py +++ b/pandas/tests/frame/test_repr.py @@ -446,3 +446,39 @@ def test_repr_ea_columns(self, any_string_dtype): 1 2 5 2 3 6""" assert repr(df) == expected + + +@pytest.mark.parametrize( + "data,output", + [ + ([2, complex("nan"), 1], [" 2.0+0.0j", " NaN+0.0j", " 1.0+0.0j"]), + ([2, complex("nan"), -1], [" 2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), + ([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), + ([-1.23j, complex("nan"), -1], ["-0.00-1.23j", " NaN+0.00j", "-1.00+0.00j"]), + ([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", " NaN+0.00j", " 1.23+0.00j"]), + ( + [-1.23j, complex(np.nan, np.nan), 1], + ["-0.00-1.23j", " NaN+ NaNj", " 1.00+0.00j"], + ), + ( + [-1.23j, complex(1.2, np.nan), 1], + ["-0.00-1.23j", " 1.20+ NaNj", " 1.00+0.00j"], + ), + ( + [-1.23j, complex(np.nan, -1.2), 1], + ["-0.00-1.23j", " NaN-1.20j", " 1.00+0.00j"], + ), + ], +) +@pytest.mark.parametrize("as_frame", [True, False]) +def test_repr_with_complex_nans(data, output, as_frame): + # GH#53762, GH#53841 + obj = Series(np.array(data)) + if as_frame: + obj = obj.to_frame(name="val") + reprs = [f"{i} {val}" for i, val in enumerate(output)] + expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs) + else: + reprs = [f"{i} {val}" for i, val in enumerate(output)] + expected = "\n".join(reprs) + "\ndtype: complex128" + assert str(obj) == expected, f"\n{str(obj)}\n\n{expected}" diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 421ca8eb58b50..5a6fad786ed11 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1,5 +1,6 @@ """ -Test output formatting for Series/DataFrame, including to_string & reprs +Tests for the file pandas.io.formats.format, *not* tests for general formatting +of pandas objects. """ from datetime import ( datetime, @@ -144,29 +145,6 @@ def has_expanded_repr(df): class TestDataFrameFormatting: - @pytest.mark.parametrize( - "row, columns, show_counts, result", - [ - [20, 20, None, True], - [20, 20, True, True], - [20, 20, False, False], - [5, 5, None, False], - [5, 5, True, False], - [5, 5, False, False], - ], - ) - def test_show_counts(self, row, columns, show_counts, result): - # Explicit cast to float to avoid implicit cast when setting nan - df = DataFrame(1, columns=range(10), index=range(10)).astype({1: "float"}) - df.iloc[1, 1] = np.nan - - with option_context( - "display.max_info_rows", row, "display.max_info_columns", columns - ): - with StringIO() as buf: - df.info(buf=buf, show_counts=show_counts) - assert ("non-null" in buf.getvalue()) is result - def test_repr_truncation(self): max_len = 20 with option_context("display.max_colwidth", max_len): diff --git a/pandas/tests/io/formats/test_ipython_compat.py b/pandas/tests/io/formats/test_ipython_compat.py new file mode 100644 index 0000000000000..8512f41396906 --- /dev/null +++ b/pandas/tests/io/formats/test_ipython_compat.py @@ -0,0 +1,90 @@ +import numpy as np + +import pandas._config.config as cf + +from pandas import ( + DataFrame, + MultiIndex, +) + + +class TestTableSchemaRepr: + def test_publishes(self, ip): + ipython = ip.instance(config=ip.config) + df = DataFrame({"A": [1, 2]}) + objects = [df["A"], df] # dataframe / series + expected_keys = [ + {"text/plain", "application/vnd.dataresource+json"}, + {"text/plain", "text/html", "application/vnd.dataresource+json"}, + ] + + opt = cf.option_context("display.html.table_schema", True) + last_obj = None + for obj, expected in zip(objects, expected_keys): + last_obj = obj + with opt: + formatted = ipython.display_formatter.format(obj) + assert set(formatted[0].keys()) == expected + + with_latex = cf.option_context("styler.render.repr", "latex") + + with opt, with_latex: + formatted = ipython.display_formatter.format(last_obj) + + expected = { + "text/plain", + "text/html", + "text/latex", + "application/vnd.dataresource+json", + } + assert set(formatted[0].keys()) == expected + + def test_publishes_not_implemented(self, ip): + # column MultiIndex + # GH#15996 + midx = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) + df = DataFrame( + np.random.default_rng(2).standard_normal((5, len(midx))), columns=midx + ) + + opt = cf.option_context("display.html.table_schema", True) + + with opt: + formatted = ip.instance(config=ip.config).display_formatter.format(df) + + expected = {"text/plain", "text/html"} + assert set(formatted[0].keys()) == expected + + def test_config_on(self): + df = DataFrame({"A": [1, 2]}) + with cf.option_context("display.html.table_schema", True): + result = df._repr_data_resource_() + + assert result is not None + + def test_config_default_off(self): + df = DataFrame({"A": [1, 2]}) + with cf.option_context("display.html.table_schema", False): + result = df._repr_data_resource_() + + assert result is None + + def test_enable_data_resource_formatter(self, ip): + # GH#10491 + formatters = ip.instance(config=ip.config).display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + with cf.option_context("display.html.table_schema", True): + assert "application/vnd.dataresource+json" in formatters + assert formatters[mimetype].enabled + + # still there, just disabled + assert "application/vnd.dataresource+json" in formatters + assert not formatters[mimetype].enabled + + # able to re-set + with cf.option_context("display.html.table_schema", True): + assert "application/vnd.dataresource+json" in formatters + assert formatters[mimetype].enabled + # smoke test that it works + ip.instance(config=ip.config).display_formatter.format(cf) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 1658b9404d203..e2b65b1fdc40a 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -1,12 +1,9 @@ +# Note! This file is aimed specifically at pandas.io.formats.printing utility +# functions, not the general printing of pandas objects. import string -import numpy as np -import pytest - import pandas._config.config as cf -import pandas as pd - from pandas.io.formats import printing @@ -116,122 +113,3 @@ def test_ambiguous_width(self): expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい" adjoined = adj.adjoin(2, *data) assert adjoined == expected - - -class TestTableSchemaRepr: - def test_publishes(self, ip): - ipython = ip.instance(config=ip.config) - df = pd.DataFrame({"A": [1, 2]}) - objects = [df["A"], df] # dataframe / series - expected_keys = [ - {"text/plain", "application/vnd.dataresource+json"}, - {"text/plain", "text/html", "application/vnd.dataresource+json"}, - ] - - opt = pd.option_context("display.html.table_schema", True) - last_obj = None - for obj, expected in zip(objects, expected_keys): - last_obj = obj - with opt: - formatted = ipython.display_formatter.format(obj) - assert set(formatted[0].keys()) == expected - - with_latex = pd.option_context("styler.render.repr", "latex") - - with opt, with_latex: - formatted = ipython.display_formatter.format(last_obj) - - expected = { - "text/plain", - "text/html", - "text/latex", - "application/vnd.dataresource+json", - } - assert set(formatted[0].keys()) == expected - - def test_publishes_not_implemented(self, ip): - # column MultiIndex - # GH 15996 - midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) - df = pd.DataFrame( - np.random.default_rng(2).standard_normal((5, len(midx))), columns=midx - ) - - opt = pd.option_context("display.html.table_schema", True) - - with opt: - formatted = ip.instance(config=ip.config).display_formatter.format(df) - - expected = {"text/plain", "text/html"} - assert set(formatted[0].keys()) == expected - - def test_config_on(self): - df = pd.DataFrame({"A": [1, 2]}) - with pd.option_context("display.html.table_schema", True): - result = df._repr_data_resource_() - - assert result is not None - - def test_config_default_off(self): - df = pd.DataFrame({"A": [1, 2]}) - with pd.option_context("display.html.table_schema", False): - result = df._repr_data_resource_() - - assert result is None - - def test_enable_data_resource_formatter(self, ip): - # GH 10491 - formatters = ip.instance(config=ip.config).display_formatter.formatters - mimetype = "application/vnd.dataresource+json" - - with pd.option_context("display.html.table_schema", True): - assert "application/vnd.dataresource+json" in formatters - assert formatters[mimetype].enabled - - # still there, just disabled - assert "application/vnd.dataresource+json" in formatters - assert not formatters[mimetype].enabled - - # able to re-set - with pd.option_context("display.html.table_schema", True): - assert "application/vnd.dataresource+json" in formatters - assert formatters[mimetype].enabled - # smoke test that it works - ip.instance(config=ip.config).display_formatter.format(cf) - - -# TODO: this is a test for Series/DataFrame __repr__ -@pytest.mark.parametrize( - "data,output", - [ - ([2, complex("nan"), 1], [" 2.0+0.0j", " NaN+0.0j", " 1.0+0.0j"]), - ([2, complex("nan"), -1], [" 2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), - ([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), - ([-1.23j, complex("nan"), -1], ["-0.00-1.23j", " NaN+0.00j", "-1.00+0.00j"]), - ([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", " NaN+0.00j", " 1.23+0.00j"]), - ( - [-1.23j, complex(np.nan, np.nan), 1], - ["-0.00-1.23j", " NaN+ NaNj", " 1.00+0.00j"], - ), - ( - [-1.23j, complex(1.2, np.nan), 1], - ["-0.00-1.23j", " 1.20+ NaNj", " 1.00+0.00j"], - ), - ( - [-1.23j, complex(np.nan, -1.2), 1], - ["-0.00-1.23j", " NaN-1.20j", " 1.00+0.00j"], - ), - ], -) -@pytest.mark.parametrize("as_frame", [True, False]) -def test_ser_df_with_complex_nans(data, output, as_frame): - # GH#53762, GH#53841 - obj = pd.Series(np.array(data)) - if as_frame: - obj = obj.to_frame(name="val") - reprs = [f"{i} {val}" for i, val in enumerate(output)] - expected = f"{'val': >{len(reprs[0])}}\n" + "\n".join(reprs) - else: - reprs = [f"{i} {val}" for i, val in enumerate(output)] - expected = "\n".join(reprs) + "\ndtype: complex128" - assert str(obj) == expected, f"\n{str(obj)}\n\n{expected}" From af6334d9d3bb6bfef9f816e7fddd65a1430bd20b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 16:26:27 -0700 Subject: [PATCH 2/7] misplaced test --- pandas/tests/frame/test_repr.py | 14 -------------- pandas/tests/io/formats/test_to_string.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pandas/tests/frame/test_repr.py b/pandas/tests/frame/test_repr.py index 74dd4dd2ec07b..7c0e374c50bab 100644 --- a/pandas/tests/frame/test_repr.py +++ b/pandas/tests/frame/test_repr.py @@ -422,20 +422,6 @@ def test_to_records_with_inf_record(self): result = repr(df) assert result == expected - def test_masked_ea_with_formatter(self): - # GH#39336 - df = DataFrame( - { - "a": Series([0.123456789, 1.123456789], dtype="Float64"), - "b": Series([1, 2], dtype="Int64"), - } - ) - result = df.to_string(formatters=["{:.2f}".format, "{:.2f}".format]) - expected = """ a b -0 0.12 1.00 -1 1.12 2.00""" - assert result == expected - def test_repr_ea_columns(self, any_string_dtype): # GH#54797 pytest.importorskip("pyarrow") diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 45f3b2201a599..6a6a5ebdf176a 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -14,6 +14,25 @@ import pandas._testing as tm +class TestDataFrameToString: + def test_to_string_masked_ea_with_formatter(self): + # GH#39336 + df = DataFrame( + { + "a": Series([0.123456789, 1.123456789], dtype="Float64"), + "b": Series([1, 2], dtype="Int64"), + } + ) + result = df.to_string(formatters=["{:.2f}".format, "{:.2f}".format]) + expected = dedent( + """\ + a b + 0 0.12 1.00 + 1 1.12 2.00""" + ) + assert result == expected + + def test_repr_embedded_ndarray(): arr = np.empty(10, dtype=[("err", object)]) for i in range(len(arr)): From a63a022af20dad3f3dfed4e6c0e3de1c5c835b33 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 16:30:41 -0700 Subject: [PATCH 3/7] Collect to_string tests with formatters --- pandas/tests/io/formats/test_format.py | 25 ---- pandas/tests/io/formats/test_to_string.py | 172 ++++++++++++---------- 2 files changed, 97 insertions(+), 100 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 5a6fad786ed11..d76cea547d227 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1620,20 +1620,6 @@ def test_to_string_int_formatting(self): expected = " x\n0 -15\n1 20\n2 25\n3 -35" assert output == expected - def test_to_string_index_formatter(self): - df = DataFrame([range(5), range(5, 10), range(10, 15)]) - - rs = df.to_string(formatters={"__index__": lambda x: "abc"[x]}) - - xp = """\ - 0 1 2 3 4 -a 0 1 2 3 4 -b 5 6 7 8 9 -c 10 11 12 13 14\ -""" - - assert rs == xp - def test_to_string_left_justify_cols(self): tm.reset_display_options() df = DataFrame({"x": [3234, 0.253]}) @@ -1987,17 +1973,6 @@ def test_max_rows_fitted(self, length, min_rows, max_rows, expected): result = formatter.max_rows_fitted assert result == expected - def test_no_extra_space(self): - # GH 52690: Check that no extra space is given - col1 = "TEST" - col2 = "PANDAS" - col3 = "to_string" - expected = f"{col1:<6s} {col2:<7s} {col3:<10s}" - df = DataFrame([{"col1": "TEST", "col2": "PANDAS", "col3": "to_string"}]) - d = {"col1": "{:<6s}".format, "col2": "{:<7s}".format, "col3": "{:<10s}".format} - result = df.to_string(index=False, header=False, formatters=d) - assert result == expected - def gen_series_formatting(): s1 = Series(["a"] * 100) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 6a6a5ebdf176a..6560154652859 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -14,7 +14,7 @@ import pandas._testing as tm -class TestDataFrameToString: +class TestDataFrameToStringFormatters: def test_to_string_masked_ea_with_formatter(self): # GH#39336 df = DataFrame( @@ -32,6 +32,102 @@ def test_to_string_masked_ea_with_formatter(self): ) assert result == expected + def test_to_string_with_formatters(self): + df = DataFrame( + { + "int": [1, 2, 3], + "float": [1.0, 2.0, 3.0], + "object": [(1, 2), True, False], + }, + columns=["int", "float", "object"], + ) + + formatters = [ + ("int", lambda x: f"0x{x:x}"), + ("float", lambda x: f"[{x: 4.1f}]"), + ("object", lambda x: f"-{x!s}-"), + ] + result = df.to_string(formatters=dict(formatters)) + result2 = df.to_string(formatters=list(zip(*formatters))[1]) + assert result == ( + " int float object\n" + "0 0x1 [ 1.0] -(1, 2)-\n" + "1 0x2 [ 2.0] -True-\n" + "2 0x3 [ 3.0] -False-" + ) + assert result == result2 + + def test_to_string_with_datetime64_monthformatter(self): + months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] + x = DataFrame({"months": months}) + + def format_func(x): + return x.strftime("%Y-%m") + + result = x.to_string(formatters={"months": format_func}) + expected = dedent( + """\ + months + 0 2016-01 + 1 2016-02""" + ) + assert result.strip() == expected + + def test_to_string_with_datetime64_hourformatter(self): + x = DataFrame( + {"hod": to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")} + ) + + def format_func(x): + return x.strftime("%H:%M") + + result = x.to_string(formatters={"hod": format_func}) + expected = dedent( + """\ + hod + 0 10:10 + 1 12:12""" + ) + assert result.strip() == expected + + def test_to_string_with_formatters_unicode(self): + df = DataFrame({"c/\u03c3": [1, 2, 3]}) + result = df.to_string(formatters={"c/\u03c3": str}) + expected = dedent( + """\ + c/\u03c3 + 0 1 + 1 2 + 2 3""" + ) + assert result == expected + + def test_to_string_index_formatter(self): + df = DataFrame([range(5), range(5, 10), range(10, 15)]) + + rs = df.to_string(formatters={"__index__": lambda x: "abc"[x]}) + + xp = dedent( + """\ + 0 1 2 3 4 + a 0 1 2 3 4 + b 5 6 7 8 9 + c 10 11 12 13 14\ + """ + ) + assert rs == xp + + def test_no_extra_space(self): + # GH#52690: Check that no extra space is given + col1 = "TEST" + col2 = "PANDAS" + col3 = "to_string" + expected = f"{col1:<6s} {col2:<7s} {col3:<10s}" + df = DataFrame([{"col1": "TEST", "col2": "PANDAS", "col3": "to_string"}]) + d = {"col1": "{:<6s}".format, "col2": "{:<7s}".format, "col3": "{:<10s}".format} + result = df.to_string(index=False, header=False, formatters=d) + assert result == expected + def test_repr_embedded_ndarray(): arr = np.empty(10, dtype=[("err", object)]) @@ -209,80 +305,6 @@ def test_to_string_unicode_three(): dm.to_string(buf) -def test_to_string_with_formatters(): - df = DataFrame( - { - "int": [1, 2, 3], - "float": [1.0, 2.0, 3.0], - "object": [(1, 2), True, False], - }, - columns=["int", "float", "object"], - ) - - formatters = [ - ("int", lambda x: f"0x{x:x}"), - ("float", lambda x: f"[{x: 4.1f}]"), - ("object", lambda x: f"-{x!s}-"), - ] - result = df.to_string(formatters=dict(formatters)) - result2 = df.to_string(formatters=list(zip(*formatters))[1]) - assert result == ( - " int float object\n" - "0 0x1 [ 1.0] -(1, 2)-\n" - "1 0x2 [ 2.0] -True-\n" - "2 0x3 [ 3.0] -False-" - ) - assert result == result2 - - -def test_to_string_with_datetime64_monthformatter(): - months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] - x = DataFrame({"months": months}) - - def format_func(x): - return x.strftime("%Y-%m") - - result = x.to_string(formatters={"months": format_func}) - expected = dedent( - """\ - months - 0 2016-01 - 1 2016-02""" - ) - assert result.strip() == expected - - -def test_to_string_with_datetime64_hourformatter(): - x = DataFrame( - {"hod": to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")} - ) - - def format_func(x): - return x.strftime("%H:%M") - - result = x.to_string(formatters={"hod": format_func}) - expected = dedent( - """\ - hod - 0 10:10 - 1 12:12""" - ) - assert result.strip() == expected - - -def test_to_string_with_formatters_unicode(): - df = DataFrame({"c/\u03c3": [1, 2, 3]}) - result = df.to_string(formatters={"c/\u03c3": str}) - expected = dedent( - """\ - c/\u03c3 - 0 1 - 1 2 - 2 3""" - ) - assert result == expected - - def test_to_string_complex_number_trims_zeros(): s = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j]) result = s.to_string() From ff0e65276cdbf876c72e0fb5cf6e8f23b2e2cec7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 16:36:42 -0700 Subject: [PATCH 4/7] Collect to_string tests by keyword --- pandas/tests/io/formats/test_format.py | 189 ------------------- pandas/tests/io/formats/test_to_string.py | 210 +++++++++++++++++++++- 2 files changed, 202 insertions(+), 197 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d76cea547d227..8bf83aeb1a043 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -881,51 +881,6 @@ def test_to_string_buffer_all_unicode(self): # this should work buf.getvalue() - def test_to_string_with_col_space(self): - df = DataFrame(np.random.default_rng(2).random(size=(1, 3))) - c10 = len(df.to_string(col_space=10).split("\n")[1]) - c20 = len(df.to_string(col_space=20).split("\n")[1]) - c30 = len(df.to_string(col_space=30).split("\n")[1]) - assert c10 < c20 < c30 - - # GH 8230 - # col_space wasn't being applied with header=False - with_header = df.to_string(col_space=20) - with_header_row1 = with_header.splitlines()[1] - no_header = df.to_string(col_space=20, header=False) - assert len(with_header_row1) == len(no_header) - - def test_to_string_with_column_specific_col_space_raises(self): - df = DataFrame( - np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] - ) - - msg = ( - "Col_space length\\(\\d+\\) should match " - "DataFrame number of columns\\(\\d+\\)" - ) - with pytest.raises(ValueError, match=msg): - df.to_string(col_space=[30, 40]) - - with pytest.raises(ValueError, match=msg): - df.to_string(col_space=[30, 40, 50, 60]) - - msg = "unknown column" - with pytest.raises(ValueError, match=msg): - df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) - - def test_to_string_with_column_specific_col_space(self): - df = DataFrame( - np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] - ) - - result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) - # 3 separating space + each col_space for (id, a, b, c) - assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) - - result = df.to_string(col_space=[10, 11, 12]) - assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) - @pytest.mark.parametrize( "index", [ @@ -1352,26 +1307,6 @@ def test_to_string(self): frame = DataFrame(index=np.arange(200)) frame.to_string() - def test_to_string_no_header(self): - df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) - - df_s = df.to_string(header=False) - expected = "0 1 4\n1 2 5\n2 3 6" - - assert df_s == expected - - def test_to_string_specified_header(self): - df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) - - df_s = df.to_string(header=["X", "Y"]) - expected = " X Y\n0 1 4\n1 2 5\n2 3 6" - - assert df_s == expected - - msg = "Writing 2 cols but got 1 aliases" - with pytest.raises(ValueError, match=msg): - df.to_string(header=["X"]) - def test_to_string_no_index(self): # GH 16839, GH 13032 df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) @@ -1385,104 +1320,6 @@ def test_to_string_no_index(self): expected = " y x z\n 33 11 AAA\n-44 22 " assert df_s == expected - def test_to_string_line_width_no_index(self): - # GH 13998, GH 22505 - df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 1 \n 2 \n 3 \n\n y \n 4 \n 5 \n 6 " - - assert df_s == expected - - df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " - - assert df_s == expected - - df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) - - df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n-33 \n\n y \n 4 \n 5 \n-6 " - - assert df_s == expected - - def test_to_string_line_width_no_header(self): - # GH 53054 - df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1, header=False) - expected = "0 1 \\\n1 2 \n2 3 \n\n0 4 \n1 5 \n2 6 " - - assert df_s == expected - - df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1, header=False) - expected = "0 11 \\\n1 22 \n2 33 \n\n0 4 \n1 5 \n2 6 " - - assert df_s == expected - - df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) - - df_s = df.to_string(line_width=1, header=False) - expected = "0 11 \\\n1 22 \n2 -33 \n\n0 4 \n1 5 \n2 -6 " - - assert df_s == expected - - def test_to_string_line_width_no_index_no_header(self): - # GH 53054 - df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1, index=False, header=False) - expected = "1 \\\n2 \n3 \n\n4 \n5 \n6 " - - assert df_s == expected - - df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1, index=False, header=False) - expected = "11 \\\n22 \n33 \n\n4 \n5 \n6 " - - assert df_s == expected - - df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) - - df_s = df.to_string(line_width=1, index=False, header=False) - expected = " 11 \\\n 22 \n-33 \n\n 4 \n 5 \n-6 " - - assert df_s == expected - - def test_to_string_line_width_with_both_index_and_header(self): - # GH 53054 - df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1) - expected = ( - " x \\\n0 1 \n1 2 \n2 3 \n\n y \n0 4 \n1 5 \n2 6 " - ) - - assert df_s == expected - - df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) - - df_s = df.to_string(line_width=1) - expected = ( - " x \\\n0 11 \n1 22 \n2 33 \n\n y \n0 4 \n1 5 \n2 6 " - ) - - assert df_s == expected - - df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) - - df_s = df.to_string(line_width=1) - expected = ( - " x \\\n0 11 \n1 22 \n2 -33 \n\n y \n0 4 \n1 5 \n2 -6 " - ) - - assert df_s == expected - def test_to_string_float_formatting(self): tm.reset_display_options() with option_context( @@ -1712,25 +1549,6 @@ def test_to_string_decimal(self): expected = " A\n0 6,0\n1 3,1\n2 2,2" assert df.to_string(decimal=",") == expected - def test_to_string_line_width(self): - df = DataFrame(123, index=range(10, 15), columns=range(30)) - s = df.to_string(line_width=80) - assert max(len(line) for line in s.split("\n")) == 80 - - def test_to_string_header_false(self): - # GH 49230 - df = DataFrame([1, 2]) - df.index.name = "a" - s = df.to_string(header=False) - expected = "a \n0 1\n1 2" - assert s == expected - - df = DataFrame([[1, 2], [3, 4]]) - df.index.name = "a" - s = df.to_string(header=False) - expected = "a \n0 1 2\n1 3 4" - assert s == expected - def test_show_dimensions(self): df = DataFrame(123, index=range(10, 15), columns=range(30)) @@ -2668,13 +2486,6 @@ def test_to_string_header(self): exp = "0 0\n ..\n9 9" assert res == exp - def test_to_string_multindex_header(self): - # GH 16718 - df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index(["a", "b"]) - res = df.to_string(header=["r1", "r2"]) - exp = " r1 r2\na b \n0 1 2 3" - assert res == exp - def test_to_string_empty_col(self): # GH 13653 s = Series(["", "Hello", "World", "", "", "Mooooo", "", ""]) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 6560154652859..51d30b01dbcb8 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -129,6 +129,208 @@ def test_no_extra_space(self): assert result == expected +class TestDataFrameToStringColSpace: + def test_to_string_with_column_specific_col_space_raises(self): + df = DataFrame( + np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] + ) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" + ) + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40]) + + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) + + def test_to_string_with_column_specific_col_space(self): + df = DataFrame( + np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] + ) + + result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) + # 3 separating space + each col_space for (id, a, b, c) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) + + result = df.to_string(col_space=[10, 11, 12]) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) + + def test_to_string_with_col_space(self): + df = DataFrame(np.random.default_rng(2).random(size=(1, 3))) + c10 = len(df.to_string(col_space=10).split("\n")[1]) + c20 = len(df.to_string(col_space=20).split("\n")[1]) + c30 = len(df.to_string(col_space=30).split("\n")[1]) + assert c10 < c20 < c30 + + # GH#8230 + # col_space wasn't being applied with header=False + with_header = df.to_string(col_space=20) + with_header_row1 = with_header.splitlines()[1] + no_header = df.to_string(col_space=20, header=False) + assert len(with_header_row1) == len(no_header) + + def test_to_string_repr_tuples(self): + buf = StringIO() + + df = DataFrame({"tups": list(zip(range(10), range(10)))}) + repr(df) + df.to_string(col_space=10, buf=buf) + + +class TestDataFrameToStringHeader: + def test_to_string_header_false(self): + # GH#49230 + df = DataFrame([1, 2]) + df.index.name = "a" + s = df.to_string(header=False) + expected = "a \n0 1\n1 2" + assert s == expected + + df = DataFrame([[1, 2], [3, 4]]) + df.index.name = "a" + s = df.to_string(header=False) + expected = "a \n0 1 2\n1 3 4" + assert s == expected + + def test_to_string_multindex_header(self): + # GH#16718 + df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index(["a", "b"]) + res = df.to_string(header=["r1", "r2"]) + exp = " r1 r2\na b \n0 1 2 3" + assert res == exp + + def test_to_string_no_header(self): + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(header=False) + expected = "0 1 4\n1 2 5\n2 3 6" + + assert df_s == expected + + def test_to_string_specified_header(self): + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(header=["X", "Y"]) + expected = " X Y\n0 1 4\n1 2 5\n2 3 6" + + assert df_s == expected + + msg = "Writing 2 cols but got 1 aliases" + with pytest.raises(ValueError, match=msg): + df.to_string(header=["X"]) + + +class TestDataFrameToStringLineWidth: + def test_to_string_line_width(self): + df = DataFrame(123, index=range(10, 15), columns=range(30)) + lines = df.to_string(line_width=80) + assert max(len(line) for line in lines.split("\n")) == 80 + + def test_to_string_line_width_no_index(self): + # GH#13998, GH#22505 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 1 \n 2 \n 3 \n\n y \n 4 \n 5 \n 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 11 \n 22 \n-33 \n\n y \n 4 \n 5 \n-6 " + + assert df_s == expected + + def test_to_string_line_width_no_header(self): + # GH#53054 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, header=False) + expected = "0 1 \\\n1 2 \n2 3 \n\n0 4 \n1 5 \n2 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, header=False) + expected = "0 11 \\\n1 22 \n2 33 \n\n0 4 \n1 5 \n2 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, header=False) + expected = "0 11 \\\n1 22 \n2 -33 \n\n0 4 \n1 5 \n2 -6 " + + assert df_s == expected + + def test_to_string_line_width_with_both_index_and_header(self): + # GH#53054 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1) + expected = ( + " x \\\n0 1 \n1 2 \n2 3 \n\n y \n0 4 \n1 5 \n2 6 " + ) + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1) + expected = ( + " x \\\n0 11 \n1 22 \n2 33 \n\n y \n0 4 \n1 5 \n2 6 " + ) + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1) + expected = ( + " x \\\n0 11 \n1 22 \n2 -33 \n\n y \n0 4 \n1 5 \n2 -6 " + ) + + assert df_s == expected + + def test_to_string_line_width_no_index_no_header(self): + # GH#53054 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False, header=False) + expected = "1 \\\n2 \n3 \n\n4 \n5 \n6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False, header=False) + expected = "11 \\\n22 \n33 \n\n4 \n5 \n6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, index=False, header=False) + expected = " 11 \\\n 22 \n-33 \n\n 4 \n 5 \n-6 " + + assert df_s == expected + + def test_repr_embedded_ndarray(): arr = np.empty(10, dtype=[("err", object)]) for i in range(len(arr)): @@ -140,14 +342,6 @@ def test_repr_embedded_ndarray(): df.to_string() -def test_repr_tuples(): - buf = StringIO() - - df = DataFrame({"tups": list(zip(range(10), range(10)))}) - repr(df) - df.to_string(col_space=10, buf=buf) - - def test_to_string_truncate(): # GH 9784 - dont truncate when calling DataFrame.to_string df = DataFrame( From be9fa998cc22d3a14837fce17457a83b8c04f20c Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 16:51:46 -0700 Subject: [PATCH 5/7] collect to_string tests --- pandas/tests/io/formats/test_format.py | 164 +----- pandas/tests/io/formats/test_to_string.py | 633 +++++++++++++--------- 2 files changed, 397 insertions(+), 400 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 8bf83aeb1a043..b4c7378b03356 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2,10 +2,7 @@ Tests for the file pandas.io.formats.format, *not* tests for general formatting of pandas objects. """ -from datetime import ( - datetime, - timedelta, -) +from datetime import datetime from io import StringIO from pathlib import Path import re @@ -1810,37 +1807,6 @@ def test_repr_unicode(self): a.name = "title1" repr(a) - def test_to_string(self): - ts = tm.makeTimeSeries() - buf = StringIO() - - s = ts.to_string() - - retval = ts.to_string(buf=buf) - assert retval is None - assert buf.getvalue().strip() == s - - # pass float_format - format = "%.4f".__mod__ - result = ts.to_string(float_format=format) - result = [x.split()[1] for x in result.split("\n")[:-1]] - expected = [format(x) for x in ts] - assert result == expected - - # empty string - result = ts[:0].to_string() - assert result == "Series([], Freq: B)" - - result = ts[:0].to_string(length=0) - assert result == "Series([], Freq: B)" - - # name and length - cp = ts.copy() - cp.name = "foo" - result = cp.to_string(length=True, name=True, dtype=True) - last_line = result.split("\n")[-1].strip() - assert last_line == (f"Freq: B, Name: foo, Length: {len(cp)}, dtype: float64") - def test_freq_name_separation(self): s = Series( np.random.default_rng(2).standard_normal(10), @@ -1882,13 +1848,6 @@ def test_to_string_float_na_spacing(self): ) assert result == expected - def test_to_string_without_index(self): - # GH 11729 Test index=False option - s = Series([1, 2, 3, 4]) - result = s.to_string(index=False) - expected = "\n".join(["1", "2", "3", "4"]) - assert result == expected - def test_unicode_name_in_footer(self): s = Series([1, 2], name="\u05e2\u05d1\u05e8\u05d9\u05ea") sf = fmt.SeriesFormatter(s, name="\u05e2\u05d1\u05e8\u05d9\u05ea") @@ -2137,16 +2096,16 @@ def test_float_trim_zeros(self): else: assert "+10" in line - def test_datetimeindex(self): + def test_to_string_with_datetimeindex(self): index = date_range("20130102", periods=6) - s = Series(1, index=index) - result = s.to_string() + ser = Series(1, index=index) + result = ser.to_string() assert "2013-01-02" in result # nat in index s2 = Series(2, index=[Timestamp("20130111"), NaT]) - s = pd.concat([s2, s]) - result = s.to_string() + ser = pd.concat([s2, ser]) + result = ser.to_string() assert "NaT" in result # nat in summary @@ -2177,63 +2136,6 @@ def test_datetimeindex_highprecision(self, start_date): result = str(s2.index) assert start_date in result - def test_timedelta64(self): - Series(np.array([1100, 20], dtype="timedelta64[ns]")).to_string() - - s = Series(date_range("2012-1-1", periods=3, freq="D")) - - # GH2146 - - # adding NaTs - y = s - s.shift(1) - result = y.to_string() - assert "1 days" in result - assert "00:00:00" not in result - assert "NaT" in result - - # with frac seconds - o = Series([datetime(2012, 1, 1, microsecond=150)] * 3) - y = s - o - result = y.to_string() - assert "-1 days +23:59:59.999850" in result - - # rounding? - o = Series([datetime(2012, 1, 1, 1)] * 3) - y = s - o - result = y.to_string() - assert "-1 days +23:00:00" in result - assert "1 days 23:00:00" in result - - o = Series([datetime(2012, 1, 1, 1, 1)] * 3) - y = s - o - result = y.to_string() - assert "-1 days +22:59:00" in result - assert "1 days 22:59:00" in result - - o = Series([datetime(2012, 1, 1, 1, 1, microsecond=150)] * 3) - y = s - o - result = y.to_string() - assert "-1 days +22:58:59.999850" in result - assert "0 days 22:58:59.999850" in result - - # neg time - td = timedelta(minutes=5, seconds=3) - s2 = Series(date_range("2012-1-1", periods=3, freq="D")) + td - y = s - s2 - result = y.to_string() - assert "-1 days +23:54:57" in result - - td = timedelta(microseconds=550) - s2 = Series(date_range("2012-1-1", periods=3, freq="D")) + td - y = s - td - result = y.to_string() - assert "2012-01-01 23:59:59.999450" in result - - # no boxing of the actual elements - td = Series(pd.timedelta_range("1 days", periods=3)) - result = td.to_string() - assert result == "0 1 days\n1 2 days\n2 3 days" - def test_mixed_datetime64(self): df = DataFrame({"A": [1, 2], "B": ["2012-01-01", "2012-01-02"]}) df["B"] = pd.to_datetime(df.B) @@ -2439,60 +2341,6 @@ def test_repr_min_rows(self): # max_rows of None -> never truncate assert ".." not in repr(s) - def test_to_string_name(self): - s = Series(range(100), dtype="int64") - s.name = "myser" - res = s.to_string(max_rows=2, name=True) - exp = "0 0\n ..\n99 99\nName: myser" - assert res == exp - res = s.to_string(max_rows=2, name=False) - exp = "0 0\n ..\n99 99" - assert res == exp - - def test_to_string_dtype(self): - s = Series(range(100), dtype="int64") - res = s.to_string(max_rows=2, dtype=True) - exp = "0 0\n ..\n99 99\ndtype: int64" - assert res == exp - res = s.to_string(max_rows=2, dtype=False) - exp = "0 0\n ..\n99 99" - assert res == exp - - def test_to_string_length(self): - s = Series(range(100), dtype="int64") - res = s.to_string(max_rows=2, length=True) - exp = "0 0\n ..\n99 99\nLength: 100" - assert res == exp - - def test_to_string_na_rep(self): - s = Series(index=range(100), dtype=np.float64) - res = s.to_string(na_rep="foo", max_rows=2) - exp = "0 foo\n ..\n99 foo" - assert res == exp - - def test_to_string_float_format(self): - s = Series(range(10), dtype="float64") - res = s.to_string(float_format=lambda x: f"{x:2.1f}", max_rows=2) - exp = "0 0.0\n ..\n9 9.0" - assert res == exp - - def test_to_string_header(self): - s = Series(range(10), dtype="int64") - s.index.name = "foo" - res = s.to_string(header=True, max_rows=2) - exp = "foo\n0 0\n ..\n9 9" - assert res == exp - res = s.to_string(header=False, max_rows=2) - exp = "0 0\n ..\n9 9" - assert res == exp - - def test_to_string_empty_col(self): - # GH 13653 - s = Series(["", "Hello", "World", "", "", "Mooooo", "", ""]) - res = s.to_string(index=False) - exp = " \n Hello\n World\n \n \nMooooo\n \n " - assert re.match(exp, res) - class TestGenericArrayFormatter: def test_1d_array(self): diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 51d30b01dbcb8..4a376eb1bc01b 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -1,5 +1,9 @@ -from datetime import datetime +from datetime import ( + datetime, + timedelta, +) from io import StringIO +import re from textwrap import dedent import numpy as np @@ -8,7 +12,9 @@ from pandas import ( DataFrame, Series, + date_range, option_context, + timedelta_range, to_datetime, ) import pandas._testing as tm @@ -331,42 +337,32 @@ def test_to_string_line_width_no_index_no_header(self): assert df_s == expected -def test_repr_embedded_ndarray(): - arr = np.empty(10, dtype=[("err", object)]) - for i in range(len(arr)): - arr["err"][i] = np.random.default_rng(2).standard_normal(i) - - df = DataFrame(arr) - repr(df["err"]) - repr(df) - df.to_string() +class TestDataFrameToString: + def test_repr_embedded_ndarray(self): + arr = np.empty(10, dtype=[("err", object)]) + for i in range(len(arr)): + arr["err"][i] = np.random.default_rng(2).standard_normal(i) + df = DataFrame(arr) + repr(df["err"]) + repr(df) + df.to_string() -def test_to_string_truncate(): - # GH 9784 - dont truncate when calling DataFrame.to_string - df = DataFrame( - [ - { - "a": "foo", - "b": "bar", - "c": "let's make this a very VERY long line that is longer " - "than the default 50 character limit", - "d": 1, - }, - {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, - ] - ) - df.set_index(["a", "b", "c"]) - assert df.to_string() == ( - " a b " - " c d\n" - "0 foo bar let's make this a very VERY long line t" - "hat is longer than the default 50 character limit 1\n" - "1 foo bar " - " stuff 1" - ) - with option_context("max_colwidth", 20): - # the display option has no effect on the to_string method + def test_to_string_truncate(self): + # GH 9784 - dont truncate when calling DataFrame.to_string + df = DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "let's make this a very VERY long line that is longer " + "than the default 50 character limit", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) assert df.to_string() == ( " a b " " c d\n" @@ -375,231 +371,384 @@ def test_to_string_truncate(): "1 foo bar " " stuff 1" ) - assert df.to_string(max_colwidth=20) == ( - " a b c d\n" - "0 foo bar let's make this ... 1\n" - "1 foo bar stuff 1" + with option_context("max_colwidth", 20): + # the display option has no effect on the to_string method + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + assert df.to_string(max_colwidth=20) == ( + " a b c d\n" + "0 foo bar let's make this ... 1\n" + "1 foo bar stuff 1" + ) + + @pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], ) + def test_format_remove_leading_space_dataframe(self, input_array, expected): + # GH#24980 + df = DataFrame(input_array).to_string(index=False) + assert df == expected + @pytest.mark.parametrize( + "data,expected", + [ + ( + {"col1": [1, 2], "col2": [3, 4]}, + " col1 col2\n0 1 3\n1 2 4", + ), + ( + {"col1": ["Abc", 0.756], "col2": [np.nan, 4.5435]}, + " col1 col2\n0 Abc NaN\n1 0.756 4.5435", + ), + ( + {"col1": [np.nan, "a"], "col2": [0.009, 3.543], "col3": ["Abc", 23]}, + " col1 col2 col3\n0 NaN 0.009 Abc\n1 a 3.543 23", + ), + ], + ) + def test_to_string_max_rows_zero(self, data, expected): + # GH#35394 + result = DataFrame(data=data).to_string(max_rows=0) + assert result == expected -@pytest.mark.parametrize( - "input_array, expected", - [ - ("a", "a"), - (["a", "b"], "a\nb"), - ([1, "a"], "1\na"), - (1, "1"), - ([0, -1], " 0\n-1"), - (1.0, "1.0"), - ([" a", " b"], " a\n b"), - ([".1", "1"], ".1\n 1"), - (["10", "-10"], " 10\n-10"), - ], -) -def test_format_remove_leading_space_series(input_array, expected): - # GH: 24980 - s = Series(input_array).to_string(index=False) - assert s == expected - - -@pytest.mark.parametrize( - "input_array, expected", - [ - ({"A": ["a"]}, "A\na"), - ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), - ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), - ], -) -def test_format_remove_leading_space_dataframe(input_array, expected): - # GH: 24980 - df = DataFrame(input_array).to_string(index=False) - assert df == expected - - -@pytest.mark.parametrize( - "max_cols, max_rows, expected", - [ - ( - 10, - None, - " 0 1 2 3 4 ... 6 7 8 9 10\n" - " 0 0 0 0 0 ... 0 0 0 0 0\n" - " 0 0 0 0 0 ... 0 0 0 0 0\n" - " 0 0 0 0 0 ... 0 0 0 0 0\n" - " 0 0 0 0 0 ... 0 0 0 0 0", - ), - ( - None, - 2, - " 0 1 2 3 4 5 6 7 8 9 10\n" - " 0 0 0 0 0 0 0 0 0 0 0\n" - " .. .. .. .. .. .. .. .. .. .. ..\n" - " 0 0 0 0 0 0 0 0 0 0 0", - ), - ( - 10, - 2, - " 0 1 2 3 4 ... 6 7 8 9 10\n" - " 0 0 0 0 0 ... 0 0 0 0 0\n" - " .. .. .. .. .. ... .. .. .. .. ..\n" - " 0 0 0 0 0 ... 0 0 0 0 0", - ), - ( - 9, - 2, - " 0 1 2 3 ... 7 8 9 10\n" - " 0 0 0 0 ... 0 0 0 0\n" - " .. .. .. .. ... .. .. .. ..\n" - " 0 0 0 0 ... 0 0 0 0", - ), - ( - 1, - 1, - " 0 ...\n 0 ...\n.. ...", - ), - ], -) -def test_truncation_no_index(max_cols, max_rows, expected): - df = DataFrame([[0] * 11] * 4) - assert df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected + @pytest.mark.parametrize( + "max_cols, max_rows, expected", + [ + ( + 10, + None, + " 0 1 2 3 4 ... 6 7 8 9 10\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " 0 0 0 0 0 ... 0 0 0 0 0", + ), + ( + None, + 2, + " 0 1 2 3 4 5 6 7 8 9 10\n" + " 0 0 0 0 0 0 0 0 0 0 0\n" + " .. .. .. .. .. .. .. .. .. .. ..\n" + " 0 0 0 0 0 0 0 0 0 0 0", + ), + ( + 10, + 2, + " 0 1 2 3 4 ... 6 7 8 9 10\n" + " 0 0 0 0 0 ... 0 0 0 0 0\n" + " .. .. .. .. .. ... .. .. .. .. ..\n" + " 0 0 0 0 0 ... 0 0 0 0 0", + ), + ( + 9, + 2, + " 0 1 2 3 ... 7 8 9 10\n" + " 0 0 0 0 ... 0 0 0 0\n" + " .. .. .. .. ... .. .. .. ..\n" + " 0 0 0 0 ... 0 0 0 0", + ), + ( + 1, + 1, + " 0 ...\n 0 ...\n.. ...", + ), + ], + ) + def test_truncation_no_index(self, max_cols, max_rows, expected): + df = DataFrame([[0] * 11] * 4) + assert ( + df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected + ) + def test_to_string_unicode_columns(self, float_frame): + df = DataFrame({"\u03c3": np.arange(10.0)}) -def test_to_string_unicode_columns(float_frame): - df = DataFrame({"\u03c3": np.arange(10.0)}) + buf = StringIO() + df.to_string(buf=buf) + buf.getvalue() - buf = StringIO() - df.to_string(buf=buf) - buf.getvalue() + buf = StringIO() + df.info(buf=buf) + buf.getvalue() - buf = StringIO() - df.info(buf=buf) - buf.getvalue() + result = float_frame.to_string() + assert isinstance(result, str) - result = float_frame.to_string() - assert isinstance(result, str) + @pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) + def test_to_string_na_rep_and_float_format(self, na_rep): + # GH#13828 + df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"]) + result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format) + expected = dedent( + f"""\ + Group Data + 0 A 1.22 + 1 A {na_rep}""" + ) + assert result == expected + def test_to_string_string_dtype(self): + # GH#50099 + pytest.importorskip("pyarrow") + df = DataFrame( + {"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]} + ) + df = df.astype( + {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"} + ) + result = df.dtypes.to_string() + expected = dedent( + """\ + x string[pyarrow] + y string[python] + z int64[pyarrow]""" + ) + assert result == expected -def test_to_string_utf8_columns(): - n = "\u05d0".encode() + def test_to_string_pos_args_deprecation(self): + # GH#54229 + df = DataFrame({"a": [1, 2, 3]}) + msg = ( + "Starting with pandas version 3.0 all arguments of to_string " + "except for the " + "argument 'buf' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + buf = StringIO() + df.to_string(buf, None, None, True, True) - with option_context("display.max_rows", 1): + def test_to_string_utf8_columns(self): + n = "\u05d0".encode() df = DataFrame([1, 2], columns=[n]) - repr(df) + with option_context("display.max_rows", 1): + repr(df) -def test_to_string_unicode_two(): - dm = DataFrame({"c/\u03c3": []}) - buf = StringIO() - dm.to_string(buf) + def test_to_string_unicode_two(self): + dm = DataFrame({"c/\u03c3": []}) + buf = StringIO() + dm.to_string(buf) + def test_to_string_unicode_three(self): + dm = DataFrame(["\xc2"]) + buf = StringIO() + dm.to_string(buf) -def test_to_string_unicode_three(): - dm = DataFrame(["\xc2"]) - buf = StringIO() - dm.to_string(buf) +class TestSeriesToString: + def test_to_string_without_index(self): + # GH#11729 Test index=False option + ser = Series([1, 2, 3, 4]) + result = ser.to_string(index=False) + expected = "\n".join(["1", "2", "3", "4"]) + assert result == expected -def test_to_string_complex_number_trims_zeros(): - s = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j]) - result = s.to_string() - expected = dedent( - """\ - 0 1.00+1.00j - 1 1.00+1.00j - 2 1.05+1.00j""" - ) - assert result == expected - - -def test_nullable_float_to_string(float_ea_dtype): - # https://github.com/pandas-dev/pandas/issues/36775 - dtype = float_ea_dtype - s = Series([0.0, 1.0, None], dtype=dtype) - result = s.to_string() - expected = dedent( - """\ - 0 0.0 - 1 1.0 - 2 """ - ) - assert result == expected - - -def test_nullable_int_to_string(any_int_ea_dtype): - # https://github.com/pandas-dev/pandas/issues/36775 - dtype = any_int_ea_dtype - s = Series([0, 1, None], dtype=dtype) - result = s.to_string() - expected = dedent( - """\ - 0 0 - 1 1 - 2 """ - ) - assert result == expected - - -@pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) -def test_to_string_na_rep_and_float_format(na_rep): - # GH 13828 - df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"]) - result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format) - expected = dedent( - f"""\ - Group Data - 0 A 1.22 - 1 A {na_rep}""" - ) - assert result == expected - - -@pytest.mark.parametrize( - "data,expected", - [ - ( - {"col1": [1, 2], "col2": [3, 4]}, - " col1 col2\n0 1 3\n1 2 4", - ), - ( - {"col1": ["Abc", 0.756], "col2": [np.nan, 4.5435]}, - " col1 col2\n0 Abc NaN\n1 0.756 4.5435", - ), - ( - {"col1": [np.nan, "a"], "col2": [0.009, 3.543], "col3": ["Abc", 23]}, - " col1 col2 col3\n0 NaN 0.009 Abc\n1 a 3.543 23", - ), - ], -) -def test_to_string_max_rows_zero(data, expected): - # GH35394 - result = DataFrame(data=data).to_string(max_rows=0) - assert result == expected - - -def test_to_string_string_dtype(): - # GH#50099 - pytest.importorskip("pyarrow") - df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]}) - df = df.astype( - {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"} - ) - result = df.dtypes.to_string() - expected = dedent( - """\ - x string[pyarrow] - y string[python] - z int64[pyarrow]""" - ) - assert result == expected + def test_to_string_name(self): + ser = Series(range(100), dtype="int64") + ser.name = "myser" + res = ser.to_string(max_rows=2, name=True) + exp = "0 0\n ..\n99 99\nName: myser" + assert res == exp + res = ser.to_string(max_rows=2, name=False) + exp = "0 0\n ..\n99 99" + assert res == exp + def test_to_string_dtype(self): + ser = Series(range(100), dtype="int64") + res = ser.to_string(max_rows=2, dtype=True) + exp = "0 0\n ..\n99 99\ndtype: int64" + assert res == exp + res = ser.to_string(max_rows=2, dtype=False) + exp = "0 0\n ..\n99 99" + assert res == exp -def test_to_string_pos_args_deprecation(): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_string except for the " - r"argument 'buf' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): + def test_to_string_length(self): + ser = Series(range(100), dtype="int64") + res = ser.to_string(max_rows=2, length=True) + exp = "0 0\n ..\n99 99\nLength: 100" + assert res == exp + + def test_to_string_na_rep(self): + ser = Series(index=range(100), dtype=np.float64) + res = ser.to_string(na_rep="foo", max_rows=2) + exp = "0 foo\n ..\n99 foo" + assert res == exp + + def test_to_string_float_format(self): + ser = Series(range(10), dtype="float64") + res = ser.to_string(float_format=lambda x: f"{x:2.1f}", max_rows=2) + exp = "0 0.0\n ..\n9 9.0" + assert res == exp + + def test_to_string_header(self): + ser = Series(range(10), dtype="int64") + ser.index.name = "foo" + res = ser.to_string(header=True, max_rows=2) + exp = "foo\n0 0\n ..\n9 9" + assert res == exp + res = ser.to_string(header=False, max_rows=2) + exp = "0 0\n ..\n9 9" + assert res == exp + + def test_to_string_empty_col(self): + # GH#13653 + ser = Series(["", "Hello", "World", "", "", "Mooooo", "", ""]) + res = ser.to_string(index=False) + exp = " \n Hello\n World\n \n \nMooooo\n \n " + assert re.match(exp, res) + + def test_to_string_timedelta64(self): + Series(np.array([1100, 20], dtype="timedelta64[ns]")).to_string() + + ser = Series(date_range("2012-1-1", periods=3, freq="D")) + + # GH#2146 + + # adding NaTs + y = ser - ser.shift(1) + result = y.to_string() + assert "1 days" in result + assert "00:00:00" not in result + assert "NaT" in result + + # with frac seconds + o = Series([datetime(2012, 1, 1, microsecond=150)] * 3) + y = ser - o + result = y.to_string() + assert "-1 days +23:59:59.999850" in result + + # rounding? + o = Series([datetime(2012, 1, 1, 1)] * 3) + y = ser - o + result = y.to_string() + assert "-1 days +23:00:00" in result + assert "1 days 23:00:00" in result + + o = Series([datetime(2012, 1, 1, 1, 1)] * 3) + y = ser - o + result = y.to_string() + assert "-1 days +22:59:00" in result + assert "1 days 22:59:00" in result + + o = Series([datetime(2012, 1, 1, 1, 1, microsecond=150)] * 3) + y = ser - o + result = y.to_string() + assert "-1 days +22:58:59.999850" in result + assert "0 days 22:58:59.999850" in result + + # neg time + td = timedelta(minutes=5, seconds=3) + s2 = Series(date_range("2012-1-1", periods=3, freq="D")) + td + y = ser - s2 + result = y.to_string() + assert "-1 days +23:54:57" in result + + td = timedelta(microseconds=550) + s2 = Series(date_range("2012-1-1", periods=3, freq="D")) + td + y = ser - td + result = y.to_string() + assert "2012-01-01 23:59:59.999450" in result + + # no boxing of the actual elements + td = Series(timedelta_range("1 days", periods=3)) + result = td.to_string() + assert result == "0 1 days\n1 2 days\n2 3 days" + + def test_to_string(self): + ts = tm.makeTimeSeries() buf = StringIO() - df.to_string(buf, None, None, True, True) + + s = ts.to_string() + + retval = ts.to_string(buf=buf) + assert retval is None + assert buf.getvalue().strip() == s + + # pass float_format + format = "%.4f".__mod__ + result = ts.to_string(float_format=format) + result = [x.split()[1] for x in result.split("\n")[:-1]] + expected = [format(x) for x in ts] + assert result == expected + + # empty string + result = ts[:0].to_string() + assert result == "Series([], Freq: B)" + + result = ts[:0].to_string(length=0) + assert result == "Series([], Freq: B)" + + # name and length + cp = ts.copy() + cp.name = "foo" + result = cp.to_string(length=True, name=True, dtype=True) + last_line = result.split("\n")[-1].strip() + assert last_line == (f"Freq: B, Name: foo, Length: {len(cp)}, dtype: float64") + + @pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), + ], + ) + def test_format_remove_leading_space_series(self, input_array, expected): + # GH: 24980 + ser = Series(input_array) + result = ser.to_string(index=False) + assert result == expected + + def test_to_string_complex_number_trims_zeros(self): + ser = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j]) + result = ser.to_string() + expected = dedent( + """\ + 0 1.00+1.00j + 1 1.00+1.00j + 2 1.05+1.00j""" + ) + assert result == expected + + def test_nullable_float_to_string(self, float_ea_dtype): + # https://github.com/pandas-dev/pandas/issues/36775 + dtype = float_ea_dtype + ser = Series([0.0, 1.0, None], dtype=dtype) + result = ser.to_string() + expected = dedent( + """\ + 0 0.0 + 1 1.0 + 2 """ + ) + assert result == expected + + def test_nullable_int_to_string(self, any_int_ea_dtype): + # https://github.com/pandas-dev/pandas/issues/36775 + dtype = any_int_ea_dtype + ser = Series([0, 1, None], dtype=dtype) + result = ser.to_string() + expected = dedent( + """\ + 0 0 + 1 1 + 2 """ + ) + assert result == expected From 06a508451bd2ceb7f8a60b34187f0388a5bb8f6f Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 18:09:25 -0700 Subject: [PATCH 6/7] collect to_string tests --- pandas/tests/io/formats/test_format.py | 422 --------------------- pandas/tests/io/formats/test_to_string.py | 438 ++++++++++++++++++++++ 2 files changed, 438 insertions(+), 422 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b4c7378b03356..46368b5fee2df 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1028,12 +1028,6 @@ def test_datetimeindex_highprecision(self, start_date): result = str(df.index) assert start_date in result - def test_nonunicode_nonascii_alignment(self): - df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) - rep_str = df.to_string() - lines = rep_str.split("\n") - assert len(lines[1]) == len(lines[2]) - def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({"c/\u03c3": Series({"test": np.nan})}) str(dm.to_string()) @@ -1179,259 +1173,6 @@ def test_long_series(self): nmatches = len(re.findall("dtype", str_rep)) assert nmatches == 1 - def test_index_with_nan(self): - # GH 2850 - df = DataFrame( - { - "id1": {0: "1a3", 1: "9h4"}, - "id2": {0: np.nan, 1: "d67"}, - "id3": {0: "78d", 1: "79d"}, - "value": {0: 123, 1: 64}, - } - ) - - # multi-index - y = df.set_index(["id1", "id2", "id3"]) - result = y.to_string() - expected = ( - " value\nid1 id2 id3 \n" - "1a3 NaN 78d 123\n9h4 d67 79d 64" - ) - assert result == expected - - # index - y = df.set_index("id2") - result = y.to_string() - expected = ( - " id1 id3 value\nid2 \n" - "NaN 1a3 78d 123\nd67 9h4 79d 64" - ) - assert result == expected - - # with append (this failed in 0.12) - y = df.set_index(["id1", "id2"]).set_index("id3", append=True) - result = y.to_string() - expected = ( - " value\nid1 id2 id3 \n" - "1a3 NaN 78d 123\n9h4 d67 79d 64" - ) - assert result == expected - - # all-nan in mi - df2 = df.copy() - df2.loc[:, "id2"] = np.nan - y = df2.set_index("id2") - result = y.to_string() - expected = ( - " id1 id3 value\nid2 \n" - "NaN 1a3 78d 123\nNaN 9h4 79d 64" - ) - assert result == expected - - # partial nan in mi - df2 = df.copy() - df2.loc[:, "id2"] = np.nan - y = df2.set_index(["id2", "id3"]) - result = y.to_string() - expected = ( - " id1 value\nid2 id3 \n" - "NaN 78d 1a3 123\n 79d 9h4 64" - ) - assert result == expected - - df = DataFrame( - { - "id1": {0: np.nan, 1: "9h4"}, - "id2": {0: np.nan, 1: "d67"}, - "id3": {0: np.nan, 1: "79d"}, - "value": {0: 123, 1: 64}, - } - ) - - y = df.set_index(["id1", "id2", "id3"]) - result = y.to_string() - expected = ( - " value\nid1 id2 id3 \n" - "NaN NaN NaN 123\n9h4 d67 79d 64" - ) - assert result == expected - - def test_to_string(self): - # big mixed - biggie = DataFrame( - { - "A": np.random.default_rng(2).standard_normal(200), - "B": tm.makeStringIndex(200), - }, - ) - - biggie.loc[:20, "A"] = np.nan - biggie.loc[:20, "B"] = np.nan - s = biggie.to_string() - - buf = StringIO() - retval = biggie.to_string(buf=buf) - assert retval is None - assert buf.getvalue() == s - - assert isinstance(s, str) - - # print in right order - result = biggie.to_string( - columns=["B", "A"], col_space=17, float_format="%.5f".__mod__ - ) - lines = result.split("\n") - header = lines[0].strip().split() - joined = "\n".join([re.sub(r"\s+", " ", x).strip() for x in lines[1:]]) - recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") - tm.assert_series_equal(recons["B"], biggie["B"]) - assert recons["A"].count() == biggie["A"].count() - assert (np.abs(recons["A"].dropna() - biggie["A"].dropna()) < 0.1).all() - - # expected = ['B', 'A'] - # assert header == expected - - result = biggie.to_string(columns=["A"], col_space=17) - header = result.split("\n")[0].strip().split() - expected = ["A"] - assert header == expected - - biggie.to_string(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"}) - - biggie.to_string(columns=["B", "A"], float_format=str) - biggie.to_string(columns=["B", "A"], col_space=12, float_format=str) - - frame = DataFrame(index=np.arange(200)) - frame.to_string() - - def test_to_string_no_index(self): - # GH 16839, GH 13032 - df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) - - df_s = df.to_string(index=False) - # Leading space is expected for positive numbers. - expected = " x y z\n11 33 AAA\n22 -44 " - assert df_s == expected - - df_s = df[["y", "x", "z"]].to_string(index=False) - expected = " y x z\n 33 11 AAA\n-44 22 " - assert df_s == expected - - def test_to_string_float_formatting(self): - tm.reset_display_options() - with option_context( - "display.precision", - 5, - "display.notebook_repr_html", - False, - ): - df = DataFrame( - {"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8, 1.253456, np.pi, -1e6]} - ) - - df_s = df.to_string() - - if _three_digit_exp(): - expected = ( - " x\n0 0.00000e+000\n1 2.50000e-001\n" - "2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n" - "5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n" - "8 -1.00000e+006" - ) - else: - expected = ( - " x\n0 0.00000e+00\n1 2.50000e-01\n" - "2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n" - "5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n" - "8 -1.00000e+06" - ) - assert df_s == expected - - df = DataFrame({"x": [3234, 0.253]}) - df_s = df.to_string() - - expected = " x\n0 3234.000\n1 0.253" - assert df_s == expected - - tm.reset_display_options() - assert get_option("display.precision") == 6 - - df = DataFrame({"x": [1e9, 0.2512]}) - df_s = df.to_string() - - if _three_digit_exp(): - expected = " x\n0 1.000000e+009\n1 2.512000e-001" - else: - expected = " x\n0 1.000000e+09\n1 2.512000e-01" - assert df_s == expected - - def test_to_string_float_format_no_fixed_width(self): - # GH 21625 - df = DataFrame({"x": [0.19999]}) - expected = " x\n0 0.200" - assert df.to_string(float_format="%.3f") == expected - - # GH 22270 - df = DataFrame({"x": [100.0]}) - expected = " x\n0 100" - assert df.to_string(float_format="%.0f") == expected - - def test_to_string_small_float_values(self): - df = DataFrame({"a": [1.5, 1e-17, -5.5e-7]}) - - result = df.to_string() - # sadness per above - if _three_digit_exp(): - expected = ( - " a\n" - "0 1.500000e+000\n" - "1 1.000000e-017\n" - "2 -5.500000e-007" - ) - else: - expected = ( - " a\n" - "0 1.500000e+00\n" - "1 1.000000e-17\n" - "2 -5.500000e-07" - ) - assert result == expected - - # but not all exactly zero - df = df * 0 - result = df.to_string() - expected = " 0\n0 0\n1 0\n2 -0" - - def test_to_string_float_index(self): - index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(np.arange(5), index=index) - - result = df.to_string() - expected = " 0\n1.5 0\n2.0 1\n3.0 2\n4.0 3\n5.0 4" - assert result == expected - - def test_to_string_complex_float_formatting(self): - # GH #25514, 25745 - with option_context("display.precision", 5): - df = DataFrame( - { - "x": [ - (0.4467846931321966 + 0.0715185102060818j), - (0.2739442392974528 + 0.23515228785438969j), - (0.26974928742135185 + 0.3250604054898979j), - (-1j), - ] - } - ) - result = df.to_string() - expected = ( - " x\n0 0.44678+0.07152j\n" - "1 0.27394+0.23515j\n" - "2 0.26975+0.32506j\n" - "3 -0.00000-1.00000j" - ) - assert result == expected - def test_to_string_ascii_error(self): data = [ ( @@ -1446,106 +1187,6 @@ def test_to_string_ascii_error(self): # it works! repr(df) - def test_to_string_int_formatting(self): - df = DataFrame({"x": [-15, 20, 25, -35]}) - assert issubclass(df["x"].dtype.type, np.integer) - - output = df.to_string() - expected = " x\n0 -15\n1 20\n2 25\n3 -35" - assert output == expected - - def test_to_string_left_justify_cols(self): - tm.reset_display_options() - df = DataFrame({"x": [3234, 0.253]}) - df_s = df.to_string(justify="left") - expected = " x \n0 3234.000\n1 0.253" - assert df_s == expected - - def test_to_string_format_na(self): - tm.reset_display_options() - df = DataFrame( - { - "A": [np.nan, -1, -2.1234, 3, 4], - "B": [np.nan, "foo", "foooo", "fooooo", "bar"], - } - ) - result = df.to_string() - - expected = ( - " A B\n" - "0 NaN NaN\n" - "1 -1.0000 foo\n" - "2 -2.1234 foooo\n" - "3 3.0000 fooooo\n" - "4 4.0000 bar" - ) - assert result == expected - - df = DataFrame( - { - "A": [np.nan, -1.0, -2.0, 3.0, 4.0], - "B": [np.nan, "foo", "foooo", "fooooo", "bar"], - } - ) - result = df.to_string() - - expected = ( - " A B\n" - "0 NaN NaN\n" - "1 -1.0 foo\n" - "2 -2.0 foooo\n" - "3 3.0 fooooo\n" - "4 4.0 bar" - ) - assert result == expected - - def test_to_string_format_inf(self): - # Issue #24861 - tm.reset_display_options() - df = DataFrame( - { - "A": [-np.inf, np.inf, -1, -2.1234, 3, 4], - "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], - } - ) - result = df.to_string() - - expected = ( - " A B\n" - "0 -inf -inf\n" - "1 inf inf\n" - "2 -1.0000 foo\n" - "3 -2.1234 foooo\n" - "4 3.0000 fooooo\n" - "5 4.0000 bar" - ) - assert result == expected - - df = DataFrame( - { - "A": [-np.inf, np.inf, -1.0, -2.0, 3.0, 4.0], - "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], - } - ) - result = df.to_string() - - expected = ( - " A B\n" - "0 -inf -inf\n" - "1 inf inf\n" - "2 -1.0 foo\n" - "3 -2.0 foooo\n" - "4 3.0 fooooo\n" - "5 4.0 bar" - ) - assert result == expected - - def test_to_string_decimal(self): - # Issue #23614 - df = DataFrame({"A": [6.0, 3.1, 2.2]}) - expected = " A\n0 6,0\n1 3,1\n2 2,2" - assert df.to_string(decimal=",") == expected - def test_show_dimensions(self): df = DataFrame(123, index=range(10, 15), columns=range(30)) @@ -1720,22 +1361,6 @@ def test_repr_float_format_in_object_col(self, float_format, expected): assert result == expected - def test_dict_entries(self): - df = DataFrame({"A": [{"a": 1, "b": 2}]}) - - val = df.to_string() - assert "'a': 1" in val - assert "'b': 2" in val - - def test_categorical_columns(self): - # GH35439 - data = [[4, 2], [3, 2], [4, 3]] - cols = ["aaaaaaaaa", "b"] - df = DataFrame(data, columns=cols) - df_cat_cols = DataFrame(data, columns=pd.CategoricalIndex(cols)) - - assert df.to_string() == df_cat_cols.to_string() - def test_period(self): # GH 12615 df = DataFrame( @@ -1817,37 +1442,6 @@ def test_freq_name_separation(self): result = repr(s) assert "Freq: D, Name: 0" in result - def test_to_string_mixed(self): - s = Series(["foo", np.nan, -1.23, 4.56]) - result = s.to_string() - expected = "".join(["0 foo\n", "1 NaN\n", "2 -1.23\n", "3 4.56"]) - assert result == expected - - # but don't count NAs as floats - s = Series(["foo", np.nan, "bar", "baz"]) - result = s.to_string() - expected = "".join(["0 foo\n", "1 NaN\n", "2 bar\n", "3 baz"]) - assert result == expected - - s = Series(["foo", 5, "bar", "baz"]) - result = s.to_string() - expected = "".join(["0 foo\n", "1 5\n", "2 bar\n", "3 baz"]) - assert result == expected - - def test_to_string_float_na_spacing(self): - s = Series([0.0, 1.5678, 2.0, -3.0, 4.0]) - s[::2] = np.nan - - result = s.to_string() - expected = ( - "0 NaN\n" - "1 1.5678\n" - "2 NaN\n" - "3 -3.0000\n" - "4 NaN" - ) - assert result == expected - def test_unicode_name_in_footer(self): s = Series([1, 2], name="\u05e2\u05d1\u05e8\u05d9\u05ea") sf = fmt.SeriesFormatter(s, name="\u05e2\u05d1\u05e8\u05d9\u05ea") @@ -2096,22 +1690,6 @@ def test_float_trim_zeros(self): else: assert "+10" in line - def test_to_string_with_datetimeindex(self): - index = date_range("20130102", periods=6) - ser = Series(1, index=index) - result = ser.to_string() - assert "2013-01-02" in result - - # nat in index - s2 = Series(2, index=[Timestamp("20130111"), NaT]) - ser = pd.concat([s2, ser]) - result = ser.to_string() - assert "NaT" in result - - # nat in summary - result = str(s2.index) - assert "NaT" in result - @pytest.mark.parametrize( "start_date", [ diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 4a376eb1bc01b..a8f6c07e9ef5b 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -10,16 +10,27 @@ import pytest from pandas import ( + CategoricalIndex, DataFrame, + Index, + NaT, Series, + Timestamp, + concat, date_range, + get_option, option_context, + read_csv, timedelta_range, to_datetime, ) import pandas._testing as tm +def _three_digit_exp(): + return f"{1.7e8:.4g}" == "1.7e+008" + + class TestDataFrameToStringFormatters: def test_to_string_masked_ea_with_formatter(self): # GH#39336 @@ -337,7 +348,233 @@ def test_to_string_line_width_no_index_no_header(self): assert df_s == expected +class TestToStringNumericFormatting: + def test_to_string_float_format_no_fixed_width(self): + # GH#21625 + df = DataFrame({"x": [0.19999]}) + expected = " x\n0 0.200" + assert df.to_string(float_format="%.3f") == expected + + # GH#22270 + df = DataFrame({"x": [100.0]}) + expected = " x\n0 100" + assert df.to_string(float_format="%.0f") == expected + + def test_to_string_small_float_values(self): + df = DataFrame({"a": [1.5, 1e-17, -5.5e-7]}) + + result = df.to_string() + # sadness per above + if _three_digit_exp(): + expected = ( + " a\n" + "0 1.500000e+000\n" + "1 1.000000e-017\n" + "2 -5.500000e-007" + ) + else: + expected = ( + " a\n" + "0 1.500000e+00\n" + "1 1.000000e-17\n" + "2 -5.500000e-07" + ) + assert result == expected + + # but not all exactly zero + df = df * 0 + result = df.to_string() + expected = " 0\n0 0\n1 0\n2 -0" + # TODO: assert that these match?? + + def test_to_string_complex_float_formatting(self): + # GH #25514, 25745 + with option_context("display.precision", 5): + df = DataFrame( + { + "x": [ + (0.4467846931321966 + 0.0715185102060818j), + (0.2739442392974528 + 0.23515228785438969j), + (0.26974928742135185 + 0.3250604054898979j), + (-1j), + ] + } + ) + result = df.to_string() + expected = ( + " x\n0 0.44678+0.07152j\n" + "1 0.27394+0.23515j\n" + "2 0.26975+0.32506j\n" + "3 -0.00000-1.00000j" + ) + assert result == expected + + def test_to_string_format_inf(self): + # GH#24861 + tm.reset_display_options() + df = DataFrame( + { + "A": [-np.inf, np.inf, -1, -2.1234, 3, 4], + "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 -inf -inf\n" + "1 inf inf\n" + "2 -1.0000 foo\n" + "3 -2.1234 foooo\n" + "4 3.0000 fooooo\n" + "5 4.0000 bar" + ) + assert result == expected + + df = DataFrame( + { + "A": [-np.inf, np.inf, -1.0, -2.0, 3.0, 4.0], + "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 -inf -inf\n" + "1 inf inf\n" + "2 -1.0 foo\n" + "3 -2.0 foooo\n" + "4 3.0 fooooo\n" + "5 4.0 bar" + ) + assert result == expected + + def test_to_string_int_formatting(self): + df = DataFrame({"x": [-15, 20, 25, -35]}) + assert issubclass(df["x"].dtype.type, np.integer) + + output = df.to_string() + expected = " x\n0 -15\n1 20\n2 25\n3 -35" + assert output == expected + + def test_to_string_float_formatting(self): + tm.reset_display_options() + with option_context( + "display.precision", + 5, + "display.notebook_repr_html", + False, + ): + df = DataFrame( + {"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8, 1.253456, np.pi, -1e6]} + ) + + df_s = df.to_string() + + if _three_digit_exp(): + expected = ( + " x\n0 0.00000e+000\n1 2.50000e-001\n" + "2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n" + "5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n" + "8 -1.00000e+006" + ) + else: + expected = ( + " x\n0 0.00000e+00\n1 2.50000e-01\n" + "2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n" + "5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n" + "8 -1.00000e+06" + ) + assert df_s == expected + + df = DataFrame({"x": [3234, 0.253]}) + df_s = df.to_string() + + expected = " x\n0 3234.000\n1 0.253" + assert df_s == expected + + tm.reset_display_options() + assert get_option("display.precision") == 6 + + df = DataFrame({"x": [1e9, 0.2512]}) + df_s = df.to_string() + + if _three_digit_exp(): + expected = " x\n0 1.000000e+009\n1 2.512000e-001" + else: + expected = " x\n0 1.000000e+09\n1 2.512000e-01" + assert df_s == expected + + class TestDataFrameToString: + def test_to_string_decimal(self): + # GH#23614 + df = DataFrame({"A": [6.0, 3.1, 2.2]}) + expected = " A\n0 6,0\n1 3,1\n2 2,2" + assert df.to_string(decimal=",") == expected + + def test_to_string_left_justify_cols(self): + tm.reset_display_options() + df = DataFrame({"x": [3234, 0.253]}) + df_s = df.to_string(justify="left") + expected = " x \n0 3234.000\n1 0.253" + assert df_s == expected + + def test_to_string_format_na(self): + tm.reset_display_options() + df = DataFrame( + { + "A": [np.nan, -1, -2.1234, 3, 4], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0000 foo\n" + "2 -2.1234 foooo\n" + "3 3.0000 fooooo\n" + "4 4.0000 bar" + ) + assert result == expected + + df = DataFrame( + { + "A": [np.nan, -1.0, -2.0, 3.0, 4.0], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0 foo\n" + "2 -2.0 foooo\n" + "3 3.0 fooooo\n" + "4 4.0 bar" + ) + assert result == expected + + def test_to_string_with_dict_entries(self): + df = DataFrame({"A": [{"a": 1, "b": 2}]}) + + val = df.to_string() + assert "'a': 1" in val + assert "'b': 2" in val + + def test_to_string_with_categorical_columns(self): + # GH#35439 + data = [[4, 2], [3, 2], [4, 3]] + cols = ["aaaaaaaaa", "b"] + df = DataFrame(data, columns=cols) + df_cat_cols = DataFrame(data, columns=CategoricalIndex(cols)) + + assert df.to_string() == df_cat_cols.to_string() + def test_repr_embedded_ndarray(self): arr = np.empty(10, dtype=[("err", object)]) for i in range(len(arr)): @@ -471,6 +708,19 @@ def test_truncation_no_index(self, max_cols, max_rows, expected): df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected ) + def test_to_string_no_index(self): + # GH#16839, GH#13032 + df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) + + df_s = df.to_string(index=False) + # Leading space is expected for positive numbers. + expected = " x y z\n11 33 AAA\n22 -44 " + assert df_s == expected + + df_s = df[["y", "x", "z"]].to_string(index=False) + expected = " y x z\n 33 11 AAA\n-44 22 " + assert df_s == expected + def test_to_string_unicode_columns(self, float_frame): df = DataFrame({"\u03c3": np.arange(10.0)}) @@ -545,6 +795,147 @@ def test_to_string_unicode_three(self): buf = StringIO() dm.to_string(buf) + def test_to_string_with_float_index(self): + index = Index([1.5, 2, 3, 4, 5]) + df = DataFrame(np.arange(5), index=index) + + result = df.to_string() + expected = " 0\n1.5 0\n2.0 1\n3.0 2\n4.0 3\n5.0 4" + assert result == expected + + def test_to_string(self): + # big mixed + biggie = DataFrame( + { + "A": np.random.default_rng(2).standard_normal(200), + "B": tm.makeStringIndex(200), + }, + ) + + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + s = biggie.to_string() + + buf = StringIO() + retval = biggie.to_string(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, str) + + # print in right order + result = biggie.to_string( + columns=["B", "A"], col_space=17, float_format="%.5f".__mod__ + ) + lines = result.split("\n") + header = lines[0].strip().split() + joined = "\n".join([re.sub(r"\s+", " ", x).strip() for x in lines[1:]]) + recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") + tm.assert_series_equal(recons["B"], biggie["B"]) + assert recons["A"].count() == biggie["A"].count() + assert (np.abs(recons["A"].dropna() - biggie["A"].dropna()) < 0.1).all() + + # FIXME: don't leave commented-out + # expected = ['B', 'A'] + # assert header == expected + + result = biggie.to_string(columns=["A"], col_space=17) + header = result.split("\n")[0].strip().split() + expected = ["A"] + assert header == expected + + biggie.to_string(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"}) + + biggie.to_string(columns=["B", "A"], float_format=str) + biggie.to_string(columns=["B", "A"], col_space=12, float_format=str) + + frame = DataFrame(index=np.arange(200)) + frame.to_string() + + # TODO: split or simplify this test? + def test_to_string_index_with_nan(self): + # GH#2850 + df = DataFrame( + { + "id1": {0: "1a3", 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: "78d", 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + # multi-index + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # index + y = df.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nd67 9h4 79d 64" + ) + assert result == expected + + # with append (this failed in 0.12) + y = df.set_index(["id1", "id2"]).set_index("id3", append=True) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # all-nan in mi + df2 = df.copy() + df2.loc[:, "id2"] = np.nan + y = df2.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nNaN 9h4 79d 64" + ) + assert result == expected + + # partial nan in mi + df2 = df.copy() + df2.loc[:, "id2"] = np.nan + y = df2.set_index(["id2", "id3"]) + result = y.to_string() + expected = ( + " id1 value\nid2 id3 \n" + "NaN 78d 1a3 123\n 79d 9h4 64" + ) + assert result == expected + + df = DataFrame( + { + "id1": {0: np.nan, 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: np.nan, 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "NaN NaN NaN 123\n9h4 d67 79d 64" + ) + assert result == expected + + def test_to_string_nonunicode_nonascii_alignment(self): + df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) + rep_str = df.to_string() + lines = rep_str.split("\n") + assert len(lines[1]) == len(lines[2]) + class TestSeriesToString: def test_to_string_without_index(self): @@ -752,3 +1143,50 @@ def test_nullable_int_to_string(self, any_int_ea_dtype): 2 """ ) assert result == expected + + def test_to_string_mixed(self): + ser = Series(["foo", np.nan, -1.23, 4.56]) + result = ser.to_string() + expected = "".join(["0 foo\n", "1 NaN\n", "2 -1.23\n", "3 4.56"]) + assert result == expected + + # but don't count NAs as floats + ser = Series(["foo", np.nan, "bar", "baz"]) + result = ser.to_string() + expected = "".join(["0 foo\n", "1 NaN\n", "2 bar\n", "3 baz"]) + assert result == expected + + ser = Series(["foo", 5, "bar", "baz"]) + result = ser.to_string() + expected = "".join(["0 foo\n", "1 5\n", "2 bar\n", "3 baz"]) + assert result == expected + + def test_to_string_float_na_spacing(self): + ser = Series([0.0, 1.5678, 2.0, -3.0, 4.0]) + ser[::2] = np.nan + + result = ser.to_string() + expected = ( + "0 NaN\n" + "1 1.5678\n" + "2 NaN\n" + "3 -3.0000\n" + "4 NaN" + ) + assert result == expected + + def test_to_string_with_datetimeindex(self): + index = date_range("20130102", periods=6) + ser = Series(1, index=index) + result = ser.to_string() + assert "2013-01-02" in result + + # nat in index + s2 = Series(2, index=[Timestamp("20130111"), NaT]) + ser = concat([s2, ser]) + result = ser.to_string() + assert "NaT" in result + + # nat in summary + result = str(s2.index) + assert "NaT" in result From eabf8330554cb560870581e38ccc249c193eb592 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Oct 2023 18:11:21 -0700 Subject: [PATCH 7/7] collect to_string tests --- pandas/tests/io/formats/test_format.py | 25 +---------------------- pandas/tests/io/formats/test_to_string.py | 23 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 46368b5fee2df..8901eb99b7612 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -7,7 +7,6 @@ from pathlib import Path import re from shutil import get_terminal_size -import sys import numpy as np import pytest @@ -498,17 +497,7 @@ def test_auto_detect(self): with option_context("display.max_columns", 0): assert has_horizontally_truncated_repr(df) - def test_to_string_repr_unicode(self): - buf = StringIO() - - unicode_values = ["\u03c3"] * 10 - unicode_values = np.array(unicode_values, dtype=object) - df = DataFrame({"unicode": unicode_values}) - df.to_string(col_space=10, buf=buf) - - # it works! - repr(df) - + def test_to_string_repr_unicode2(self): idx = Index(["abc", "\u03c3a", "aegdvg"]) ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) rs = repr(ser).split("\n") @@ -521,14 +510,6 @@ def test_to_string_repr_unicode(self): if not line.startswith("dtype:"): assert len(line) == line_len - # it works even if sys.stdin in None - _stdin = sys.stdin - try: - sys.stdin = None - repr(df) - finally: - sys.stdin = _stdin - def test_east_asian_unicode_false(self): # not aligned properly because of east asian width @@ -1028,10 +1009,6 @@ def test_datetimeindex_highprecision(self, start_date): result = str(df.index) assert start_date in result - def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({"c/\u03c3": Series({"test": np.nan})}) - str(dm.to_string()) - def test_string_repr_encoding(self, datapath): filepath = datapath("io", "parser", "data", "unicode_series.csv") df = read_csv(filepath, header=None, encoding="latin1") diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index a8f6c07e9ef5b..3a8dcb339b063 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -4,6 +4,7 @@ ) from io import StringIO import re +import sys from textwrap import dedent import numpy as np @@ -936,6 +937,28 @@ def test_to_string_nonunicode_nonascii_alignment(self): lines = rep_str.split("\n") assert len(lines[1]) == len(lines[2]) + def test_unicode_problem_decoding_as_ascii(self): + df = DataFrame({"c/\u03c3": Series({"test": np.nan})}) + str(df.to_string()) + + def test_to_string_repr_unicode(self): + buf = StringIO() + + unicode_values = ["\u03c3"] * 10 + unicode_values = np.array(unicode_values, dtype=object) + df = DataFrame({"unicode": unicode_values}) + df.to_string(col_space=10, buf=buf) + + # it works! + repr(df) + # it works even if sys.stdin in None + _stdin = sys.stdin + try: + sys.stdin = None + repr(df) + finally: + sys.stdin = _stdin + class TestSeriesToString: def test_to_string_without_index(self):