From 19fd2b3ef392fd274657e5a9f57c894e529f43da Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 28 Aug 2023 14:08:04 +0200 Subject: [PATCH 1/2] BUG: repr aligning left for string dtype columns --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/indexes/base.py | 2 ++ pandas/tests/frame/test_repr_info.py | 11 +++++++++++ 3 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 19a8500928ab7..8f2667d69a322 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -717,6 +717,7 @@ Conversion Strings ^^^^^^^ - Bug in :meth:`Series.str` that did not raise a ``TypeError`` when iterated (:issue:`54173`) +- Bug in ``repr`` for :class:`DataFrame` with string-dtype columns (:issue:`54797`) Interval ^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a49db84450bb3..3fa1d8b3ec48a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1395,6 +1395,8 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t] from pandas.io.formats.format import format_array values = self._values + if is_string_dtype(values.dtype): + values = np.asarray(values) if is_object_dtype(values.dtype): values = cast(np.ndarray, values) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 49375658abfee..64d516e484991 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -455,3 +455,14 @@ def test_masked_ea_with_formatter(self): 0 0.12 1.00 1 1.12 2.00""" assert result == expected + + def test_repr_ea_columns(self, any_string_dtype): + # GH#54797 + pytest.importorskip("pyarrow") + df = DataFrame({"long_column_name": [1, 2, 3], "col2": [4, 5, 6]}) + df.columns = df.columns.astype(any_string_dtype) + expected = """ long_column_name col2 +0 1 4 +1 2 5 +2 3 6""" + assert repr(df) == expected From 
a7b48c88ba2c22d49c65a4c61eb735d4d1de5edd Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:58:25 +0200 Subject: [PATCH 2/2] Update pandas/core/indexes/base.py Co-authored-by: Joris Van den Bossche --- pandas/core/indexes/base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3fa1d8b3ec48a..4e037cc3e8626 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1395,11 +1395,9 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t] from pandas.io.formats.format import format_array values = self._values - if is_string_dtype(values.dtype): - values = np.asarray(values) - if is_object_dtype(values.dtype): - values = cast(np.ndarray, values) + if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): + values = np.asarray(values) values = lib.maybe_convert_objects(values, safe=True) result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]