From 0aa14544ba6ba2b6758fa7462fd32af916d087c5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 9 Dec 2022 15:26:48 +0100 Subject: [PATCH 1/3] BUG: displaying string dtypes not showing storage option --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/io/formats/format.py | 3 +++ pandas/tests/io/formats/test_to_string.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d6e0bb2ae0830..8ce8c4610fb92 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -787,6 +787,7 @@ I/O - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`) - Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`) - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`) +- Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`) - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`) - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`) - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 61c12f5011886..1328e77219153 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -89,6 +89,7 @@ DatetimeArray, TimedeltaArray, ) +from pandas.core.arrays.string_ import StringDtype from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.construction import extract_array @@ -1395,6 +1396,8 @@ def _format(x): return self.na_rep elif isinstance(x, PandasObject): return str(x) + elif isinstance(x, StringDtype): + return repr(x) else: # object dtype return str(formatter(x)) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 5e7aeb7f226de..4338b780ea1f0 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -338,3 +338,19 @@ def test_to_string_max_rows_zero(data, expected): # GH35394 result = DataFrame(data=data).to_string(max_rows=0) assert result == expected + + +def test_to_string_string_dtype(): + # GH#50099 + df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]}) + df = df.astype( + {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"} + ) + result = df.dtypes.to_string() + expected = dedent( + """\ + x string[pyarrow] + y string[python] + z int64[pyarrow]""" + ) + assert result == expected From befb05f111810f7793209820f40048e35e3c5d83 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 9 Dec 2022 18:00:26 +0100 Subject: [PATCH 2/3] Skip when no pyarrow --- pandas/tests/io/formats/test_to_string.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 4338b780ea1f0..7022019e1eb27 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.compat import pa_version_under6p0 + from pandas import ( DataFrame, Series, @@ -342,6 +344,9 @@ def test_to_string_max_rows_zero(data, expected): def test_to_string_string_dtype(): # GH#50099 + if pa_version_under6p0: + pytest.skip() + df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]}) df = df.astype( {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"} From e345684c49fe18089c183929f451fdad60e82ecd Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 12 Dec 2022 20:13:13 +0100 Subject: [PATCH 3/3] Address review --- pandas/core/generic.py | 12 ++++++------ pandas/tests/io/formats/test_to_string.py | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c5a931fe29ab1..d4532aab97bea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6473,12 +6473,12 @@ def convert_dtypes( 2 3 z 20 200.0 >>> dfn.dtypes - a Int32 - b string - c boolean - d string - e Int64 - f Float64 + a Int32 + b string[python] + c boolean + d string[python] + e Int64 + f Float64 dtype: object Start with a Series of strings and missing data represented by ``np.nan``. diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 7022019e1eb27..31ba018a178ca 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under6p0 +import pandas.util._test_decorators as td from pandas import ( DataFrame, @@ -342,11 +342,9 @@ def test_to_string_max_rows_zero(data, expected): assert result == expected +@td.skip_if_no("pyarrow") def test_to_string_string_dtype(): # GH#50099 - if pa_version_under6p0: - pytest.skip() - df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]}) df = df.astype( {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"}