diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e16ef0857685d..c5c2f8ece4325 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -788,6 +788,7 @@ I/O - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`) - Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`) - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`) +- Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`) - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`) - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`) - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c5a931fe29ab1..d4532aab97bea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6473,12 +6473,12 @@ def convert_dtypes( 2 3 z 20 200.0 >>> dfn.dtypes - a Int32 - b string - c boolean - d string - e Int64 - f Float64 + a Int32 + b string[python] + c boolean + d string[python] + e Int64 + f Float64 dtype: object Start with a Series of strings and missing data represented by ``np.nan``. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 61c12f5011886..1328e77219153 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -89,6 +89,7 @@ DatetimeArray, TimedeltaArray, ) +from pandas.core.arrays.string_ import StringDtype from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.construction import extract_array @@ -1395,6 +1396,8 @@ def _format(x): return self.na_rep elif isinstance(x, PandasObject): return str(x) + elif isinstance(x, StringDtype): + return repr(x) else: # object dtype return str(formatter(x)) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 5e7aeb7f226de..31ba018a178ca 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -5,6 +5,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( DataFrame, Series, @@ -338,3 +340,20 @@ def test_to_string_max_rows_zero(data, expected): # GH35394 result = DataFrame(data=data).to_string(max_rows=0) assert result == expected + + +@td.skip_if_no("pyarrow") +def test_to_string_string_dtype(): + # GH#50099 + df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]}) + df = df.astype( + {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"} + ) + result = df.dtypes.to_string() + expected = dedent( + """\ + x string[pyarrow] + y string[python] + z int64[pyarrow]""" + ) + assert result == expected