diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bdf811f6a8f6a..ecec18c8b8242 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -958,6 +958,7 @@ I/O - Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`) - Bug in :func:`read_excel` not forward filling :class:`MultiIndex` when no names were given (:issue:`47487`) - Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`) +- Bug in :meth:`DataFrame.to_string` using wrong missing value with extension arrays in :class:`MultiIndex` (:issue:`47986`) - Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`) - Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`) - Bug in :meth:`DataFrame.to_excel` when writing an empty dataframe with :class:`MultiIndex` (:issue:`19543`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 60f727f54b621..5a9b1e6943608 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -54,6 +54,7 @@ ensure_int64, ensure_platform_int, is_categorical_dtype, + is_extension_array_dtype, is_hashable, is_integer, is_iterator, @@ -1370,7 +1371,7 @@ def format( stringified_levels = [] for lev, level_codes in zip(self.levels, self.codes): - na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) + na = na_rep if na_rep is not None else _get_na_rep(lev.dtype) if len(lev) > 0: @@ -3889,6 +3890,11 @@ def sparsify_labels(label_list, start: int = 0, sentinel=""): def _get_na_rep(dtype) -> str: + if is_extension_array_dtype(dtype): + return f"{dtype.na_value}" + else: + dtype = dtype.type + return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index f42660b297cb0..86c8e36cb7bd4 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -9,6 +9,7 @@ import pytest from pandas import ( + NA, Categorical, DataFrame, MultiIndex, @@ -342,6 +343,19 @@ def test_frame_to_string_with_periodindex(self): # it works! frame.to_string() + def test_to_string_ea_na_in_multiindex(self): + # GH#47986 + df = DataFrame( + {"a": [1, 2]}, + index=MultiIndex.from_arrays([Series([NA, 1], dtype="Int64")]), + ) + + result = df.to_string() + expected = """ a + 1 +1 2""" + assert result == expected + def test_datetime64tz_slice_non_truncate(self): # GH 30263 df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")})