diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8c475791df64d..5382e56544907 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -686,6 +686,7 @@ Reshaping - Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`) - Bug in :meth:`DataFrame.pivot_table` where the row margin is incorrect when the columns have numeric names (:issue:`26568`) - Bug in :meth:`DataFrame.pivot` with numeric columns and extension dtype for data (:issue:`56528`) +- Bug in :meth:`DataFrame.stack` and :meth:`Series.stack` with ``future_stack=True`` would not preserve NA values in the index (:issue:`56573`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 8c822ec58e011..7a49682d7c57c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -953,8 +953,8 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: index_levels = frame.index.levels index_codes = list(np.tile(frame.index.codes, (1, ratio))) else: - index_levels = [frame.index.unique()] - codes = factorize(frame.index)[0] + codes, uniques = factorize(frame.index, use_na_sentinel=False) + index_levels = [uniques] index_codes = list(np.tile(codes, (1, ratio))) if isinstance(stack_cols, MultiIndex): column_levels = ordered_stack_cols.levels diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 554a9d4ce2d5d..6e1e743eb60de 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -2638,3 +2638,41 @@ def test_stack_tuple_columns(future_stack): ), ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype, na_value", + [ + ("float64", np.nan), + ("Float64", np.nan), + ("Float64", pd.NA), + ("Int64", pd.NA), + ], +) +@pytest.mark.parametrize("test_multiindex", [True, False]) +def test_stack_preserves_na(dtype, na_value, test_multiindex): + # GH#56573 + if test_multiindex: + index = MultiIndex.from_arrays(2 * [Index([na_value], dtype=dtype)]) + else: + index = Index([na_value], dtype=dtype) + df = DataFrame({"a": [1]}, index=index) + result = df.stack(future_stack=True) + + if test_multiindex: + expected_index = MultiIndex.from_arrays( + [ + Index([na_value], dtype=dtype), + Index([na_value], dtype=dtype), + Index(["a"]), + ] + ) + else: + expected_index = MultiIndex.from_arrays( + [ + Index([na_value], dtype=dtype), + Index(["a"]), + ] + ) + expected = Series(1, index=expected_index) + tm.assert_series_equal(result, expected)