From b685ecd126d469fa2690dac25e313c84b230e5ad Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 3 Jul 2020 14:14:15 +0100 Subject: [PATCH] BUG: reset_index is passing a bad dtype to NumPy --- pandas/core/frame.py | 10 +++++++--- pandas/tests/frame/methods/test_reset_index.py | 17 ++++++++++++++--- pandas/tests/series/methods/test_reset_index.py | 17 ++++++++++++++--- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6993e9ed851a..87041341ac3a6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -75,6 +75,7 @@ from pandas.core.dtypes.cast import ( cast_scalar_to_array, coerce_to_dtypes, + construct_1d_arraylike_from_scalar, find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, @@ -109,7 +110,7 @@ needs_i8_conversion, pandas_dtype, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna from pandas.core import algorithms, common as com, nanops, ops from pandas.core.accessor import CachedAccessor @@ -4731,8 +4732,11 @@ def _maybe_casted_values(index, labels=None): # we can have situations where the whole mask is -1, # meaning there is nothing found in labels, so make all nan's if mask.all(): - values = np.empty(len(mask), dtype=index.dtype) - values.fill(np.nan) + dtype = index.dtype + fill_value = na_value_for_dtype(dtype) + values = construct_1d_arraylike_from_scalar( + fill_value, len(mask), dtype + ) else: values = values.take(labels) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 79442acccb326..cf0bbe144caa5 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -3,6 +3,7 @@ import numpy as np import pytest +import pandas as pd from pandas import ( DataFrame, Index, @@ -299,9 +300,19 @@ def test_reset_index_range(self): tm.assert_frame_equal(result, expected) -def test_reset_index_dtypes_on_empty_frame_with_multiindex(): +@pytest.mark.parametrize( + "array, dtype", + [ + (["a", "b"], object), + ( + pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), + pd.PeriodDtype(freq="Q-DEC"), + ), + ], +) +def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype): # GH 19602 - Preserve dtype on empty DataFrame with MultiIndex - idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], ["a", "b"]]) + idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array]) result = DataFrame(index=idx)[:0].reset_index().dtypes - expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": object}) + expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index a11590d42552d..597b43a370ef5 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series import pandas._testing as tm @@ -110,11 +111,21 @@ def test_reset_index_drop_errors(self): s.reset_index("wrong", drop=True) -def test_reset_index_dtypes_on_empty_series_with_multiindex(): +@pytest.mark.parametrize( + "array, dtype", + [ + (["a", "b"], object), + ( + pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), + pd.PeriodDtype(freq="Q-DEC"), + ), + ], +) +def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype): # GH 19602 - Preserve dtype on empty Series with MultiIndex - idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], ["a", "b"]]) + idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array]) result = Series(dtype=object, index=idx)[:0].reset_index().dtypes expected = Series( - {"level_0": np.int64, "level_1": np.float64, "level_2": object, 0: object} + {"level_0": np.int64, "level_1": np.float64, "level_2": dtype, 0: object} ) tm.assert_series_equal(result, expected)