From 7f7603b481ea55589ee4d3c0f5c6cd370b72038d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 15 Dec 2021 16:28:08 +0100 Subject: [PATCH 1/2] Bug in concat casting all na levels to float --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/arrays/categorical.py | 9 ++++++++- pandas/tests/arrays/categorical/test_missing.py | 14 ++++++++++++++ pandas/tests/reshape/concat/test_datetimes.py | 13 +++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 372f991d96a22..9dc7b161725ee 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -806,6 +806,7 @@ Reshaping - Bug in :func:`crosstab` would fail when inputs are lists or tuples (:issue:`44076`) - Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`) - Fixed metadata propagation in :meth:`Dataframe.apply` method, consequently fixing the same issue for :meth:`Dataframe.transform`, :meth:`Dataframe.nunique` and :meth:`Dataframe.mode` (:issue:`28283`) +- Bug in :func:`concat` casting levels of :class:`MultiIndex` to float if they only consist of missing values (:issue:`44900`) - Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`) - Bug in :meth:`Series.unstack` with object doing unwanted type inference on resulting columns (:issue:`44595`) - Bug in :class:`MultiIndex` failing join operations with overlapping ``IntervalIndex`` levels (:issue:`44096`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 67dc6ade25254..e801f8e43fb36 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -422,7 +422,14 @@ def __init__( # We remove null values here, then below will re-insert # them, grep "full_codes" arr_list = [values[idx] for idx in np.where(~null_mask)[0]] - arr = sanitize_array(arr_list, None) + + # Do not cast to 
float if we have only missing values + if arr_list or arr.dtype == "object": + sanitize_dtype = None + else: + sanitize_dtype = arr.dtype + + arr = sanitize_array(arr_list, None, dtype=sanitize_dtype) values = arr if dtype.categories is None: diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index f419aa6f181f2..fb5330a9665ff 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -197,3 +197,17 @@ def test_compare_categorical_with_missing(self, a1, a2, categories): result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type) expected = Series(a1) == Series(a2) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "na_value, dtype", + [ + (pd.NaT, "datetime64[ns]"), + (None, "float64"), + (np.nan, "float64"), + (pd.NA, "float64"), + ], + ) + def test_categorical_only_missing_values_no_cast(self, na_value, dtype): + # GH#44900 + result = Categorical([na_value, na_value]) + tm.assert_index_equal(result.categories, Index([], dtype=dtype)) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index c4fe16b43313a..1af54a1d5cf4a 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -528,3 +528,16 @@ def test_concat_timedelta64_block(): result = concat([df, df]) tm.assert_frame_equal(result.iloc[:10], df) tm.assert_frame_equal(result.iloc[10:], df) + + +def test_concat_multiindex_datetime_nat(): + # GH#44900 + left = DataFrame({"a": 1}, index=MultiIndex.from_tuples([(1, pd.NaT)])) + right = DataFrame( + {"b": 2}, index=MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]) + ) + result = concat([left, right], axis="columns") + expected = DataFrame( + {"a": [1.0, np.nan], "b": 2}, MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]) + ) + tm.assert_frame_equal(result, expected) From 58320498313f19b399b3d38448924c42a5d2b9bc 
Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 16 Dec 2021 20:01:47 +0100 Subject: [PATCH 2/2] Add gh ref --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index e801f8e43fb36..b34a7fff68edf 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -423,7 +423,7 @@ def __init__( # them, grep "full_codes" arr_list = [values[idx] for idx in np.where(~null_mask)[0]] - # Do not cast to float if we have only missing values + # GH#44900 Do not cast to float if we have only missing values if arr_list or arr.dtype == "object": sanitize_dtype = None else: