diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e9e13c3ed5bbe..ebb4708ada45b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -780,7 +780,7 @@ Indexing - Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) - Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) -- +- Bug in :meth:`DataFrame.sum` with ``min_count`` incorrectly changing the result dtype when the input contains NaNs (:issue:`46947`) Missing ^^^^^^^ diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 713d80c26ef7a..a96fb9c8129dd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1471,8 +1471,8 @@ def _maybe_null_out( if is_numeric_dtype(result): if np.iscomplexobj(result): result = result.astype("c16") - else: - result = result.astype("f8") + elif not is_float_dtype(result): + result = result.astype("f8", copy=False) result[null_mask] = np.nan else: # GH12941, use None to auto cast null diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 5226510f03195..702faf2f480e8 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -778,6 +778,23 @@ def test_sum_nanops_min_count(self): expected = Series([np.nan, np.nan], index=["x", "y"]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"]) + @pytest.mark.parametrize( + "kwargs, expected_result", + [ + ({"axis": 1, "min_count": 2}, [3.2, 5.3, np.NaN]), + ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]), + ({"axis": 1, "skipna": False}, [3.2, 5.3, np.NaN]), + ], + ) + def 
test_sum_nanops_dtype_min_count(self, float_type, kwargs, expected_result): + # GH#46947 + # the row-wise sum must keep the frame's float dtype when NaNs are present + df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype=float_type) + result = df.sum(**kwargs) + expected = Series(expected_result).astype(float_type) + tm.assert_series_equal(result, expected) + def test_sum_object(self, float_frame): values = float_frame.values.astype(int) frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns)