diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dc3fecba7fb8c..b2c1e38f61f4c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11434,6 +11434,12 @@ def cov( c -0.150812 0.191417 0.895202 """ data = self._get_numeric_data() if numeric_only else self + if any(blk.dtype.kind in "mM" for blk in self._mgr.blocks): + msg = ( + "DataFrame contains columns with dtype datetime64 " + "or timedelta64, which are not supported for cov." + ) + raise TypeError(msg) cols = data.columns idx = cols.copy() mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e238bb78bbdfa..cb290fde7095c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1800,6 +1800,8 @@ def as_array( arr = np.asarray(blk.values, dtype=dtype) else: arr = np.array(blk.values, dtype=dtype, copy=copy) + if passed_nan and blk.dtype.kind in "mM": + arr[isna(blk.values)] = na_value if not copy: arr = arr.view() @@ -1865,6 +1867,8 @@ def _interleave( else: arr = blk.get_values(dtype) result[rl.indexer] = arr + if na_value is not lib.no_default and blk.dtype.kind in "mM": + result[rl.indexer][isna(arr)] = na_value itemmask[rl.indexer] = 1 if not itemmask.all(): diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index 36088cceb13f1..f68d7f533645d 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -3,7 +3,9 @@ from pandas import ( DataFrame, + NaT, Timestamp, + date_range, ) import pandas._testing as tm @@ -41,3 +43,37 @@ def test_to_numpy_mixed_dtype_to_str(self): result = df.to_numpy(dtype=str) expected = np.array([["2020-01-01 00:00:00", "100.0"]], dtype=str) tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_datetime_with_na(self): + # GH #53115 + dti = date_range("2016-01-01", periods=3) + df = DataFrame(dti) + df.iloc[0, 0] = NaT + expected = np.array([[np.nan], [1.45169280e18], [1.45177920e18]]) + result = df.to_numpy(float, na_value=np.nan) + tm.assert_numpy_array_equal(result, expected) + + df = DataFrame( + { + "a": [Timestamp("1970-01-01"), Timestamp("1970-01-02"), NaT], + "b": [ + Timestamp("1970-01-01"), + np.nan, + Timestamp("1970-01-02"), + ], + "c": [ + 1, + np.nan, + 2, + ], + } + ) + expected = np.array( + [ + [0.00e00, 0.00e00, 1.00e00], + [8.64e04, np.nan, np.nan], + [np.nan, 8.64e04, 2.00e00], + ] + ) + result = df.to_numpy(float, na_value=np.nan) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 127f0fc50a747..cc23c292b66dc 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1917,6 +1917,39 @@ def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype): expected = Series([pd.NA, pd.NA], dtype=exp_dtype, index=Index([0, 1])) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "data", + [ + {"a": [0, 1, 2], "b": [pd.NaT, pd.NaT, pd.NaT]}, + {"a": [0, 1, 2], "b": [Timestamp("1990-01-01"), pd.NaT, pd.NaT]}, + { + "a": [0, 1, 2], + "b": [ + Timestamp("1990-01-01"), + Timestamp("1991-01-01"), + Timestamp("1992-01-01"), + ], + }, + { + "a": [0, 1, 2], + "b": [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.NaT], + }, + { + "a": [0, 1, 2], + "b": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + }, + ], + ) + def test_df_cov_pd_nat(self, data): + # GH #53115 + df = DataFrame(data) + with pytest.raises(TypeError, match="not supported for cov"): + df.cov() + def test_sum_timedelta64_skipna_false(): # GH#17235