diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9168041a4f474..9f6ef102b6a29 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -575,6 +575,7 @@ Datetimelike - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`) - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`) - Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`) +- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) Timedelta ^^^^^^^^^ diff --git a/pandas/conftest.py b/pandas/conftest.py index cb5b4145855d1..f155f768f6929 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -712,6 +712,7 @@ def float_frame(): DatetimeTZDtype(tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), + (Timedelta(nanoseconds=1), "timedelta64[ns]"), # GH38032 ] ) def ea_scalar_and_dtype(request): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 12974d56dacdc..326db73264d19 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1611,13 +1611,42 @@ def cast_scalar_to_array( ndarray of shape, filled with value, of specified / inferred dtype """ + # that's what the type annotation indicates + assert isinstance(dtype, (type(None), str, np.dtype)) + if dtype is None: - dtype, fill_value = infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) else: - fill_value = value + if not isinstance(dtype, np.dtype): + dtype = np.dtype(dtype) + empty = shape and not any(shape) + # dtype coercion when empty: sometimes yes, sometimes no? + + if not empty and is_integer_dtype(dtype) and isna(value): + # coerce if we have nan for an integer dtype + dtype = np.dtype("float64") + elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): + # we need to coerce to object dtype to avoid + # to allow numpy to take our string as a scalar value + dtype = np.dtype("object") + if not isna(value): + value = ensure_str(value) + elif dtype.kind == "m": + # GH38032: filling in Timedelta/Timestamp drops nanoseconds + if isinstance(value, Timedelta): + value = value.to_numpy() + # GH36541: filling datetime-like array directly with pd.NaT + # raises ValueError: cannot convert float NaN to integer + elif is_valid_nat_for_dtype(value, dtype): + value = np.timedelta64("NaT") + elif dtype.kind == "M": + if isinstance(value, Timestamp): + value = value.to_numpy() + elif is_valid_nat_for_dtype(value, dtype): + value = np.datetime64("NaT") values = np.empty(shape, dtype=dtype) - values.fill(fill_value) + values.fill(value) return values @@ -1643,26 +1672,8 @@ def construct_1d_arraylike_from_scalar( if is_extension_array_dtype(dtype): cls = dtype.construct_array_type() subarr = cls._from_sequence([value] * length, dtype=dtype) - else: - - if length and is_integer_dtype(dtype) and isna(value): - # coerce if we have nan for an integer dtype - dtype = np.dtype("float64") - elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): - # we need to coerce to object dtype to avoid - # to allow numpy to take our string as a scalar value - dtype = np.dtype("object") - if not isna(value): - value = ensure_str(value) - elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype): - # GH36541: can't fill array directly with pd.NaT - # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) - # ValueError: cannot convert float NaN to integer - value = np.datetime64("NaT") - - subarr = np.empty(length, dtype=dtype) - subarr.fill(value) + subarr = cast_scalar_to_array((length,), value, dtype) return subarr diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 85e35dbb86f1c..5c3eceaaf4ffd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3923,12 +3923,7 @@ def reindexer(value): value, len(self.index), infer_dtype ) else: - # pandas\core\frame.py:3827: error: Argument 1 to - # "cast_scalar_to_array" has incompatible type "int"; expected - # "Tuple[Any, ...]" [arg-type] - value = cast_scalar_to_array( - len(self.index), value # type: ignore[arg-type] - ) + value = cast_scalar_to_array((len(self.index),), value) value = maybe_cast_to_datetime(value, infer_dtype) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 157adacbdfdf7..178f9a0343e00 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -13,6 +13,7 @@ from pandas import ( Categorical, Interval, + NaT, Period, Series, Timedelta, @@ -188,11 +189,32 @@ def test_infer_dtype_from_array(arr, expected, pandas_dtype): (Period("2011-01-01", freq="D"), object), ], ) -def test_cast_scalar_to_array(obj, dtype): - shape = (3, 2) - +@pytest.mark.parametrize("shape", [(), (5,), (3, 2)]) +def test_cast_scalar_to_array(obj, dtype, shape): exp = np.empty(shape, dtype=dtype) exp.fill(obj) - arr = cast_scalar_to_array(shape, obj, dtype=dtype) + arr = cast_scalar_to_array(shape, obj, dtype=np.dtype(dtype)) tm.assert_numpy_array_equal(arr, exp) + + +@pytest.mark.parametrize( + "obj_in,dtype_in,obj_out,dtype_out", + [ + (NaT, "timedelta64[ns]", np.timedelta64("NaT"), "timedelta64[ns]"), + (Timedelta(1), "timedelta64[ns]", 1, "timedelta64[ns]"), + (NaT, "datetime64[ns]", np.datetime64("NaT"), "datetime64[ns]"), + (Timestamp(1), "datetime64[ns]", 1, "datetime64[ns]"), + (Timestamp(1, tz="US/Eastern"), "datetime64[ns]", 1, "datetime64[ns]"), + (np.nan, np.int64, np.nan, np.float64), + ("hello", "U", "hello", object), + ("hello", "S", "hello", object), + ], +) +@pytest.mark.parametrize("shape", [(), (5,), (3, 2)]) +def test_cast_scalar_to_array_conversion_needed( + obj_in, dtype_in, obj_out, dtype_out, shape +): + result = cast_scalar_to_array(shape, obj_in, dtype=np.dtype(dtype_in)) + expected = np.full(shape, obj_out, dtype=dtype_out) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a98723e9e31f8..f850cc140beb1 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1934,6 +1934,21 @@ def test_constructor_datetimes_with_nulls(self, arr): expected = Series([np.dtype("datetime64[ns]")]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "scalar,dtype", + [ + (Timedelta(1), "timedelta64[ns]"), + (Timestamp(1), "datetime64[ns]"), + (Timestamp(1, tz="US/Eastern"), "datetime64[ns]"), + ], + ) + def test_constructor_timelike_nanoseconds(self, scalar, dtype): + # GH38032 + df = DataFrame(scalar, index=[0], columns=[0], dtype=dtype) + result = df.at[0, 0].value + expected = scalar.value + assert result == expected + def test_constructor_for_list_with_dtypes(self): # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5b13091470b09..3d17a797d486e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -24,6 +24,7 @@ Period, RangeIndex, Series, + Timedelta, Timestamp, date_range, isna, @@ -1319,6 +1320,21 @@ def test_constructor_dtype_timedelta64(self): s = Series([pd.NaT, np.nan, "1 Day"]) assert s.dtype == "timedelta64[ns]" + @pytest.mark.parametrize( + "scalar,dtype", + [ + (Timedelta(1), "timedelta64[ns]"), + (Timestamp(1), "datetime64[ns]"), + (Timestamp(1, tz="US/Eastern"), "datetime64[ns]"), + ], + ) + def test_constructor_timelike_nanoseconds(self, scalar, dtype): + # GH38032 + ser = Series(scalar, index=[0], dtype=dtype) + result = ser[0].value + expected = scalar.value + assert result == expected + # GH 16406 def test_constructor_mixed_tz(self): s = Series([Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")])