diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index ab9f303bec6aa..2a316a3194b14 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -86,7 +86,7 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
-
+- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
 -
 -
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index fafa1f51c823e..a928a86fead26 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -20,7 +20,7 @@
 
 import numpy as np
 
-from pandas._libs import lib, tslib, tslibs
+from pandas._libs import lib, tslib
 from pandas._libs.tslibs import (
     NaT,
     OutOfBoundsDatetime,
@@ -151,13 +151,35 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal
     if dtype == object:
         pass
     elif isinstance(value, (np.datetime64, datetime)):
-        value = tslibs.Timestamp(value)
+        value = Timestamp(value)
     elif isinstance(value, (np.timedelta64, timedelta)):
-        value = tslibs.Timedelta(value)
+        value = Timedelta(value)
 
     return value
 
 
+def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
+    """
+    Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting
+    into a numpy array. Failing to unbox would risk dropping nanoseconds.
+
+    Notes
+    -----
+    Caller is responsible for checking dtype.kind in ["m", "M"]
+    """
+    if is_valid_nat_for_dtype(value, dtype):
+        # GH#36541: can't fill array directly with pd.NaT
+        # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
+        # ValueError: cannot convert float NaN to integer
+        value = dtype.type("NaT", "ns")
+    elif isinstance(value, Timestamp):
+        if value.tz is None:
+            value = value.to_datetime64()
+    elif isinstance(value, Timedelta):
+        value = value.to_timedelta64()
+    return value
+
+
 def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
     """
     try to cast to the specified dtype (e.g. convert back to bool/int
@@ -1428,8 +1450,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
                     raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
 
             if is_scalar(value):
-                if value == iNaT or isna(value):
-                    value = iNaT
+                value = maybe_unbox_datetimelike(value, dtype)
             elif not is_sparse(value):
                 value = np.array(value, copy=False)
 
@@ -1602,11 +1623,8 @@ def construct_1d_arraylike_from_scalar(
             dtype = np.dtype("object")
             if not isna(value):
                 value = ensure_str(value)
-        elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype):
-            # GH36541: can't fill array directly with pd.NaT
-            # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT)
-            # ValueError: cannot convert float NaN to integer
-            value = dtype.type("NaT", "ns")
+        elif dtype.kind in ["M", "m"]:
+            value = maybe_unbox_datetimelike(value, dtype)
 
         subarr = np.empty(length, dtype=dtype)
         subarr.fill(value)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 66399f2b9a5e4..bbb874e141f5d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -87,6 +87,7 @@
     maybe_convert_platform,
     maybe_downcast_to_dtype,
     maybe_infer_to_datetimelike,
+    maybe_unbox_datetimelike,
     validate_numeric_casting,
 )
 from pandas.core.dtypes.common import (
@@ -593,6 +594,9 @@ def __init__(
                 ]
                 mgr = arrays_to_mgr(values, columns, index, columns, dtype=None)
             else:
+                if dtype.kind in ["m", "M"]:
+                    data = maybe_unbox_datetimelike(data, dtype)
+
                 # Attempt to coerce to a numpy array
                 try:
                     arr = np.array(data, dtype=dtype, copy=copy)
diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py
index ed272cef3e7ba..a6f97563311a7 100644
--- a/pandas/tests/dtypes/cast/test_construct_from_scalar.py
+++ b/pandas/tests/dtypes/cast/test_construct_from_scalar.py
@@ -1,7 +1,9 @@
+import numpy as np
+
 from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
 from pandas.core.dtypes.dtypes import CategoricalDtype
 
-from pandas import Categorical
+from pandas import Categorical, Timedelta, Timestamp
 import pandas._testing as tm
 
 
@@ -16,3 +18,17 @@ def test_cast_1d_array_like_from_scalar_categorical():
 
     result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type)
     tm.assert_categorical_equal(result, expected)
+
+
+def test_cast_1d_array_like_from_timestamp():
+    # check we don't lose nanoseconds
+    ts = Timestamp.now() + Timedelta(1)
+    res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]"))
+    assert res[0] == ts
+
+
+def test_cast_1d_array_like_from_timedelta():
+    # check we don't lose nanoseconds
+    td = Timedelta(1)
+    res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]"))
+    assert res[0] == td
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index d61cb9ea8abbf..3d4fbc74d0e6a 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -11,7 +11,7 @@
 import pytz
 
 from pandas.compat import is_platform_little_endian
-from pandas.compat.numpy import _np_version_under1p19
+from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20
 
 from pandas.core.dtypes.common import is_integer_dtype
 from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
@@ -2915,3 +2915,50 @@ def test_construction_from_set_raises(self):
         msg = "Set type is unordered"
with pytest.raises(TypeError, match=msg): DataFrame({"a": {1, 2, 3}}) + + +def get1(obj): + if isinstance(obj, Series): + return obj.iloc[0] + else: + return obj.iloc[0, 0] + + +class TestFromScalar: + @pytest.fixture + def constructor(self, frame_or_series): + if frame_or_series is Series: + return functools.partial(Series, index=range(2)) + else: + return functools.partial(DataFrame, index=range(2), columns=range(2)) + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_from_nat_scalar(self, dtype, constructor): + obj = constructor(pd.NaT, dtype=dtype) + assert np.all(obj.dtypes == dtype) + assert np.all(obj.isna()) + + def test_from_timedelta_scalar_preserves_nanos(self, constructor): + td = Timedelta(1) + + obj = constructor(td, dtype="m8[ns]") + assert get1(obj) == td + + def test_from_timestamp_scalar_preserves_nanos(self, constructor): + ts = Timestamp.now() + Timedelta(1) + + obj = Series(ts, index=range(1), dtype="M8[ns]") + assert get1(obj) == ts + + def test_from_timedelta64_scalar_object(self, constructor, request): + if constructor.func is DataFrame and _np_version_under1p20: + mark = pytest.mark.xfail( + reason="np.array(td64, dtype=object) converts to int" + ) + request.node.add_marker(mark) + + td = Timedelta(1) + td64 = td.to_timedelta64() + + obj = constructor(td64, dtype=object) + assert isinstance(get1(obj), np.timedelta64)