From e5c2f2b54eba6fc8b84302b94670e795a54ebff0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Dec 2020 18:42:17 -0800 Subject: [PATCH 1/4] BUG: Series/DataFrame construction from scalars --- pandas/core/construction.py | 3 -- pandas/core/dtypes/cast.py | 38 ++++++++++++----- pandas/core/frame.py | 4 ++ .../dtypes/cast/test_construct_from_scalar.py | 18 +++++++- pandas/tests/frame/test_constructors.py | 41 +++++++++++++++++++ 5 files changed, 90 insertions(+), 14 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 44224f9709699..e350bdf38dd81 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -488,9 +488,6 @@ def sanitize_array( arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) elif lib.is_scalar(data) and index is not None and dtype is not None: - data = maybe_cast_to_datetime(data, dtype) - if not lib.is_scalar(data): - data = data[0] subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 165e63e23d60e..23ee936162b06 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,7 +20,7 @@ import numpy as np -from pandas._libs import lib, tslib, tslibs +from pandas._libs import lib, tslib from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -151,13 +151,35 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal if dtype == object: pass elif isinstance(value, (np.datetime64, datetime)): - value = tslibs.Timestamp(value) + value = Timestamp(value) elif isinstance(value, (np.timedelta64, timedelta)): - value = tslibs.Timedelta(value) + value = Timedelta(value) return value +def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: + """ + Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting + into a numpy array. Failing to unbox would risk dropping nanoseconds. + + Notes + ----- + Caller is responsible for checking dtype.kidn in ["m", "M"] + """ + if is_valid_nat_for_dtype(value, dtype): + # GH#36541: can't fill array directly with pd.NaT + # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) + # ValueError: cannot convert float NaN to integer + value = dtype.type("NaT", "ns") + elif isinstance(value, Timestamp): + if value.tz is None: + value = value.to_datetime64() + elif isinstance(value, Timedelta): + value = value.to_timedelta64() + return value + + def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): """ try to cast to the specified dtype (e.g. convert back to bool/int @@ -1428,8 +1450,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") if is_scalar(value): - if value == iNaT or isna(value): - value = iNaT + value = maybe_unbox_datetimelike(value, dtype) elif not is_sparse(value): value = np.array(value, copy=False) @@ -1598,11 +1619,8 @@ def construct_1d_arraylike_from_scalar( dtype = np.dtype("object") if not isna(value): value = ensure_str(value) - elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype): - # GH36541: can't fill array directly with pd.NaT - # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) - # ValueError: cannot convert float NaN to integer - value = dtype.type("NaT", "ns") + elif dtype.kind in ["M", "m"]: + value = maybe_unbox_datetimelike(value, dtype) subarr = np.empty(length, dtype=dtype) subarr.fill(value) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index de60cda382fba..ea5ec869127a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -88,6 +88,7 @@ maybe_convert_platform, maybe_downcast_to_dtype, maybe_infer_to_datetimelike, + maybe_unbox_datetimelike, maybe_upcast, validate_numeric_casting, ) @@ -601,6 +602,9 @@ def __init__( ] mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) else: + if dtype.kind in ["m", "M"]: + data = maybe_unbox_datetimelike(data, dtype) + # Attempt to coerce to a numpy array try: arr = np.array(data, dtype=dtype, copy=copy) diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py index ed272cef3e7ba..a6f97563311a7 100644 --- a/pandas/tests/dtypes/cast/test_construct_from_scalar.py +++ b/pandas/tests/dtypes/cast/test_construct_from_scalar.py @@ -1,7 +1,9 @@ +import numpy as np + from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import Categorical +from pandas import Categorical, Timedelta, Timestamp import pandas._testing as tm @@ -16,3 +18,17 @@ def test_cast_1d_array_like_from_scalar_categorical(): result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type) tm.assert_categorical_equal(result, expected) + + +def test_cast_1d_array_like_from_timestamp(): + # check we dont lose nanoseconds + ts = Timestamp.now() + Timedelta(1) + res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]")) + assert res[0] == ts + + +def test_cast_1d_array_like_from_timedelta(): + # check we dont lose nanoseconds + td = Timedelta(1) + res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]")) + assert res[0] == td diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d61cb9ea8abbf..76e8cdcf6a3bd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2915,3 +2915,44 @@ def test_construction_from_set_raises(self): msg = "Set type is unordered" with pytest.raises(TypeError, match=msg): DataFrame({"a": {1, 2, 3}}) + + +def get1(obj): + if isinstance(obj, Series): + return obj.iloc[0] + else: + return obj.iloc[0, 0] + + +class TestFromScalar: + @pytest.fixture + def constructor(self, frame_or_series): + if frame_or_series is Series: + return functools.partial(Series, index=range(2)) + else: + return functools.partial(DataFrame, index=range(2), columns=range(2)) + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_from_nat_scalar(self, dtype, constructor): + obj = constructor(pd.NaT, dtype=dtype) + assert np.all(obj.dtypes == dtype) + assert np.all(obj.isna()) + + def test_from_timedelta_scalar_preserves_nanos(self, constructor): + td = Timedelta(1) + + obj = constructor(td, dtype="m8[ns]") + assert get1(obj) == td + + def test_from_timestamp_scalar_preserves_nanos(self, constructor): + ts = Timestamp.now() + Timedelta(1) + + obj = Series(ts, index=range(1), dtype="M8[ns]") + assert get1(obj) == ts + + def test_from_timedelta64_scalar_object(self, constructor): + td = Timedelta(1) + td64 = td.to_timedelta64() + + obj = constructor(td64, dtype=object) + assert isinstance(get1(obj), np.timedelta64) From 9c643baf38475e9c38d57a390bcc1c1db9769961 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Dec 2020 20:02:04 -0800 Subject: [PATCH 2/4] xfail on some numpys --- pandas/tests/frame/test_constructors.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 76e8cdcf6a3bd..3d4fbc74d0e6a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -11,7 +11,7 @@ import pytz from pandas.compat import is_platform_little_endian -from pandas.compat.numpy import _np_version_under1p19 +from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20 from pandas.core.dtypes.common import is_integer_dtype from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype @@ -2950,7 +2950,13 @@ def test_from_timestamp_scalar_preserves_nanos(self, constructor): obj = Series(ts, index=range(1), dtype="M8[ns]") assert get1(obj) == ts - def test_from_timedelta64_scalar_object(self, constructor): + def test_from_timedelta64_scalar_object(self, constructor, request): + if constructor.func is DataFrame and _np_version_under1p20: + mark = pytest.mark.xfail( + reason="np.array(td64, dtype=object) converts to int" + ) + request.node.add_marker(mark) + td = Timedelta(1) td64 = td.to_timedelta64() From dcfffaf5f30e9a783a02f7caf2e5efd8d6fcf231 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 11 Dec 2020 09:48:14 -0800 Subject: [PATCH 3/4] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d0afc24aaecac..5b07a1b9fcca2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -86,7 +86,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - +- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) - - From c0641164665113a6c52a9f06cbb901b700f2325e Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Dec 2020 09:32:50 -0800 Subject: [PATCH 4/4] typo fixup --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index cd286f76b4d8f..a928a86fead26 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -165,7 +165,7 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: Notes ----- - Caller is responsible for checking dtype.kidn in ["m", "M"] + Caller is responsible for checking dtype.kind in ["m", "M"] """ if is_valid_nat_for_dtype(value, dtype): # GH#36541: can't fill array directly with pd.NaT