From b83385ad49499c102cb90b19505d9553875345e8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 May 2021 21:59:29 -0700 Subject: [PATCH 1/2] BUG: DataFrame(ndarray[dt64], dtype=object) --- pandas/core/dtypes/cast.py | 20 +++++--------- pandas/core/generic.py | 2 +- pandas/tests/frame/test_constructors.py | 35 +++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 94cffe8fb840d..01ba734951801 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -31,7 +31,6 @@ Timedelta, Timestamp, conversion, - ints_to_pydatetime, ) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( @@ -1653,18 +1652,13 @@ def maybe_cast_to_datetime( raise pass - # coerce datetimelike to object - elif is_datetime64_dtype(vdtype) and not is_datetime64_dtype(dtype): - if is_object_dtype(dtype): - value = cast(np.ndarray, value) - - if value.dtype != DT64NS_DTYPE: - value = value.astype(DT64NS_DTYPE) - ints = np.asarray(value).view("i8") - return ints_to_pydatetime(ints) - - # we have a non-castable dtype that was passed - raise TypeError(f"Cannot cast datetime64 to {dtype}") + elif getattr(vdtype, "kind", None) in ["m", "M"]: + # we are already datetimelike and want to coerce to non-datetimelike; + # astype_nansafe will raise for anything other than object, then upcast. + # see test_datetimelike_values_with_object_dtype + # error: Argument 2 to "astype_nansafe" has incompatible type + # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" + return astype_nansafe(value, dtype) # type: ignore[arg-type] elif isinstance(value, np.ndarray): if value.dtype.kind in ["M", "m"]: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7162bda0eff43..6d7c803685255 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -267,7 +267,7 @@ def _init_mgr( if ( isinstance(mgr, BlockManager) and len(mgr.blocks) == 1 - and mgr.blocks[0].values.dtype == dtype + and is_dtype_equal(mgr.blocks[0].values.dtype, dtype) ): pass else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6e9991ff17ac3..30ccefec65660 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -24,6 +24,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, IntervalDtype, + PandasDtype, PeriodDtype, ) @@ -99,6 +100,40 @@ def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): with pytest.raises(ValueError, match=msg): frame_or_series(arr, dtype="m8[ns]") + @pytest.mark.parametrize("kind", ["m", "M"]) + def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series): + # with dtype=object, we should cast dt64 values to Timestamps, not pydatetimes + if kind == "M": + dtype = "M8[ns]" + scalar_type = Timestamp + else: + dtype = "m8[ns]" + scalar_type = Timedelta + + arr = np.arange(6, dtype="i8").view(dtype).reshape(3, 2) + if frame_or_series is Series: + arr = arr[:, 0] + + obj = frame_or_series(arr, dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + # go through a different path in internals.construction + obj = frame_or_series(frame_or_series(arr), dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + obj = frame_or_series(frame_or_series(arr), dtype=PandasDtype(object)) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + + if frame_or_series is DataFrame: + # other paths through internals.construction + sers = [Series(x) for x in arr] + obj = frame_or_series(sers, dtype=object) + assert obj._mgr.arrays[0].dtype == object + assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type) + def test_series_with_name_not_matching_column(self): # GH#9232 x = Series(range(5), name=1) From 35e1472ffc89d122bc0e302c9b5e2fc75d1c8d0c Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 May 2021 08:59:57 -0700 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c6c4dadaf8c9d..b3d9252acd3cb 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -775,6 +775,8 @@ Conversion - Bug in :func:`factorize` where, when given an array with a numeric numpy dtype lower than int64, uint64 and float64, the unique values did not keep their original dtype (:issue:`41132`) - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`) - Bug in :meth:`qcut` raising error when taking ``Float64DType`` as input (:issue:`40730`) +- Bug in :class:`DataFrame` and :class:`Series` construction with ``datetime64[ns]`` data and ``dtype=object`` resulting in ``datetime`` objects instead of :class:`Timestamp` objects (:issue:`41599`) +- Bug in :class:`DataFrame` and :class:`Series` construction with ``timedelta64[ns]`` data and ``dtype=object`` resulting in ``np.timedelta64`` objects instead of :class:`Timedelta` objects (:issue:`41599`) Strings ^^^^^^^