Skip to content

BUG: DataFrame(ndarray[dt64], dtype=object) #41599

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,8 @@ Conversion
- Bug in :func:`factorize` where, when given an array with a numeric numpy dtype lower than int64, uint64 and float64, the unique values did not keep their original dtype (:issue:`41132`)
- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
- Bug in :meth:`qcut` raising an error when taking ``Float64Dtype`` as input (:issue:`40730`)
- Bug in :class:`DataFrame` and :class:`Series` construction with ``datetime64[ns]`` data and ``dtype=object`` resulting in ``datetime`` objects instead of :class:`Timestamp` objects (:issue:`41599`)
- Bug in :class:`DataFrame` and :class:`Series` construction with ``timedelta64[ns]`` data and ``dtype=object`` resulting in ``np.timedelta64`` objects instead of :class:`Timedelta` objects (:issue:`41599`)

Strings
^^^^^^^
Expand Down
20 changes: 7 additions & 13 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
Timedelta,
Timestamp,
conversion,
ints_to_pydatetime,
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._typing import (
Expand Down Expand Up @@ -1665,18 +1664,13 @@ def maybe_cast_to_datetime(
raise
pass

# coerce datetimelike to object
elif is_datetime64_dtype(vdtype) and not is_datetime64_dtype(dtype):
if is_object_dtype(dtype):
value = cast(np.ndarray, value)

if value.dtype != DT64NS_DTYPE:
value = value.astype(DT64NS_DTYPE)
ints = np.asarray(value).view("i8")
return ints_to_pydatetime(ints)

# we have a non-castable dtype that was passed
raise TypeError(f"Cannot cast datetime64 to {dtype}")
elif getattr(vdtype, "kind", None) in ["m", "M"]:
# we are already datetimelike and want to coerce to non-datetimelike;
# astype_nansafe will raise for anything other than object, then upcast.
# see test_datetimelike_values_with_object_dtype
# error: Argument 2 to "astype_nansafe" has incompatible type
# "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
return astype_nansafe(value, dtype) # type: ignore[arg-type]

elif isinstance(value, np.ndarray):
if value.dtype.kind in ["M", "m"]:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def _init_mgr(
if (
isinstance(mgr, BlockManager)
and len(mgr.blocks) == 1
and mgr.blocks[0].values.dtype == dtype
and is_dtype_equal(mgr.blocks[0].values.dtype, dtype)
):
pass
else:
Expand Down
35 changes: 35 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
IntervalDtype,
PandasDtype,
PeriodDtype,
)

Expand Down Expand Up @@ -111,6 +112,40 @@ def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series):
with pytest.raises(ValueError, match=msg):
frame_or_series(arr, dtype="m8[ns]")

@pytest.mark.parametrize("kind", ["m", "M"])
def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series):
    # GH#41599 with dtype=object, datetimelike values must be boxed as the
    #  pandas scalars: dt64 -> Timestamp (not pydatetime), td64 -> Timedelta
    dtype, scalar_type = (
        ("M8[ns]", Timestamp) if kind == "M" else ("m8[ns]", Timedelta)
    )

    def check_boxed(result):
        # the first backing array must be object-dtype and hold pandas scalars
        assert result._mgr.arrays[0].dtype == object
        assert isinstance(result._mgr.arrays[0].ravel()[0], scalar_type)

    # six consecutive i8 values reinterpreted as datetimelike, shaped (3, 2)
    arr = np.arange(6, dtype="i8").view(dtype).reshape(3, 2)
    if frame_or_series is Series:
        # a Series needs 1D input, so keep only the first column
        arr = arr[:, 0]

    # constructing directly from the ndarray
    check_boxed(frame_or_series(arr, dtype=object))

    # go through a different path in internals.construction
    check_boxed(frame_or_series(frame_or_series(arr), dtype=object))

    # same path, with the object dtype spelled as a PandasDtype
    check_boxed(frame_or_series(frame_or_series(arr), dtype=PandasDtype(object)))

    if frame_or_series is DataFrame:
        # other paths through internals.construction
        sers = [Series(row) for row in arr]
        check_boxed(frame_or_series(sers, dtype=object))

def test_series_with_name_not_matching_column(self):
# GH#9232
x = Series(range(5), name=1)
Expand Down