Skip to content

BUG: Series/DataFrame construction from scalars #38405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ Categorical

Datetimelike
^^^^^^^^^^^^

- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
-
-

Expand Down
38 changes: 28 additions & 10 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import numpy as np

from pandas._libs import lib, tslib, tslibs
from pandas._libs import lib, tslib
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -151,13 +151,35 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal
if dtype == object:
pass
elif isinstance(value, (np.datetime64, datetime)):
value = tslibs.Timestamp(value)
value = Timestamp(value)
elif isinstance(value, (np.timedelta64, timedelta)):
value = tslibs.Timedelta(value)
value = Timedelta(value)

return value


def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
    """
    Unbox a pandas datetimelike scalar into its numpy counterpart.

    A Timedelta becomes a timedelta64 and a tz-naive Timestamp becomes a
    datetime64 so the value can be set into a numpy array; leaving it boxed
    would risk dropping nanoseconds. Any other value, including a tz-aware
    Timestamp, is returned unchanged.

    Notes
    -----
    Caller is responsible for checking dtype.kind in ["m", "M"]
    """
    if is_valid_nat_for_dtype(value, dtype):
        # GH#36541: an array can't be filled directly with pd.NaT
        # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
        # ValueError: cannot convert float NaN to integer
        return dtype.type("NaT", "ns")

    if isinstance(value, Timedelta):
        return value.to_timedelta64()

    if isinstance(value, Timestamp) and value.tz is None:
        # only tz-naive Timestamps are unboxed; tz-aware ones fall through
        return value.to_datetime64()

    return value


def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
"""
try to cast to the specified dtype (e.g. convert back to bool/int
Expand Down Expand Up @@ -1428,8 +1450,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")

if is_scalar(value):
if value == iNaT or isna(value):
value = iNaT
value = maybe_unbox_datetimelike(value, dtype)
elif not is_sparse(value):
value = np.array(value, copy=False)

Expand Down Expand Up @@ -1602,11 +1623,8 @@ def construct_1d_arraylike_from_scalar(
dtype = np.dtype("object")
if not isna(value):
value = ensure_str(value)
elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype):
# GH36541: can't fill array directly with pd.NaT
# > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
# ValueError: cannot convert float NaN to integer
value = dtype.type("NaT", "ns")
elif dtype.kind in ["M", "m"]:
value = maybe_unbox_datetimelike(value, dtype)

subarr = np.empty(length, dtype=dtype)
subarr.fill(value)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
maybe_convert_platform,
maybe_downcast_to_dtype,
maybe_infer_to_datetimelike,
maybe_unbox_datetimelike,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -593,6 +594,9 @@ def __init__(
]
mgr = arrays_to_mgr(values, columns, index, columns, dtype=None)
else:
if dtype.kind in ["m", "M"]:
data = maybe_unbox_datetimelike(data, dtype)

# Attempt to coerce to a numpy array
try:
arr = np.array(data, dtype=dtype, copy=copy)
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/dtypes/cast/test_construct_from_scalar.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import numpy as np

from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
from pandas.core.dtypes.dtypes import CategoricalDtype

from pandas import Categorical
from pandas import Categorical, Timedelta, Timestamp
import pandas._testing as tm


Expand All @@ -16,3 +18,17 @@ def test_cast_1d_array_like_from_scalar_categorical():

result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type)
tm.assert_categorical_equal(result, expected)


def test_cast_1d_array_like_from_timestamp():
    # check we don't lose nanoseconds when filling a datetime64[ns] array
    stamp = Timestamp.now() + Timedelta(1)
    target_dtype = np.dtype("M8[ns]")
    result = construct_1d_arraylike_from_scalar(stamp, 2, target_dtype)
    assert result[0] == stamp


def test_cast_1d_array_like_from_timedelta():
    # check we don't lose nanoseconds when filling a timedelta64[ns] array
    delta = Timedelta(1)
    target_dtype = np.dtype("m8[ns]")
    result = construct_1d_arraylike_from_scalar(delta, 2, target_dtype)
    assert result[0] == delta
49 changes: 48 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pytz

from pandas.compat import is_platform_little_endian
from pandas.compat.numpy import _np_version_under1p19
from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20

from pandas.core.dtypes.common import is_integer_dtype
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
Expand Down Expand Up @@ -2915,3 +2915,50 @@ def test_construction_from_set_raises(self):
msg = "Set type is unordered"
with pytest.raises(TypeError, match=msg):
DataFrame({"a": {1, 2, 3}})


def get1(obj):
    """Return the first element of a Series, else the [0, 0] cell of ``obj``."""
    # A Series takes a single positional index; anything else is indexed
    # with a (row, column) pair.
    position = 0 if isinstance(obj, Series) else (0, 0)
    return obj.iloc[position]


class TestFromScalar:
    """Tests for building a Series/DataFrame from one datetimelike scalar."""

    @pytest.fixture
    def constructor(self, frame_or_series):
        # Pre-bind the axes so each test only supplies the scalar and dtype.
        # NOTE(review): frame_or_series is presumably a fixture parametrized
        # over the Series and DataFrame classes -- confirm in conftest.
        if frame_or_series is Series:
            return functools.partial(Series, index=range(2))
        else:
            return functools.partial(DataFrame, index=range(2), columns=range(2))

    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
    def test_from_nat_scalar(self, dtype, constructor):
        # NaT must yield an all-NA object of the requested dtype
        obj = constructor(pd.NaT, dtype=dtype)
        assert np.all(obj.dtypes == dtype)
        assert np.all(obj.isna())

    def test_from_timedelta_scalar_preserves_nanos(self, constructor):
        td = Timedelta(1)

        obj = constructor(td, dtype="m8[ns]")
        assert get1(obj) == td

    def test_from_timestamp_scalar_preserves_nanos(self, constructor):
        ts = Timestamp.now() + Timedelta(1)

        # Go through the fixture so both the Series and the DataFrame
        # constructors are exercised, rather than hard-coding Series.
        obj = constructor(ts, dtype="M8[ns]")
        assert get1(obj) == ts

    def test_from_timedelta64_scalar_object(self, constructor, request):
        if constructor.func is DataFrame and _np_version_under1p20:
            # expected failure for DataFrame on numpy older than 1.20
            mark = pytest.mark.xfail(
                reason="np.array(td64, dtype=object) converts to int"
            )
            request.node.add_marker(mark)

        td = Timedelta(1)
        td64 = td.to_timedelta64()

        # with dtype=object the raw np.timedelta64 must be kept as-is
        obj = constructor(td64, dtype=object)
        assert isinstance(get1(obj), np.timedelta64)