Skip to content

Commit dc674dd

Browse files
authored
BUG: Series/DataFrame construction from scalars (#38405)
1 parent 3999efa commit dc674dd

File tree

5 files changed

+98
-13
lines changed

5 files changed

+98
-13
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ Categorical
8686

8787
Datetimelike
8888
^^^^^^^^^^^^
89-
89+
- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
9090
-
9191
-
9292

pandas/core/dtypes/cast.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
import numpy as np
2222

23-
from pandas._libs import lib, tslib, tslibs
23+
from pandas._libs import lib, tslib
2424
from pandas._libs.tslibs import (
2525
NaT,
2626
OutOfBoundsDatetime,
@@ -145,13 +145,35 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal
145145
if dtype == object:
146146
pass
147147
elif isinstance(value, (np.datetime64, datetime)):
148-
value = tslibs.Timestamp(value)
148+
value = Timestamp(value)
149149
elif isinstance(value, (np.timedelta64, timedelta)):
150-
value = tslibs.Timedelta(value)
150+
value = Timedelta(value)
151151

152152
return value
153153

154154

155+
def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
156+
"""
157+
Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting
158+
into a numpy array. Failing to unbox would risk dropping nanoseconds.
159+
160+
Notes
161+
-----
162+
Caller is responsible for checking dtype.kind in ["m", "M"]
163+
"""
164+
if is_valid_nat_for_dtype(value, dtype):
165+
# GH#36541: can't fill array directly with pd.NaT
166+
# > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
167+
# ValueError: cannot convert float NaN to integer
168+
value = dtype.type("NaT", "ns")
169+
elif isinstance(value, Timestamp):
170+
if value.tz is None:
171+
value = value.to_datetime64()
172+
elif isinstance(value, Timedelta):
173+
value = value.to_timedelta64()
174+
return value
175+
176+
155177
def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
156178
"""
157179
try to cast to the specified dtype (e.g. convert back to bool/int
@@ -1361,8 +1383,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
13611383
raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
13621384

13631385
if is_scalar(value):
1364-
if value == iNaT or isna(value):
1365-
value = iNaT
1386+
value = maybe_unbox_datetimelike(value, dtype)
13661387
elif not is_sparse(value):
13671388
value = np.array(value, copy=False)
13681389

@@ -1535,11 +1556,8 @@ def construct_1d_arraylike_from_scalar(
15351556
dtype = np.dtype("object")
15361557
if not isna(value):
15371558
value = ensure_str(value)
1538-
elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype):
1539-
# GH36541: can't fill array directly with pd.NaT
1540-
# > np.empty(10, dtype="datetime64[64]").fill(pd.NaT)
1541-
# ValueError: cannot convert float NaN to integer
1542-
value = dtype.type("NaT", "ns")
1559+
elif dtype.kind in ["M", "m"]:
1560+
value = maybe_unbox_datetimelike(value, dtype)
15431561

15441562
subarr = np.empty(length, dtype=dtype)
15451563
subarr.fill(value)

pandas/core/frame.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
maybe_convert_platform,
8787
maybe_downcast_to_dtype,
8888
maybe_infer_to_datetimelike,
89+
maybe_unbox_datetimelike,
8990
validate_numeric_casting,
9091
)
9192
from pandas.core.dtypes.common import (
@@ -592,6 +593,9 @@ def __init__(
592593
]
593594
mgr = arrays_to_mgr(values, columns, index, columns, dtype=None)
594595
else:
596+
if dtype.kind in ["m", "M"]:
597+
data = maybe_unbox_datetimelike(data, dtype)
598+
595599
# Attempt to coerce to a numpy array
596600
try:
597601
arr = np.array(data, dtype=dtype, copy=copy)

pandas/tests/dtypes/cast/test_construct_from_scalar.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import numpy as np
2+
13
from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
24
from pandas.core.dtypes.dtypes import CategoricalDtype
35

4-
from pandas import Categorical
6+
from pandas import Categorical, Timedelta, Timestamp
57
import pandas._testing as tm
68

79

@@ -16,3 +18,17 @@ def test_cast_1d_array_like_from_scalar_categorical():
1618

1719
result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type)
1820
tm.assert_categorical_equal(result, expected)
21+
22+
23+
def test_cast_1d_array_like_from_timestamp():
24+
# check we don't lose nanoseconds
25+
ts = Timestamp.now() + Timedelta(1)
26+
res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]"))
27+
assert res[0] == ts
28+
29+
30+
def test_cast_1d_array_like_from_timedelta():
31+
# check we don't lose nanoseconds
32+
td = Timedelta(1)
33+
res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]"))
34+
assert res[0] == td

pandas/tests/frame/test_constructors.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pytz
1212

1313
from pandas.compat import is_platform_little_endian
14-
from pandas.compat.numpy import _np_version_under1p19
14+
from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20
1515

1616
from pandas.core.dtypes.common import is_integer_dtype
1717
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
@@ -2915,3 +2915,50 @@ def test_construction_from_set_raises(self):
29152915
msg = "Set type is unordered"
29162916
with pytest.raises(TypeError, match=msg):
29172917
DataFrame({"a": {1, 2, 3}})
2918+
2919+
2920+
def get1(obj):
2921+
if isinstance(obj, Series):
2922+
return obj.iloc[0]
2923+
else:
2924+
return obj.iloc[0, 0]
2925+
2926+
2927+
class TestFromScalar:
2928+
@pytest.fixture
2929+
def constructor(self, frame_or_series):
2930+
if frame_or_series is Series:
2931+
return functools.partial(Series, index=range(2))
2932+
else:
2933+
return functools.partial(DataFrame, index=range(2), columns=range(2))
2934+
2935+
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
2936+
def test_from_nat_scalar(self, dtype, constructor):
2937+
obj = constructor(pd.NaT, dtype=dtype)
2938+
assert np.all(obj.dtypes == dtype)
2939+
assert np.all(obj.isna())
2940+
2941+
def test_from_timedelta_scalar_preserves_nanos(self, constructor):
2942+
td = Timedelta(1)
2943+
2944+
obj = constructor(td, dtype="m8[ns]")
2945+
assert get1(obj) == td
2946+
2947+
def test_from_timestamp_scalar_preserves_nanos(self, constructor):
2948+
ts = Timestamp.now() + Timedelta(1)
2949+
2950+
obj = Series(ts, index=range(1), dtype="M8[ns]")
2951+
assert get1(obj) == ts
2952+
2953+
def test_from_timedelta64_scalar_object(self, constructor, request):
2954+
if constructor.func is DataFrame and _np_version_under1p20:
2955+
mark = pytest.mark.xfail(
2956+
reason="np.array(td64, dtype=object) converts to int"
2957+
)
2958+
request.node.add_marker(mark)
2959+
2960+
td = Timedelta(1)
2961+
td64 = td.to_timedelta64()
2962+
2963+
obj = constructor(td64, dtype=object)
2964+
assert isinstance(get1(obj), np.timedelta64)

0 commit comments

Comments
 (0)