-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: Series constructor drops nanoseconds of Timedelta scalar #38040
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2a334c9
d5f67b2
0e87f77
774a130
b5054c9
b1fbdb7
9c66d98
282041d
90202a0
41c75cf
940bc2c
6d36911
96e1876
8ae4dfe
85a4016
4e0a787
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1611,13 +1611,42 @@ def cast_scalar_to_array( | |
ndarray of shape, filled with value, of specified / inferred dtype | ||
|
||
""" | ||
# that's what the type annotation indicates | ||
assert isinstance(dtype, (type(None), str, np.dtype)) | ||
|
||
if dtype is None: | ||
dtype, fill_value = infer_dtype_from_scalar(value) | ||
dtype, value = infer_dtype_from_scalar(value) | ||
else: | ||
fill_value = value | ||
if not isinstance(dtype, np.dtype): | ||
dtype = np.dtype(dtype) | ||
empty = shape and not any(shape) | ||
# dtype coercion when empty: sometimes yes, sometimes no? | ||
|
||
if not empty and is_integer_dtype(dtype) and isna(value): | ||
# coerce if we have nan for an integer dtype | ||
dtype = np.dtype("float64") | ||
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): | ||
# we need to coerce to object dtype to avoid | ||
# to allow numpy to take our string as a scalar value | ||
dtype = np.dtype("object") | ||
if not isna(value): | ||
value = ensure_str(value) | ||
elif dtype.kind == "m": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could share the "m" and "M" code and for the is_valid_nat_for_dtype case use |
||
# GH38032: filling in Timedelta/Timestamp drops nanoseconds | ||
if isinstance(value, Timedelta): | ||
value = value.to_numpy() | ||
# GH36541: filling datetime-like array directly with pd.NaT | ||
# raises ValueError: cannot convert float NaN to integer | ||
elif is_valid_nat_for_dtype(value, dtype): | ||
value = np.timedelta64("NaT") | ||
elif dtype.kind == "M": | ||
if isinstance(value, Timestamp): | ||
value = value.to_numpy() | ||
elif is_valid_nat_for_dtype(value, dtype): | ||
value = np.datetime64("NaT") | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
values = np.empty(shape, dtype=dtype) | ||
values.fill(fill_value) | ||
values.fill(value) | ||
|
||
return values | ||
|
||
|
@@ -1643,26 +1672,8 @@ def construct_1d_arraylike_from_scalar( | |
if is_extension_array_dtype(dtype): | ||
cls = dtype.construct_array_type() | ||
subarr = cls._from_sequence([value] * length, dtype=dtype) | ||
|
||
else: | ||
|
||
if length and is_integer_dtype(dtype) and isna(value): | ||
# coerce if we have nan for an integer dtype | ||
dtype = np.dtype("float64") | ||
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): | ||
# we need to coerce to object dtype to avoid | ||
# to allow numpy to take our string as a scalar value | ||
dtype = np.dtype("object") | ||
if not isna(value): | ||
value = ensure_str(value) | ||
elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype): | ||
# GH36541: can't fill array directly with pd.NaT | ||
# > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) | ||
# ValueError: cannot convert float NaN to integer | ||
value = np.datetime64("NaT") | ||
|
||
subarr = np.empty(length, dtype=dtype) | ||
subarr.fill(value) | ||
subarr = cast_scalar_to_array((length,), value, dtype) | ||
|
||
return subarr | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
from pandas import ( | ||
Categorical, | ||
Interval, | ||
NaT, | ||
Period, | ||
Series, | ||
Timedelta, | ||
|
@@ -188,11 +189,32 @@ def test_infer_dtype_from_array(arr, expected, pandas_dtype): | |
(Period("2011-01-01", freq="D"), object), | ||
], | ||
) | ||
def test_cast_scalar_to_array(obj, dtype): | ||
shape = (3, 2) | ||
|
||
@pytest.mark.parametrize("shape", [(), (5,), (3, 2)]) | ||
def test_cast_scalar_to_array(obj, dtype, shape): | ||
exp = np.empty(shape, dtype=dtype) | ||
exp.fill(obj) | ||
|
||
arr = cast_scalar_to_array(shape, obj, dtype=dtype) | ||
arr = cast_scalar_to_array(shape, obj, dtype=np.dtype(dtype)) | ||
tm.assert_numpy_array_equal(arr, exp) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"obj_in,dtype_in,obj_out,dtype_out", | ||
[ | ||
(NaT, "timedelta64[ns]", np.timedelta64("NaT"), "timedelta64[ns]"), | ||
(Timedelta(1), "timedelta64[ns]", 1, "timedelta64[ns]"), | ||
(NaT, "datetime64[ns]", np.datetime64("NaT"), "datetime64[ns]"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls add a Nat that is timedelta64[ns] There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
(Timestamp(1), "datetime64[ns]", 1, "datetime64[ns]"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a tz-aware Timestamp case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can probably use this fixture: ea_scalar_and_dtype There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. to be sure: as we cast to np.dtype, Period and Interval should be left aside, right? |
||
(Timestamp(1, tz="US/Eastern"), "datetime64[ns]", 1, "datetime64[ns]"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a deeply weird behavior that we should be trying to deprecate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, true, it can be counterintuitive that |
||
(np.nan, np.int64, np.nan, np.float64), | ||
("hello", "U", "hello", object), | ||
("hello", "S", "hello", object), | ||
], | ||
) | ||
@pytest.mark.parametrize("shape", [(), (5,), (3, 2)]) | ||
def test_cast_scalar_to_array_conversion_needed( | ||
obj_in, dtype_in, obj_out, dtype_out, shape | ||
): | ||
result = cast_scalar_to_array(shape, obj_in, dtype=np.dtype(dtype_in)) | ||
expected = np.full(shape, obj_out, dtype=dtype_out) | ||
tm.assert_numpy_array_equal(result, expected) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1934,6 +1934,21 @@ def test_constructor_datetimes_with_nulls(self, arr): | |
expected = Series([np.dtype("datetime64[ns]")]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize( | ||
"scalar,dtype", | ||
[ | ||
(Timedelta(1), "timedelta64[ns]"), | ||
(Timestamp(1), "datetime64[ns]"), | ||
(Timestamp(1, tz="US/Eastern"), "datetime64[ns]"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same weird behavior |
||
], | ||
) | ||
def test_constructor_timelike_nanoseconds(self, scalar, dtype): | ||
# GH38032 | ||
df = DataFrame(scalar, index=[0], columns=[0], dtype=dtype) | ||
result = df.at[0, 0].value | ||
expected = scalar.value | ||
assert result == expected | ||
|
||
def test_constructor_for_list_with_dtypes(self): | ||
# test list of lists/ndarrays | ||
df = DataFrame([np.arange(5) for x in range(5)]) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is there no risk of getting here with an EADtype?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As it is, no,
cast_scalar_to_array
is called after having checked dtype is not EADtype. It must have been written with that in mind.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add an assert isinstance(dtype, (type(None), str, np.dtype)) and a comment (that's what the type annotation indicates) (at the top of the function)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done