From 2a334c973013ffaa3366d2342dc273f308670ee2 Mon Sep 17 00:00:00 2001 From: lpc Date: Tue, 24 Nov 2020 15:53:26 +0100 Subject: [PATCH 01/13] BUG: series from dict of Timedelta scalar drops nanoseconds (#GH38032) use convert_scalar_for_putitemlike for better conversion --- pandas/core/dtypes/cast.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 465ec821400e7..60101beb2ba30 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -88,12 +88,7 @@ ABCSeries, ) from pandas.core.dtypes.inference import is_list_like -from pandas.core.dtypes.missing import ( - is_valid_nat_for_dtype, - isna, - na_value_for_dtype, - notna, -) +from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna if TYPE_CHECKING: from pandas import Series @@ -1697,11 +1692,10 @@ def construct_1d_arraylike_from_scalar( dtype = np.dtype("object") if not isna(value): value = ensure_str(value) - elif dtype.kind in ["M", "m"] and is_valid_nat_for_dtype(value, dtype): - # GH36541: can't fill array directly with pd.NaT - # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT) - # ValueError: cannot convert float NaN to integer - value = np.datetime64("NaT") + elif dtype.kind in ["M", "m"]: + # GH36541: can't fill array directly with pd.NaT -> ValueError + # GH38032: filling in pd.Timedelta loses nanoseconds + value = convert_scalar_for_putitemlike(value, dtype) subarr = np.empty(length, dtype=dtype) subarr.fill(value) From d5f67b2acd90986c922d5246739a82138cdd7a0d Mon Sep 17 00:00:00 2001 From: lpc Date: Wed, 25 Nov 2020 14:26:14 +0100 Subject: [PATCH 02/13] BUG: series from dict of Timedelta scalar drops nanoseconds add test --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 12682a68fe177..4091d07683c76 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -703,6 +703,7 @@ def float_frame(): DatetimeTZDtype(tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), + (Timedelta(nanoseconds=1), "timedelta64[ns]"), # GH38032 ] ) def ea_scalar_and_dtype(request): From 0e87f77ac45305f10842a7ecf2ebf834adef91ad Mon Sep 17 00:00:00 2001 From: lpc Date: Wed, 25 Nov 2020 18:45:21 +0100 Subject: [PATCH 03/13] BUG: series from dict of Timedelta scalar drops nanoseconds reworking --- pandas/core/dtypes/cast.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 60101beb2ba30..ee007420fcd44 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -88,7 +88,12 @@ ABCSeries, ) from pandas.core.dtypes.inference import is_list_like -from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core.dtypes.missing import ( + is_valid_nat_for_dtype, + isna, + na_value_for_dtype, + notna, +) if TYPE_CHECKING: from pandas import Series @@ -1693,9 +1698,13 @@ def construct_1d_arraylike_from_scalar( if not isna(value): value = ensure_str(value) elif dtype.kind in ["M", "m"]: - # GH36541: can't fill array directly with pd.NaT -> ValueError - # GH38032: filling in pd.Timedelta loses nanoseconds - value = convert_scalar_for_putitemlike(value, dtype) + # GH38032: filling in Timedelta/Timestamp drops nanoseconds + if isinstance(value, (Timedelta, Timestamp)): + value = value.to_numpy() + # GH36541: filling datetime-like array directly with pd.NaT + # raises ValueError: cannot convert float NaN to integer + elif is_valid_nat_for_dtype(value, dtype): + value = np.datetime64("NaT") subarr = np.empty(length, dtype=dtype) subarr.fill(value) From 774a13044136802681206f0da90255ad551cc314 Mon Sep 17 00:00:00 2001 From: lpc Date: Wed, 25 Nov 2020 20:04:23 +0100 Subject: [PATCH 04/13] BUG: Series/DF constructors may drop Timedelta/Timestamp nanoseconds use cast_scalar_to_array as base for ndarray creation --- pandas/core/dtypes/cast.py | 44 +++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ee007420fcd44..1f40d5512a52b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1654,12 +1654,28 @@ def cast_scalar_to_array( """ if dtype is None: - dtype, fill_value = infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) else: - fill_value = value + if shape and is_integer_dtype(dtype) and isna(value): + # coerce if we have nan for an integer dtype + dtype = np.dtype("float64") + elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): + # we need to coerce to object dtype to avoid + # to allow numpy to take our string as a scalar value + dtype = np.dtype("object") + if not isna(value): + value = ensure_str(value) + elif dtype.kind in ["M", "m"]: + # GH38032: filling in Timedelta/Timestamp drops nanoseconds + if isinstance(value, (Timedelta, Timestamp)): + value = value.to_numpy() + # GH36541: filling datetime-like array directly with pd.NaT + # raises ValueError: cannot convert float NaN to integer + elif is_valid_nat_for_dtype(value, dtype): + value = np.datetime64("NaT") values = np.empty(shape, dtype=dtype) - values.fill(fill_value) + values.fill(value) return values @@ -1687,27 +1703,7 @@ def construct_1d_arraylike_from_scalar( subarr = cls._from_sequence([value] * length, dtype=dtype) else: - - if length and is_integer_dtype(dtype) and isna(value): - # coerce if we have nan for an integer dtype - dtype = np.dtype("float64") - elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): - # we need to coerce to object dtype to avoid - # to allow numpy to take our string as a scalar value - dtype = np.dtype("object") - if not isna(value): - value = ensure_str(value) - elif dtype.kind in ["M", "m"]: - # GH38032: filling in Timedelta/Timestamp drops nanoseconds - if isinstance(value, (Timedelta, Timestamp)): - value = value.to_numpy() - # GH36541: filling datetime-like array directly with pd.NaT - # raises ValueError: cannot convert float NaN to integer - elif is_valid_nat_for_dtype(value, dtype): - value = np.datetime64("NaT") - - subarr = np.empty(length, dtype=dtype) - subarr.fill(value) + subarr = cast_scalar_to_array(length, value, dtype) return subarr From b5054c9dc535b83077f2b59557fca638249bc2a7 Mon Sep 17 00:00:00 2001 From: lpc Date: Wed, 25 Nov 2020 21:44:22 +0100 Subject: [PATCH 05/13] BUG: Series/DF constructors may drop Timedelta/Timestamp nanoseconds Take care of non np.dtype args --- pandas/core/dtypes/cast.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1f40d5512a52b..ee3c0860c71ea 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1656,7 +1656,11 @@ def cast_scalar_to_array( if dtype is None: dtype, value = infer_dtype_from_scalar(value) else: - if shape and is_integer_dtype(dtype) and isna(value): + if not isinstance(dtype, np.dtype): + dtype = np.dtype(dtype) + not_empty = shape if is_integer(shape) else (not shape or any(shape)) + + if not_empty and is_integer_dtype(dtype) and isna(value): # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): @@ -1701,7 +1705,6 @@ def construct_1d_arraylike_from_scalar( if is_extension_array_dtype(dtype): cls = dtype.construct_array_type() subarr = cls._from_sequence([value] * length, dtype=dtype) - else: subarr = cast_scalar_to_array(length, value, dtype) From b1fbdb7b703cfc99470421010309b0abba23bbef Mon Sep 17 00:00:00 2001 From: lpc Date: Wed, 25 Nov 2020 22:56:54 +0100 Subject: [PATCH 06/13] BUG: Series/DF constructors may drop Timedelta/Timestamp nanoseconds cast_scalar_to_array: more testing + cast shape to tuple where needed --- pandas/core/dtypes/cast.py | 7 ++--- pandas/core/frame.py | 7 +---- pandas/tests/dtypes/cast/test_infer_dtype.py | 27 +++++++++++++++++--- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ee3c0860c71ea..d087474b089c1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1658,9 +1658,10 @@ def cast_scalar_to_array( else: if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) - not_empty = shape if is_integer(shape) else (not shape or any(shape)) + empty = shape and not any(shape) + # rem: type coercion if empty: sometimes yes, sometimes no ? - if not_empty and is_integer_dtype(dtype) and isna(value): + if not empty and is_integer_dtype(dtype) and isna(value): # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): @@ -1706,7 +1707,7 @@ def construct_1d_arraylike_from_scalar( cls = dtype.construct_array_type() subarr = cls._from_sequence([value] * length, dtype=dtype) else: - subarr = cast_scalar_to_array(length, value, dtype) + subarr = cast_scalar_to_array((length,), value, dtype) return subarr diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5b87c4ea8b9cc..03e1fb674cc87 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3864,12 +3864,7 @@ def reindexer(value): value, len(self.index), infer_dtype ) else: - # pandas\core\frame.py:3827: error: Argument 1 to - # "cast_scalar_to_array" has incompatible type "int"; expected - # "Tuple[Any, ...]" [arg-type] - value = cast_scalar_to_array( - len(self.index), value # type: ignore[arg-type] - ) + value = cast_scalar_to_array((len(self.index),), value) value = maybe_cast_to_datetime(value, infer_dtype) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 157adacbdfdf7..0c317f05a88a6 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -13,6 +13,7 @@ from pandas import ( Categorical, Interval, + NaT, Period, Series, Timedelta, @@ -188,11 +189,31 @@ def test_infer_dtype_from_array(arr, expected, pandas_dtype): (Period("2011-01-01", freq="D"), object), ], ) -def test_cast_scalar_to_array(obj, dtype): - shape = (3, 2) - +@pytest.mark.parametrize("shape", [(), (5,), (3, 2)]) +def test_cast_scalar_to_array(obj, dtype, shape): exp = np.empty(shape, dtype=dtype) exp.fill(obj) arr = cast_scalar_to_array(shape, obj, dtype=dtype) tm.assert_numpy_array_equal(arr, exp) + + +@pytest.mark.parametrize( + "obj_in,dtype_in,obj_out,dtype_out", + [ + (NaT, "datetime64[ns]", np.datetime64("NaT"), "datetime64[ns]"), + (Timestamp(1), "datetime64[ns]", 1, "datetime64[ns]"), + (Timedelta(1), "timedelta64[ns]", 1, "timedelta64[ns]"), + (np.nan, np.int64, np.nan, np.float64), + ("hello", "U", "hello", object), + ("hello", "S", "hello", object), + ], +) +@pytest.mark.parametrize("shape", [(), (5,), (3, 2)]) +def test_cast_scalar_to_array_conversion_needed( + obj_in, dtype_in, obj_out, dtype_out, shape +): + tm.assert_numpy_array_equal( + cast_scalar_to_array(shape, obj_in, dtype_in), + np.full(shape, fill_value=obj_out, dtype=dtype_out), + ) From 282041d07159734b8188757bf7805a78b3245c65 Mon Sep 17 00:00:00 2001 From: lpc Date: Fri, 27 Nov 2020 00:35:56 +0100 Subject: [PATCH 07/13] BUG: Series/DF constructors may drop Timedelta/Timestamp nanoseconds CR --- pandas/tests/dtypes/cast/test_infer_dtype.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 0c317f05a88a6..473b1f702bd97 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -201,9 +201,11 @@ def test_cast_scalar_to_array(obj, dtype, shape): @pytest.mark.parametrize( "obj_in,dtype_in,obj_out,dtype_out", [ + (NaT, "timedelta64[ns]", np.timedelta64("NaT"), "timedelta64[ns]"), + (Timedelta(1), "timedelta64[ns]", 1, "timedelta64[ns]"), (NaT, "datetime64[ns]", np.datetime64("NaT"), "datetime64[ns]"), (Timestamp(1), "datetime64[ns]", 1, "datetime64[ns]"), - (Timedelta(1), "timedelta64[ns]", 1, "timedelta64[ns]"), + (Timestamp(1, tz="US/Eastern"), "datetime64[ns]", 1, "datetime64[ns]"), (np.nan, np.int64, np.nan, np.float64), ("hello", "U", "hello", object), ("hello", "S", "hello", object), @@ -213,7 +215,6 @@ def test_cast_scalar_to_array(obj, dtype, shape): def test_cast_scalar_to_array_conversion_needed( obj_in, dtype_in, obj_out, dtype_out, shape ): - tm.assert_numpy_array_equal( - cast_scalar_to_array(shape, obj_in, dtype_in), - np.full(shape, fill_value=obj_out, dtype=dtype_out), - ) + result = cast_scalar_to_array(shape, obj_in, dtype=dtype_in) + expected = np.full(shape, obj_out, dtype=dtype_out) + tm.assert_numpy_array_equal(result, expected) From 90202a05e1b69346e4e341c646ddf8352e832c0d Mon Sep 17 00:00:00 2001 From: lpc Date: Fri, 27 Nov 2020 13:07:48 +0100 Subject: [PATCH 08/13] CR -- WIP --- pandas/core/dtypes/cast.py | 4 +++- pandas/tests/series/test_constructors.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d087474b089c1..940763479f89c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1677,7 +1677,9 @@ def cast_scalar_to_array( # GH36541: filling datetime-like array directly with pd.NaT # raises ValueError: cannot convert float NaN to integer elif is_valid_nat_for_dtype(value, dtype): - value = np.datetime64("NaT") + value = ( + np.datetime64("NaT") if dtype.kind == "M" else np.timedelta64("NaT") + ) values = np.empty(shape, dtype=dtype) values.fill(value) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d790a85c94193..1dc5c6bd9b0bf 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -24,6 +24,7 @@ Period, RangeIndex, Series, + Timedelta, Timestamp, date_range, isna, @@ -1319,6 +1320,21 @@ def test_constructor_dtype_timedelta64(self): s = Series([pd.NaT, np.nan, "1 Day"]) assert s.dtype == "timedelta64[ns]" + @pytest.mark.parametrize( + "value,dtype", + [ + (Timedelta(1), "timedelta64[ns]"), + (Timedelta(1, tz="Pacific/Eastern"), "timedelta64[ns]"), + (Timestamp(1), "datetime64[ns]"), + ], + ) + def test_constructor_timelike_nanoseconds(self, value, dtype): + # GH38032 + ser = Series(value, index=[0], dtype=dtype) + result = ser[0] + expected = value + assert result == expected + # GH 16406 def test_constructor_mixed_tz(self): s = Series([Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")]) From 41c75cf8c0b6cfd3831113773a02e58798a32788 Mon Sep 17 00:00:00 2001 From: lpc Date: Sun, 29 Nov 2020 22:34:59 +0100 Subject: [PATCH 09/13] cr: add test for DF constructor --- pandas/tests/frame/test_constructors.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d32ca454b5fb2..cabf731bbc3f0 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1936,6 +1936,21 @@ def test_constructor_datetimes_with_nulls(self, arr): expected = Series([np.dtype("datetime64[ns]")]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "value,dtype", + [ + (Timedelta(1), "timedelta64[ns]"), + (Timedelta(1, tz="Pacific/Eastern"), "timedelta64[ns]"), + (Timestamp(1), "datetime64[ns]"), + ], + ) + def test_constructor_timelike_nanoseconds(self, value, dtype): + # GH38032 + df = DataFrame(value, index=[0], columns=[0], dtype=dtype) + result = df.at[0, 0] + expected = value + assert result == expected + def test_constructor_for_list_with_dtypes(self): # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)]) From 940bc2c2af87719c56cbed5d56ad35171adde486 Mon Sep 17 00:00:00 2001 From: lpc Date: Sun, 29 Nov 2020 22:32:25 +0100 Subject: [PATCH 10/13] cr: assert dtype in _cast_scalar_to_array --- pandas/core/dtypes/cast.py | 5 ++++- pandas/tests/dtypes/cast/test_infer_dtype.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 940763479f89c..fcc6d5d0ed537 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1653,13 +1653,16 @@ def cast_scalar_to_array( ndarray of shape, filled with value, of specified / inferred dtype """ + # that's what the type annotation indicates + assert isinstance(dtype, (type(None), str, np.dtype)) + if dtype is None: dtype, value = infer_dtype_from_scalar(value) else: if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) empty = shape and not any(shape) - # rem: type coercion if empty: sometimes yes, sometimes no ? + # dtype coercion when empty: sometimes yes, sometimes no? if not empty and is_integer_dtype(dtype) and isna(value): # coerce if we have nan for an integer dtype diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 473b1f702bd97..178f9a0343e00 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -194,7 +194,7 @@ def test_cast_scalar_to_array(obj, dtype, shape): exp = np.empty(shape, dtype=dtype) exp.fill(obj) - arr = cast_scalar_to_array(shape, obj, dtype=dtype) + arr = cast_scalar_to_array(shape, obj, dtype=np.dtype(dtype)) tm.assert_numpy_array_equal(arr, exp) @@ -215,6 +215,6 @@ def test_cast_scalar_to_array(obj, dtype, shape): def test_cast_scalar_to_array_conversion_needed( obj_in, dtype_in, obj_out, dtype_out, shape ): - result = cast_scalar_to_array(shape, obj_in, dtype=dtype_in) + result = cast_scalar_to_array(shape, obj_in, dtype=np.dtype(dtype_in)) expected = np.full(shape, obj_out, dtype=dtype_out) tm.assert_numpy_array_equal(result, expected) From 96e1876e11131e36fa4dc2d601db74df915bab9a Mon Sep 17 00:00:00 2001 From: lpc Date: Sun, 29 Nov 2020 23:38:18 +0100 Subject: [PATCH 11/13] whatsnew entry --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bb06bcc9b5aa8..3d35fb8a3b559 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -557,6 +557,7 @@ Datetimelike - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`) - Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`) - Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`) +- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) Timedelta ^^^^^^^^^ From 8ae4dfead17b10ae118c23298b6e264717d7a730 Mon Sep 17 00:00:00 2001 From: lpc Date: Mon, 30 Nov 2020 00:38:55 +0100 Subject: [PATCH 12/13] correcting constructor tests --- pandas/tests/frame/test_constructors.py | 12 ++++++------ pandas/tests/series/test_constructors.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1fb1736cb457e..f850cc140beb1 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1935,18 +1935,18 @@ def test_constructor_datetimes_with_nulls(self, arr): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "value,dtype", + "scalar,dtype", [ (Timedelta(1), "timedelta64[ns]"), - (Timedelta(1, tz="Pacific/Eastern"), "timedelta64[ns]"), (Timestamp(1), "datetime64[ns]"), + (Timestamp(1, tz="US/Eastern"), "datetime64[ns]"), ], ) - def test_constructor_timelike_nanoseconds(self, value, dtype): + def test_constructor_timelike_nanoseconds(self, scalar, dtype): # GH38032 - df = DataFrame(value, index=[0], columns=[0], dtype=dtype) - result = df.at[0, 0] - expected = value + df = DataFrame(scalar, index=[0], columns=[0], dtype=dtype) + result = df.at[0, 0].value + expected = scalar.value assert result == expected def test_constructor_for_list_with_dtypes(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 54d9bb93ab6c4..891650d381318 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1321,18 +1321,18 @@ def test_constructor_dtype_timedelta64(self): assert s.dtype == "timedelta64[ns]" @pytest.mark.parametrize( - "value,dtype", + "scalar,dtype", [ (Timedelta(1), "timedelta64[ns]"), - (Timedelta(1, tz="Pacific/Eastern"), "timedelta64[ns]"), (Timestamp(1), "datetime64[ns]"), + (Timestamp(1, tz="US/Eastern"), "timedelta64[ns]"), ], ) - def test_constructor_timelike_nanoseconds(self, value, dtype): + def test_constructor_timelike_nanoseconds(self, scalar, dtype): # GH38032 - ser = Series(value, index=[0], dtype=dtype) - result = ser[0] - expected = value + ser = Series(scalar, index=[0], dtype=dtype) + result = ser[0].value + expected = scalar.value assert result == expected # GH 16406 From 85a4016215c7ad4a73569feb3fc682e774dc81d2 Mon Sep 17 00:00:00 2001 From: lpc Date: Mon, 30 Nov 2020 14:54:08 +0100 Subject: [PATCH 13/13] prevent Timestamp <-> Timedelta coercion + missed fix on tests --- pandas/core/dtypes/cast.py | 13 ++++++++----- pandas/tests/series/test_constructors.py | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 81e4597a515e8..326db73264d19 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1631,16 +1631,19 @@ def cast_scalar_to_array( dtype = np.dtype("object") if not isna(value): value = ensure_str(value) - elif dtype.kind in ["M", "m"]: + elif dtype.kind == "m": # GH38032: filling in Timedelta/Timestamp drops nanoseconds - if isinstance(value, (Timedelta, Timestamp)): + if isinstance(value, Timedelta): value = value.to_numpy() # GH36541: filling datetime-like array directly with pd.NaT # raises ValueError: cannot convert float NaN to integer elif is_valid_nat_for_dtype(value, dtype): - value = ( - np.datetime64("NaT") if dtype.kind == "M" else np.timedelta64("NaT") - ) + value = np.timedelta64("NaT") + elif dtype.kind == "M": + if isinstance(value, Timestamp): + value = value.to_numpy() + elif is_valid_nat_for_dtype(value, dtype): + value = np.datetime64("NaT") values = np.empty(shape, dtype=dtype) values.fill(value) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 891650d381318..3d17a797d486e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1325,7 +1325,7 @@ def test_constructor_dtype_timedelta64(self): [ (Timedelta(1), "timedelta64[ns]"), (Timestamp(1), "datetime64[ns]"), - (Timestamp(1, tz="US/Eastern"), "timedelta64[ns]"), + (Timestamp(1, tz="US/Eastern"), "datetime64[ns]"), ], ) def test_constructor_timelike_nanoseconds(self, scalar, dtype):