diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 12b0d90e68ab9..87ebff90c6021 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -470,7 +470,7 @@ Other API changes - Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`) - When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) - :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`) -- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`) +- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`, :issue:`50453`) - :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`) - Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`) - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c1a30e03235b5..af6afedf99275 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -454,8 +454,6 @@ cpdef array_to_datetime( npy_datetimestruct dts NPY_DATETIMEUNIT out_bestunit bint utc_convert = bool(utc) - bint seen_integer = False - bint seen_datetime = False bint seen_datetime_offset = False bint is_raise = errors=="raise" bint is_ignore = errors=="ignore" @@ -486,7 +484,6 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT elif PyDateTime_Check(val): - seen_datetime = True if val.tzinfo is not None: found_tz = True else: @@ -501,12 +498,10 @@ cpdef array_to_datetime( result[i] = parse_pydatetime(val, &dts, utc_convert) elif PyDate_Check(val): - seen_datetime = True iresult[i] = pydate_to_dt64(val, &dts) check_dts_bounds(&dts) elif is_datetime64_object(val): - seen_datetime = True iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) elif is_integer_object(val) or is_float_object(val): @@ -521,7 +516,6 @@ cpdef array_to_datetime( ) return values, tz_out # these must be ns unit by-definition - seen_integer = True if val != val or val == NPY_NAT: iresult[i] = NPY_NAT @@ -654,17 +648,6 @@ cpdef array_to_datetime( except TypeError: return _array_to_datetime_object(values, errors, dayfirst, yearfirst) - if seen_datetime and seen_integer: - # we have mixed datetimes & integers - - if is_coerce: - # coerce all of the integers/floats to NaT, preserve - # the datetimes and other convertibles - for i in range(n): - val = values[i] - if is_integer_object(val) or is_float_object(val): - result[i] = NPY_NAT - if seen_datetime_offset and not utc_convert: # GH#17697 # 1) If all the offsets are equal, return one offset for diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 83e40f5f1d98b..417b7e85ce6f9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1588,29 +1588,24 @@ def test_unit_with_numeric_coerce(self, cache, exp, arr, warning): tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "exp, arr", + "arr", [ - [ - ["2013-01-01", "NaT", "NaT"], - [Timestamp("20130101"), 1.434692e18, 1.432766e18], - ], - [ - ["NaT", "NaT", "2013-01-01"], - [1.434692e18, 1.432766e18, Timestamp("20130101")], - ], + [Timestamp("20130101"), 1.434692e18, 1.432766e18], + [1.434692e18, 1.432766e18, Timestamp("20130101")], ], ) - def test_unit_mixed(self, cache, exp, arr): - + def test_unit_mixed(self, cache, arr): + # GH#50453 pre-2.0 with mixed numeric/datetimes and errors="coerce" + # the numeric entries would be coerced to NaT, was never clear exactly + # why. # mixed integers/datetimes - expected = DatetimeIndex(exp) + expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]") result = to_datetime(arr, errors="coerce", cache=cache) tm.assert_index_equal(result, expected) # GH#49037 pre-2.0 this raised, but it always worked with Series, # was never clear why it was disallowed result = to_datetime(arr, errors="raise", cache=cache) - expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]") tm.assert_index_equal(result, expected) result = DatetimeIndex(arr)