Skip to content

API: to_datetime allow mixed numeric/datetime with errors=coerce #50453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ Other API changes
- Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`)
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`)
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`, :issue:`50453`)
- :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
Expand Down
17 changes: 0 additions & 17 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,6 @@ cpdef array_to_datetime(
npy_datetimestruct dts
NPY_DATETIMEUNIT out_bestunit
bint utc_convert = bool(utc)
bint seen_integer = False
bint seen_datetime = False
bint seen_datetime_offset = False
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
Expand Down Expand Up @@ -486,7 +484,6 @@ cpdef array_to_datetime(
iresult[i] = NPY_NAT

elif PyDateTime_Check(val):
seen_datetime = True
if val.tzinfo is not None:
found_tz = True
else:
Expand All @@ -501,12 +498,10 @@ cpdef array_to_datetime(
result[i] = parse_pydatetime(val, &dts, utc_convert)

elif PyDate_Check(val):
seen_datetime = True
iresult[i] = pydate_to_dt64(val, &dts)
check_dts_bounds(&dts)

elif is_datetime64_object(val):
seen_datetime = True
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)

elif is_integer_object(val) or is_float_object(val):
Expand All @@ -521,7 +516,6 @@ cpdef array_to_datetime(
)
return values, tz_out
# these must be ns unit by-definition
seen_integer = True

if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
Expand Down Expand Up @@ -654,17 +648,6 @@ cpdef array_to_datetime(
except TypeError:
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)

if seen_datetime and seen_integer:
# we have mixed datetimes & integers

if is_coerce:
# coerce all of the integers/floats to NaT, preserve
# the datetimes and other convertibles
for i in range(n):
val = values[i]
if is_integer_object(val) or is_float_object(val):
result[i] = NPY_NAT

if seen_datetime_offset and not utc_convert:
# GH#17697
# 1) If all the offsets are equal, return one offset for
Expand Down
21 changes: 8 additions & 13 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1588,29 +1588,24 @@ def test_unit_with_numeric_coerce(self, cache, exp, arr, warning):
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
"exp, arr",
"arr",
[
[
["2013-01-01", "NaT", "NaT"],
[Timestamp("20130101"), 1.434692e18, 1.432766e18],
],
[
["NaT", "NaT", "2013-01-01"],
[1.434692e18, 1.432766e18, Timestamp("20130101")],
],
[Timestamp("20130101"), 1.434692e18, 1.432766e18],
[1.434692e18, 1.432766e18, Timestamp("20130101")],
],
)
def test_unit_mixed(self, cache, exp, arr):

def test_unit_mixed(self, cache, arr):
# GH#50453 pre-2.0, with mixed numeric/datetimes and errors="coerce",
# the numeric entries would be coerced to NaT; it was never clear exactly
# why.
# mixed integers/datetimes
expected = DatetimeIndex(exp)
expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
result = to_datetime(arr, errors="coerce", cache=cache)
tm.assert_index_equal(result, expected)

# GH#49037 pre-2.0 this raised, but it always worked with Series,
# was never clear why it was disallowed
result = to_datetime(arr, errors="raise", cache=cache)
expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
tm.assert_index_equal(result, expected)

result = DatetimeIndex(arr)
Expand Down