diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 88bf0e005a221..16426e11c5a24 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -759,6 +759,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.to_period` not infering the frequency when called with no arguments (:issue:`33358`) - Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`) - Bug in :meth:`DatetimeIndex.intersection` losing ``freq`` and timezone in some cases (:issue:`33604`) +- Bug in :meth:`DatetimeIndex.get_indexer` where incorrect output would be returned for mixed datetime-like targets (:issue:`33741`) - Bug in :class:`DatetimeIndex` addition and subtraction with some types of :class:`DateOffset` objects incorrectly retaining an invalid ``freq`` attribute (:issue:`33779`) - Bug in :class:`DatetimeIndex` where setting the ``freq`` attribute on an index could silently change the ``freq`` attribute on another index viewing the same data (:issue:`33552`) - :meth:`DataFrame.min`/:meth:`DataFrame.max` not returning consistent result with :meth:`Series.min`/:meth:`Series.max` when called on objects initialized with empty :func:`pd.to_datetime` diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 222b7af4e4b1c..ea97bab2198eb 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1380,8 +1380,10 @@ def infer_dtype(value: object, skipna: bool = True) -> str: return "mixed-integer" elif PyDateTime_Check(val): - if is_datetime_array(values): + if is_datetime_array(values, skipna=skipna): return "datetime" + elif is_date_array(values, skipna=skipna): + return "date" elif PyDate_Check(val): if is_date_array(values, skipna=skipna): @@ -1752,10 +1754,10 @@ cdef class DatetimeValidator(TemporalValidator): return is_null_datetime64(value) -cpdef bint is_datetime_array(ndarray values): +cpdef bint is_datetime_array(ndarray values, bint skipna=True): cdef: DatetimeValidator validator = DatetimeValidator(len(values), - skipna=True) + skipna=skipna) return validator.validate(values) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fb266b4abba51..746fd140e48a1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4701,7 +4701,10 @@ def _maybe_promote(self, other: "Index"): """ if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex): - return type(other)(self), other + try: + return type(other)(self), other + except OutOfBoundsDatetime: + return self, other elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex): # TODO: we dont have tests that get here return type(other)(self), other diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 4c4a5547247fc..e97716f7a5e9c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1106,6 +1106,21 @@ def test_date(self): result = lib.infer_dtype(dates, skipna=True) assert result == "date" + @pytest.mark.parametrize( + "values", + [ + [date(2020, 1, 1), pd.Timestamp("2020-01-01")], + [pd.Timestamp("2020-01-01"), date(2020, 1, 1)], + [date(2020, 1, 1), pd.NaT], + [pd.NaT, date(2020, 1, 1)], + ], + ) + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype_date_order_invariant(self, values, skipna): + # https://github.com/pandas-dev/pandas/issues/33741 + result = lib.infer_dtype(values, skipna=skipna) + assert result == "date" + def test_is_numeric_array(self): assert lib.is_float_array(np.array([1, 2.0])) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 097ee20534e4e..f08472fe72631 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -1,4 +1,4 @@ -from datetime import datetime, time, timedelta +from datetime import date, datetime, time, timedelta import numpy as np import pytest @@ -575,6 +575,38 @@ def test_get_indexer(self): with pytest.raises(ValueError, match="abbreviation w/o a number"): idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") + @pytest.mark.parametrize( + "target", + [ + [date(2020, 1, 1), pd.Timestamp("2020-01-02")], + [pd.Timestamp("2020-01-01"), date(2020, 1, 2)], + ], + ) + def test_get_indexer_mixed_dtypes(self, target): + # https://github.com/pandas-dev/pandas/issues/33741 + values = pd.DatetimeIndex( + [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")] + ) + result = values.get_indexer(target) + expected = np.array([0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "target, positions", + [ + ([date(9999, 1, 1), pd.Timestamp("2020-01-01")], [-1, 0]), + ([pd.Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]), + ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]), + ], + ) + def test_get_indexer_out_of_bounds_date(self, target, positions): + values = pd.DatetimeIndex( + [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")] + ) + result = values.get_indexer(target) + expected = np.array(positions, dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + class TestMaybeCastSliceBound: def test_maybe_cast_slice_bounds_empty(self):