diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 47f67e9c2a4b3..6eedf9dee5266 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -635,6 +635,7 @@ Indexing
 - Bug in :meth:`Series.__setitem__` with an :class:`IntervalIndex` and a list-like key of integers (:issue:`33473`)
 - Bug in :meth:`Series.__getitem__` allowing missing labels with ``np.ndarray``, :class:`Index`, :class:`Series` indexers but not ``list``, these now all raise ``KeyError`` (:issue:`33646`)
 - Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` where index was assumed to be monotone increasing (:issue:`33756`)
+- Indexing with a list of strings representing datetimes failed on :class:`DatetimeIndex` or :class:`PeriodIndex` (:issue:`11278`)
 
 Missing
 ^^^^^^^
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index e26dc5b9e4fb3..a12d5b64bb06c 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -29,6 +29,8 @@
 from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
 from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
 from pandas.core.base import IndexOpsMixin
+import pandas.core.common as com
+from pandas.core.construction import array as pd_array, extract_array
 import pandas.core.indexes.base as ibase
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.core.indexes.extension import (
@@ -39,6 +41,7 @@
 from pandas.core.indexes.numeric import Int64Index
 from pandas.core.ops import get_op_result_name
 from pandas.core.sorting import ensure_key_mapped
+from pandas.core.tools.datetimes import DateParseError
 from pandas.core.tools.timedeltas import to_timedelta
 
 from pandas.tseries.offsets import DateOffset, Tick
@@ -573,6 +576,22 @@ def _wrap_joined_index(self, joined: np.ndarray, other):
 
         return type(self)._simple_new(new_data, name=name)
 
+    @doc(Index._convert_arr_indexer)
+    def _convert_arr_indexer(self, keyarr):
+        if lib.infer_dtype(keyarr) == "string":
+            # GH 11278: an all-string key may spell out datetime, timedelta or
+            # period labels, so first try to build an array of self.dtype.
+            try:
+                extension_arr = pd_array(keyarr, self.dtype)
+            except (ValueError, DateParseError):
+                # Not parseable as self.dtype: hand the key back unchanged
+                return keyarr
+
+            converted_arr = extract_array(extension_arr, extract_numpy=True)
+        else:
+            converted_arr = com.asarray_tuplesafe(keyarr)
+        return converted_arr
+
 
 class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
     """
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 5752f00ca5a18..e51ec33ba8519 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1228,11 +1228,13 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
         indexer, keyarr = ax._convert_listlike_indexer(key)
         # We only act on all found values:
         if indexer is not None and (indexer != -1).all():
-            self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
+            self._validate_read_indexer(
+                keyarr, indexer, axis, raise_missing=raise_missing
+            )
             return ax[indexer], indexer
 
         if ax.is_unique and not getattr(ax, "is_overlapping", False):
-            indexer = ax.get_indexer_for(key)
+            indexer = ax.get_indexer_for(keyarr)
             keyarr = ax.reindex(keyarr)[0]
         else:
             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index a1bd6fed32cad..513ca039366cb 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -8,7 +8,7 @@
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, Series, date_range
+from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range
 import pandas._testing as tm
 
 
@@ -535,3 +535,118 @@ def test_partial_set_empty_frame_empty_consistencies(self):
         df.loc[0, "x"] = 1
         expected = DataFrame(dict(x=[1], y=[np.nan]))
         tm.assert_frame_equal(df, expected, check_dtype=False)
+
+    @pytest.mark.parametrize(
+        "idx,labels,expected_idx",
+        [
+            (
+                period_range(start="2000", periods=20, freq="D"),
+                ["2000-01-04", "2000-01-08", "2000-01-12"],
+                [
+                    Period("2000-01-04", freq="D"),
+                    Period("2000-01-08", freq="D"),
+                    Period("2000-01-12", freq="D"),
+                ],
+            ),
+            (
+                date_range(start="2000", periods=20, freq="D"),
+                ["2000-01-04", "2000-01-08", "2000-01-12"],
+                [
+                    Timestamp("2000-01-04", freq="D"),
+                    Timestamp("2000-01-08", freq="D"),
+                    Timestamp("2000-01-12", freq="D"),
+                ],
+            ),
+            (
+                pd.timedelta_range(start="1 day", periods=20),
+                ["4D", "8D", "12D"],
+                [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
+            ),
+        ],
+    )
+    def test_loc_with_list_of_strings_representing_datetimes(
+        self, idx, labels, expected_idx
+    ):
+        # GH 11278: string labels that parse to the index dtype select rows
+        s = Series(range(20), index=idx)
+        df = DataFrame(range(20), index=idx)
+
+        expected_value = [3, 7, 11]  # values stored at the three labels in idx
+        expected_s = Series(expected_value, expected_idx)
+        expected_df = DataFrame(expected_value, expected_idx)
+
+        tm.assert_series_equal(expected_s, s.loc[labels])
+        tm.assert_series_equal(expected_s, s[labels])
+        tm.assert_frame_equal(expected_df, df.loc[labels])
+
+    @pytest.mark.parametrize(
+        "idx,labels",
+        [
+            (
+                period_range(start="2000", periods=20, freq="D"),
+                ["2000-01-04", "2000-01-30"],
+            ),
+            (
+                date_range(start="2000", periods=20, freq="D"),
+                ["2000-01-04", "2000-01-30"],
+            ),
+            (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
+        ],
+    )
+    def test_loc_with_list_of_strings_representing_datetimes_missing_value(
+        self, idx, labels
+    ):
+        # GH 11278: a parseable label absent from the index raises KeyError
+        s = Series(range(20), index=idx)
+        df = DataFrame(range(20), index=idx)
+        msg = r"with any missing labels"
+
+        with pytest.raises(KeyError, match=msg):
+            s.loc[labels]
+        with pytest.raises(KeyError, match=msg):
+            s[labels]
+        with pytest.raises(KeyError, match=msg):
+            df.loc[labels]
+
+    @pytest.mark.parametrize(
+        "idx,labels,msg",
+        [
+            (
+                period_range(start="2000", periods=20, freq="D"),
+                ["4D", "8D"],
+                (
+                    r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
+                    r"are in the \[index\]"
+                ),
+            ),
+            (
+                date_range(start="2000", periods=20, freq="D"),
+                ["4D", "8D"],
+                (
+                    r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
+                    r"are in the \[index\]"
+                ),
+            ),
+            (
+                pd.timedelta_range(start="1 day", periods=20),
+                ["2000-01-04", "2000-01-08"],
+                (
+                    r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
+                    r"dtype='object'\)\] are in the \[index\]"
+                ),
+            ),
+        ],
+    )
+    def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
+        self, idx, labels, msg
+    ):
+        # GH 11278: labels that do not parse to the index dtype raise KeyError
+        s = Series(range(20), index=idx)
+        df = DataFrame(range(20), index=idx)
+
+        with pytest.raises(KeyError, match=msg):
+            s.loc[labels]
+        with pytest.raises(KeyError, match=msg):
+            s[labels]
+        with pytest.raises(KeyError, match=msg):
+            df.loc[labels]