diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 330510c2c883c..f759e33d395b8 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -905,6 +905,7 @@ Indexing - :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`) - Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) - Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) +- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years; use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) Missing ^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1edcf581c5131..cd2cc2d8b324f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6976,8 +6976,7 @@ def asof(self, where, subset=None): if not is_list: start = self.index[0] if isinstance(self.index, PeriodIndex): - where = Period(where, freq=self.index.freq).ordinal - start = start.ordinal + where = Period(where, freq=self.index.freq) if where < start: if not is_series: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6ba778bc83dd1..a7c4bfbacc5e8 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -479,17 +479,19 @@ def astype(self, dtype, copy=True, how="start"): @Substitution(klass="PeriodIndex") @Appender(_shared_docs["searchsorted"]) def searchsorted(self, value, side="left", sorter=None): - if isinstance(value, Period): - if value.freq != self.freq: - raise raise_on_incompatible(self, value) - value = value.ordinal + if isinstance(value, Period) or value is NaT: + self._data._check_compatible_with(value) elif isinstance(value, str): try: - 
value = Period(value, freq=self.freq).ordinal + value = Period(value, freq=self.freq) except DateParseError: raise KeyError(f"Cannot interpret '{value}' as period") + elif not isinstance(value, PeriodArray): + raise TypeError( + "PeriodIndex.searchsorted requires either a Period or PeriodArray" + ) - return self._ndarray_values.searchsorted(value, side=side, sorter=sorter) + return self._data.searchsorted(value, side=side, sorter=sorter) @property def is_full(self) -> bool: @@ -722,8 +724,7 @@ def _get_string_slice(self, key): t1, t2 = self._parsed_string_to_bounds(reso, parsed) return slice( - self.searchsorted(t1.ordinal, side="left"), - self.searchsorted(t2.ordinal, side="right"), + self.searchsorted(t1, side="left"), self.searchsorted(t2, side="right") ) def _convert_tolerance(self, tolerance, target): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 056ba73edfe34..0e43880dfda07 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1586,7 +1586,10 @@ def _get_period_bins(self, ax): rng += freq_mult # adjust bin edge indexes to account for base rng -= bin_shift - bins = memb.searchsorted(rng, side="left") + + # Wrap in PeriodArray for PeriodArray.searchsorted + prng = type(memb._data)(rng, dtype=memb.dtype) + bins = memb.searchsorted(prng, side="left") if nat_count > 0: # NaT handling as in pandas._lib.lib.generate_bins_dt64() diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 774369794cb90..0291be0a4083e 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series, Timestamp, date_range, to_datetime +from pandas import DataFrame, Period, Series, Timestamp, date_range, to_datetime import pandas._testing as tm @@ -80,6 +80,12 @@ def test_missing(self, date_range_frame): ) tm.assert_frame_equal(result, expected) + # Check that we handle PeriodIndex 
correctly, don't end up with + # period.ordinal for series name + df = df.to_period("D") + result = df.asof("1989-12-31") + assert isinstance(result.name, Period) + def test_all_nans(self, date_range_frame): # GH 15713 # DataFrame is all nans diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 4eacf4038b794..16fa0b0c25925 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -451,7 +451,7 @@ def test_index_duplicate_periods(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) - result = ts[2007] + result = ts["2007"] expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 @@ -461,7 +461,7 @@ def test_index_duplicate_periods(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) - result = ts[2007] + result = ts["2007"] expected = ts[idx == "2007"] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 2135b8a992128..28ab14af71362 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -231,14 +231,43 @@ def test_searchsorted(self, freq): p2 = pd.Period("2014-01-04", freq=freq) assert pidx.searchsorted(p2) == 3 - msg = "Input has different freq=H from PeriodIndex" + assert pidx.searchsorted(pd.NaT) == 0 + + msg = "Input has different freq=H from PeriodArray" with pytest.raises(IncompatibleFrequency, match=msg): pidx.searchsorted(pd.Period("2014-01-01", freq="H")) - msg = "Input has different freq=5D from PeriodIndex" + msg = "Input has different freq=5D from PeriodArray" with pytest.raises(IncompatibleFrequency, match=msg): pidx.searchsorted(pd.Period("2014-01-01", freq="5D")) + def test_searchsorted_invalid(self): + pidx = pd.PeriodIndex( + ["2014-01-01", "2014-01-02", 
"2014-01-03", "2014-01-04", "2014-01-05"], + freq="D", + ) + + other = np.array([0, 1], dtype=np.int64) + + msg = "requires either a Period or PeriodArray" + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other.astype("timedelta64[ns]")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64(4)) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64("NaT", "ms")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64(4, "ns")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64("NaT", "ns")) + class TestPeriodIndexConversion: def test_tolist(self):