diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 59cc709359a8d..694b334614975 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -339,6 +339,9 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`)
 - Performance improvement in :meth:`RangeIndex.insert` returning a :class:`RangeIndex` instead of a :class:`Index` when the :class:`RangeIndex` is empty. (:issue:`57833`)
 - Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`)
+- Performance improvement in :meth:`RangeIndex.searchsorted` (:issue:`58376`)
+- Performance improvement in :meth:`RangeIndex.to_numpy` when specifying an ``na_value`` (:issue:`58376`)
+- Performance improvement in :meth:`RangeIndex.value_counts` (:issue:`58376`)
 - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 424f0609dd485..bda3058ca663c 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -547,7 +547,6 @@ def array(self) -> ExtensionArray:
         """
         raise AbstractMethodError(self)
 
-    @final
     def to_numpy(
         self,
         dtype: npt.DTypeLike | None = None,
@@ -659,7 +658,7 @@ def to_numpy(
             )
 
         values = self._values
-        if fillna:
+        if fillna and self.hasnans:
             if not can_hold_element(values, na_value):
                 # if we can't hold the na_value asarray either makes a copy or we
                 # error before modifying values. The asarray later on thus won't make
@@ -899,7 +898,6 @@ def _map_values(self, mapper, na_action=None):
 
         return algorithms.map_array(arr, mapper, na_action=na_action)
 
-    @final
     def value_counts(
         self,
         normalize: bool = False,
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 0ba3c22093c69..bd9e8b84fd82a 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -57,9 +57,13 @@
         Dtype,
         JoinHow,
         NaPosition,
+        NumpySorter,
         Self,
         npt,
     )
+
+    from pandas import Series
+
 
 _empty_range = range(0)
 _dtype_int64 = np.dtype(np.int64)
@@ -1359,3 +1363,78 @@ def take(  # type: ignore[override]
             taken += self.start
 
         return self._shallow_copy(taken, name=self.name)
+
+    def value_counts(
+        self,
+        normalize: bool = False,
+        sort: bool = True,
+        ascending: bool = False,
+        bins=None,
+        dropna: bool = True,
+    ) -> Series:
+        from pandas import Series
+
+        if bins is not None:
+            # Binning needs the generic implementation.
+            return super().value_counts(
+                normalize=normalize,
+                sort=sort,
+                ascending=ascending,
+                bins=bins,
+                dropna=dropna,
+            )
+        # Without bins every entry is counted exactly once, so sort, ascending
+        # and dropna are not consulted on this path.
+        name = "proportion" if normalize else "count"
+        data: npt.NDArray[np.floating] | npt.NDArray[np.signedinteger] = np.ones(
+            len(self), dtype=np.int64
+        )
+        if normalize:
+            data = data / len(self)
+        return Series(data, index=self.copy(), name=name)
+
+    def searchsorted(  # type: ignore[override]
+        self,
+        value,
+        side: Literal["left", "right"] = "left",
+        sorter: NumpySorter | None = None,
+    ) -> npt.NDArray[np.intp] | np.intp:
+        # Closed-form fast path for integer input; an invalid ``side``, an
+        # explicit ``sorter`` or non-integer values defer to the generic
+        # implementation.
+        if side not in {"left", "right"} or sorter is not None:
+            return super().searchsorted(value=value, side=side, sorter=sorter)
+
+        was_scalar = False
+        if is_scalar(value):
+            was_scalar = True
+            array_value = np.array([value])
+        else:
+            array_value = np.asarray(value)
+        kind = array_value.dtype.kind
+        if kind not in "iu" or (kind == "u" and array_value.dtype.itemsize >= 8):
+            # uint64 values may not fit in int64, so they take the generic path too.
+            return super().searchsorted(value=value, side=side, sorter=sorter)
+        # Compute in int64: subtracting ``start`` in a narrow or unsigned dtype
+        # could wrap around (or raise) and produce wrong positions.
+        array_value = array_value.astype(np.int64, copy=False)
+
+        if flip := (self.step < 0):
+            # Descending range: search the reversed (ascending) range and mirror
+            # the positions afterwards.
+            rng = self._range[::-1]
+            start = rng.start
+            step = rng.step
+            shift = side == "right"
+        else:
+            start = self.start
+            step = self.step
+            shift = side == "left"
+        result = (array_value - start - int(shift)) // step + 1
+        if flip:
+            result = len(self) - result
+        # Clip positions for values outside the range to [0, len(self)].
+        result = np.maximum(np.minimum(result, len(self)), 0)
+        if was_scalar:
+            return np.intp(result.item())
+        return result.astype(np.intp, copy=False)
diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
index 727edb7ae30ad..1f9df30d60c11 100644
--- a/pandas/tests/indexes/ranges/test_range.py
+++ b/pandas/tests/indexes/ranges/test_range.py
@@ -874,3 +874,47 @@ def test_getitem_integers_return_index():
     result = RangeIndex(0, 10, 2, name="foo")[[0, 1, -1]]
     expected = Index([0, 2, 8], dtype="int64", name="foo")
     tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize(
+    "rng",
+    [
+        range(3),
+        range(0),
+        range(0, 3, 2),
+        range(3, -3, -2),
+    ],
+)
+def test_value_counts(sort, dropna, ascending, normalize, rng):
+    ri = RangeIndex(rng, name="A")
+    result = ri.value_counts(
+        normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
+    )
+    expected = Index(list(rng), name="A").value_counts(
+        normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
+    )
+    tm.assert_series_equal(result, expected, check_index_type=False)
+
+
+@pytest.mark.parametrize("side", ["left", "right"])
+@pytest.mark.parametrize("value", [0, -5, 5, -3, np.array([-5, -3, 0, 5])])
+def test_searchsorted(side, value):
+    ri = RangeIndex(-3, 3, 2)
+    result = ri.searchsorted(value=value, side=side)
+    expected = Index(list(ri)).searchsorted(value=value, side=side)
+    if isinstance(value, int):
+        assert result == expected
+    else:
+        tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("side", ["left", "right"])
+@pytest.mark.parametrize("value_dtype", ["uint8", "uint16", "uint64"])
+def test_searchsorted_unsigned_value(side, value_dtype):
+    # the fast path must not wrap around in the dtype of ``value``
+    ri = RangeIndex(100, 200, 10)
+    value = np.array([0, 150, 255], dtype=value_dtype)
+    result = ri.searchsorted(value=value, side=side)
+    expected = Index(list(ri)).searchsorted(value=value, side=side)
+    tm.assert_numpy_array_equal(result, expected)