Skip to content

REF: Avoid new object creation when reverse slicing when possible #57902

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2371,11 +2371,12 @@ def factorize(
):
if self.freq is not None:
# We must be unique, so can short-circuit (and retain freq)
codes = np.arange(len(self), dtype=np.intp)
uniques = self.copy() # TODO: copy or view?
if sort and self.freq.n < 0:
codes = codes[::-1]
uniques = uniques[::-1]
codes = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
uniques = self[::-1]
else:
codes = np.arange(len(self), dtype=np.intp)
uniques = self.copy() # TODO: copy or view?
return codes, uniques

if sort:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2116,7 +2116,7 @@ def droplevel(self, level: IndexLabel = 0):
if not isinstance(level, (tuple, list)):
level = [level]

levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
levnums = sorted((self._get_level_number(lev) for lev in level), reverse=True)

return self._drop_level_numbers(levnums)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3589,7 +3589,7 @@ def _reorder_indexer(
new_order = key_order_map[self.codes[i][indexer]]
elif isinstance(k, slice) and k.step is not None and k.step < 0:
# flip order for negative step
new_order = np.arange(n)[::-1][indexer]
new_order = np.arange(n - 1, -1, -1)[indexer]
elif isinstance(k, slice) and k.start is None and k.stop is None:
# slice(None) should not determine order GH#31330
new_order = np.ones((n,), dtype=np.intp)[indexer]
Expand Down
54 changes: 27 additions & 27 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@
_dtype_int64 = np.dtype(np.int64)


def min_fitting_element(start: int, step: int, lower_limit: int) -> int:
    """
    Return the smallest value of the form ``start + k * abs(step)`` that is
    greater than or equal to ``lower_limit``.

    Parameters
    ----------
    start : int
        Anchor value of the arithmetic progression.
    step : int
        Spacing of the progression; only its magnitude is used, so a
        negative step yields the same result as its positive counterpart.
    lower_limit : int
        Inclusive lower bound the returned value must satisfy.

    Returns
    -------
    int
        The first value on the progression at or above ``lower_limit``.
        ``k`` may be negative when ``start`` lies above ``lower_limit``.
    """
    stride = abs(step)
    # Ceiling division expressed through floor division on the negated
    # numerator, keeping the computation in exact integer arithmetic
    # (no float round-off for very large limits).
    steps_needed = -((start - lower_limit) // stride)
    return start + stride * steps_needed


class RangeIndex(Index):
"""
Immutable Index implementing a monotonic integer range.
Expand Down Expand Up @@ -586,25 +592,30 @@ def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
kwargs.pop("kind", None) # e.g. "mergesort" is irrelevant
nv.validate_argsort(args, kwargs)

start, stop, step = None, None, None
if self._range.step > 0:
result = np.arange(len(self), dtype=np.intp)
if ascending:
start = len(self)
else:
start, stop, step = len(self) - 1, -1, -1
elif ascending:
start, stop, step = len(self) - 1, -1, -1
else:
result = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
start = len(self)

if not ascending:
result = result[::-1]
return result
return np.arange(start, stop, step, dtype=np.intp)

def factorize(
self,
sort: bool = False,
use_na_sentinel: bool = True,
) -> tuple[npt.NDArray[np.intp], RangeIndex]:
codes = np.arange(len(self), dtype=np.intp)
uniques = self
if sort and self.step < 0:
codes = codes[::-1]
uniques = uniques[::-1]
codes = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
uniques = self[::-1]
else:
codes = np.arange(len(self), dtype=np.intp)
uniques = self
return codes, uniques

def equals(self, other: object) -> bool:
Expand Down Expand Up @@ -715,26 +726,15 @@ def _intersection(self, other: Index, sort: bool = False):
# intersection disregarding the lower bounds
tmp_start = first.start + (second.start - first.start) * first.step // gcd * s
new_step = first.step * second.step // gcd
new_range = range(tmp_start, int_high, new_step)
new_index = self._simple_new(new_range)

# adjust index to limiting interval
new_start = new_index._min_fitting_element(int_low)
new_range = range(new_start, new_index.stop, new_index.step)
new_index = self._simple_new(new_range)
new_start = min_fitting_element(tmp_start, new_step, int_low)
new_range = range(new_start, int_high, new_step)

if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
new_index = new_index[::-1]
if (self.step < 0 and other.step < 0) is not (new_range.step < 0):
new_range = new_range[::-1]

if sort is None:
new_index = new_index.sort_values()

return new_index

def _min_fitting_element(self, lower_limit: int) -> int:
"""Returns the smallest element greater than or equal to the limit"""
no_steps = -(-(lower_limit - self.start) // abs(self.step))
return self.start + abs(self.step) * no_steps
return self._simple_new(new_range)

def _extended_gcd(self, a: int, b: int) -> tuple[int, int, int]:
"""
Expand Down Expand Up @@ -920,9 +920,9 @@ def _difference(self, other, sort=None):
# e.g. range(10) and range(0, 10, 3)
return super()._difference(other, sort=sort)

new_index = type(self)._simple_new(new_rng, name=res_name)
if first is not self._range:
new_index = new_index[::-1]
new_rng = new_rng[::-1]
new_index = type(self)._simple_new(new_rng, name=res_name)

return new_index

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,7 @@ def _contains_slice(x: object) -> bool:
# GH#41369 Loop in reverse order ensures indexing along columns before rows
# which selects only necessary blocks which avoids dtype conversion if possible
axis = len(tup) - 1
for key in tup[::-1]:
for key in reversed(tup):
if com.is_null_slice(key):
axis -= 1
continue
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1549,9 +1549,9 @@ def _insert_update_blklocs_and_blknos(self, loc) -> None:
self._blklocs = np.append(self._blklocs, 0)
self._blknos = np.append(self._blknos, len(self.blocks))
elif loc == 0:
# np.append is a lot faster, let's use it if we can.
self._blklocs = np.append(self._blklocs[::-1], 0)[::-1]
self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1]
# As of numpy 1.26.4, np.concatenate is faster than np.append
self._blklocs = np.concatenate([[0], self._blklocs])
self._blknos = np.concatenate([[len(self.blocks)], self._blknos])
else:
new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos(
self.blklocs, self.blknos, loc, len(self.blocks)
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,9 +910,10 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
raise ValueError("Columns with duplicate values are not supported in stack")

# If we need to drop `level` from columns, it needs to be in descending order
set_levels = set(level)
drop_levnums = sorted(level, reverse=True)
stack_cols = frame.columns._drop_level_numbers(
[k for k in range(frame.columns.nlevels) if k not in level][::-1]
[k for k in range(frame.columns.nlevels - 1, -1, -1) if k not in set_levels]
)
if len(level) > 1:
# Arrange columns in the order we want to take them, e.g. level=[2, 0, 1]
Expand All @@ -936,7 +937,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
idx = (idx,)
gen = iter(idx)
column_indexer = tuple(
next(gen) if k in level else slice(None)
next(gen) if k in set_levels else slice(None)
for k in range(frame.columns.nlevels)
)
data = frame.loc[:, column_indexer]
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5510,9 +5510,9 @@ def case_when(
replacements = updated_replacements
default = default.astype(common_dtype)

counter = reversed(range(len(conditions)))
counter = range(len(conditions) - 1, -1, -1)
for position, condition, replacement in zip(
counter, conditions[::-1], replacements[::-1]
counter, reversed(conditions), reversed(replacements)
):
try:
default = default.mask(
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import itertools
from typing import (
TYPE_CHECKING,
Callable,
Expand Down Expand Up @@ -334,13 +335,15 @@ def lexsort_indexer(
raise ValueError(f"invalid na_position: {na_position}")

if isinstance(orders, bool):
orders = [orders] * len(keys)
orders = itertools.repeat(orders, len(keys))
elif orders is None:
orders = [True] * len(keys)
orders = itertools.repeat(True, len(keys))
else:
orders = reversed(orders)

labels = []

for k, order in zip(keys, orders):
for k, order in zip(reversed(keys), orders):
k = ensure_key_mapped(k, key)
if codes_given:
codes = cast(np.ndarray, k)
Expand All @@ -361,7 +364,7 @@ def lexsort_indexer(

labels.append(codes)

return np.lexsort(labels[::-1])
return np.lexsort(labels)


def nargsort(
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/indexes/ranges/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
RangeIndex,
)
import pandas._testing as tm
from pandas.core.indexes.range import min_fitting_element


class TestRangeIndex:
Expand Down Expand Up @@ -419,21 +420,21 @@ def test_extended_gcd(self, simple_index):
assert 2 == result[0]

def test_min_fitting_element(self):
result = RangeIndex(0, 20, 2)._min_fitting_element(1)
result = min_fitting_element(0, 2, 1)
assert 2 == result

result = RangeIndex(1, 6)._min_fitting_element(1)
result = min_fitting_element(1, 1, 1)
assert 1 == result

result = RangeIndex(18, -2, -2)._min_fitting_element(1)
result = min_fitting_element(18, -2, 1)
assert 2 == result

result = RangeIndex(5, 0, -1)._min_fitting_element(1)
result = min_fitting_element(5, -1, 1)
assert 1 == result

big_num = 500000000000000000000000

result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
result = min_fitting_element(5, 1, big_num)
assert big_num == result

def test_slice_specialised(self, simple_index):
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/indexes/ranges/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ def test_intersection(self, sort):
# GH 17296: intersect two decreasing RangeIndexes
first = RangeIndex(10, -2, -2)
other = RangeIndex(5, -4, -1)
expected = first.astype(int).intersection(other.astype(int), sort=sort)
result = first.intersection(other, sort=sort).astype(int)
expected = RangeIndex(start=4, stop=-2, step=-2)
result = first.intersection(other, sort=sort)
tm.assert_index_equal(result, expected)

# reversed
result = other.intersection(first, sort=sort).astype(int)
result = other.intersection(first, sort=sort)
tm.assert_index_equal(result, expected)

index = RangeIndex(5, name="foo")
Expand Down