From c73670694f0eac6341094ec016fec62640660c70 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 9 Apr 2021 09:12:07 -0700 Subject: [PATCH] PERF: use ndarray.take instead of algos.take --- pandas/core/groupby/ops.py | 5 ++--- pandas/core/indexes/base.py | 6 ++---- pandas/core/indexes/multi.py | 8 ++------ pandas/core/internals/managers.py | 2 +- pandas/core/sorting.py | 5 ++--- 5 files changed, 9 insertions(+), 17 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5bf9f81e3073d..6de9898a6dec0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -67,7 +67,6 @@ maybe_fill, ) -from pandas.core import algorithms from pandas.core.arrays import ExtensionArray from pandas.core.base import SelectionMixin import pandas.core.common as com @@ -766,7 +765,7 @@ def _aggregate_series_fast(self, obj: Series, func: F): # avoids object / Series creation overhead indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer) - group_index = algorithms.take_nd(group_index, indexer, allow_fill=False) + group_index = group_index.take(indexer) grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups) result, counts = grouper.get_result() return result, counts @@ -997,7 +996,7 @@ def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0): @cache_readonly def slabels(self) -> np.ndarray: # np.ndarray[np.intp] # Sorted labels - return algorithms.take_nd(self.labels, self._sort_idx, allow_fill=False) + return self.labels.take(self._sort_idx) @cache_readonly def _sort_idx(self) -> np.ndarray: # np.ndarray[np.intp] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a0727500ecc81..6bb0dc8398734 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2998,7 +2998,7 @@ def _union(self, other: Index, sort): missing = algos.unique1d(self.get_indexer_non_unique(other)[1]) if len(missing) > 0: - other_diff = algos.take_nd(rvals, missing, allow_fill=False) + other_diff = rvals.take(missing) result = concat_compat((lvals, other_diff)) else: # error: Incompatible types in assignment (expression has type @@ -4237,9 +4237,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray: ) if right_lev_indexer is not None: - right_indexer = algos.take_nd( - right_lev_indexer, join_index.codes[level], allow_fill=False - ) + right_indexer = right_lev_indexer.take(join_index.codes[level]) else: right_indexer = join_index.codes[level] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 953796a35db7c..3305610a4022e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3533,14 +3533,10 @@ def equals(self, other: object) -> bool: if not np.array_equal(self_mask, other_mask): return False self_codes = self_codes[~self_mask] - self_values = algos.take_nd( - np.asarray(self.levels[i]._values), self_codes, allow_fill=False - ) + self_values = self.levels[i]._values.take(self_codes) other_codes = other_codes[~other_mask] - other_values = other_values = algos.take_nd( - np.asarray(other.levels[i]._values), other_codes, allow_fill=False - ) + other_values = other.levels[i]._values.take(other_codes) # since we use NaT both datetime64 and timedelta64 we can have a # situation where a level is typed say timedelta64 in self (IOW it diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cdf919923d21c..b93f09c7df806 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -263,7 +263,7 @@ def items(self) -> Index: def get_dtypes(self): dtypes = np.array([blk.dtype for blk in self.blocks]) - return algos.take_nd(dtypes, self.blknos, allow_fill=False) + return dtypes.take(self.blknos) @property def arrays(self) -> list[ArrayLike]: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index ec7f66f1e515e..9b40ed881eb67 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -31,7 +31,6 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core import algorithms from pandas.core.construction import extract_array if TYPE_CHECKING: @@ -668,10 +667,10 @@ def _reorder_by_uniques(uniques, labels): mask = labels < 0 # move labels to right locations (ie, unsort ascending labels) - labels = algorithms.take_nd(reverse_indexer, labels, allow_fill=False) + labels = reverse_indexer.take(labels) np.putmask(labels, mask, -1) # sort observed ids - uniques = algorithms.take_nd(uniques, sorter, allow_fill=False) + uniques = uniques.take(sorter) return uniques, labels