From 127383304a2a6c53e95636d9c4eb94240a159a2e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Mar 2021 18:01:57 -0700 Subject: [PATCH 1/2] TYP: intp in libalgos --- pandas/_libs/algos.pyx | 12 +++++++----- pandas/_libs/algos_take_helper.pxi.in | 4 ++-- pandas/_libs/index.pyx | 13 ++++++------- pandas/core/array_algos/take.py | 25 ++++++++++++------------- pandas/core/frame.py | 4 +--- 5 files changed, 28 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 122a014604bf0..d3edcc4e57b2d 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -591,16 +591,17 @@ def validate_limit(nobs: int, limit=None) -> int: @cython.boundscheck(False) @cython.wraparound(False) -def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): +def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray: + # -> ndarray[intp_t, ndim=1] cdef: Py_ssize_t i, j, nleft, nright - ndarray[int64_t, ndim=1] indexer + ndarray[intp_t, ndim=1] indexer algos_t cur, next_val int lim, fill_count = 0 nleft = len(old) nright = len(new) - indexer = np.empty(nright, dtype=np.int64) + indexer = np.empty(nright, dtype=np.intp) indexer[:] = -1 lim = validate_limit(nright, limit) @@ -737,15 +738,16 @@ D @cython.boundscheck(False) @cython.wraparound(False) def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray: + # -> ndarray[intp_t, ndim=1] cdef: Py_ssize_t i, j, nleft, nright - ndarray[int64_t, ndim=1] indexer + ndarray[intp_t, ndim=1] indexer algos_t cur, prev int lim, fill_count = 0 nleft = len(old) nright = len(new) - indexer = np.empty(nright, dtype=np.int64) + indexer = np.empty(nright, dtype=np.intp) indexer[:] = -1 lim = validate_limit(nright, limit) diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 929cb86c41036..11679fc432edc 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -219,8 +219,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, fill_value=np.nan): cdef: Py_ssize_t i, j, k, n, idx - ndarray[int64_t] idx0 = indexer[0] - ndarray[int64_t] idx1 = indexer[1] + ndarray[intp_t] idx0 = indexer[0] + ndarray[intp_t] idx1 = indexer[1] {{c_type_out}} fv n = len(idx0) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 9159fa03c12c0..71f4b0c0ae18f 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -458,19 +458,19 @@ cdef class DatetimeEngine(Int64Engine): def get_indexer(self, ndarray values): self._ensure_mapping_populated() if values.dtype != self._get_box_dtype(): - return np.repeat(-1, len(values)).astype('i4') + return np.repeat(-1, len(values)).astype(np.intp) values = np.asarray(values).view('i8') return self.mapping.lookup(values) def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray: if other.dtype != self._get_box_dtype(): - return np.repeat(-1, len(other)).astype('i4') + return np.repeat(-1, len(other)).astype(np.intp) other = np.asarray(other).view('i8') return algos.pad(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray: if other.dtype != self._get_box_dtype(): - return np.repeat(-1, len(other)).astype('i4') + return np.repeat(-1, len(other)).astype(np.intp) other = np.asarray(other).view('i8') return algos.backfill(self._get_index_values(), other, limit=limit) @@ -653,7 +653,7 @@ cdef class BaseMultiIndexCodesEngine: ndarray[int64_t, ndim=1] target_order ndarray[object, ndim=1] target_values ndarray[int64_t, ndim=1] new_codes, new_target_codes - ndarray[int64_t, ndim=1] sorted_indexer + ndarray[intp_t, ndim=1] sorted_indexer target_order = np.argsort(target).astype('int64') target_values = target[target_order] @@ -694,9 +694,8 @@ cdef class BaseMultiIndexCodesEngine: next_code += 1 # get the indexer, and undo the sorting of `target.values` - sorted_indexer = ( - algos.backfill if method == "backfill" else algos.pad - )(new_codes, new_target_codes, limit=limit).astype('int64') + algo = algos.backfill if method == "backfill" else algos.pad + sorted_indexer = algo(new_codes, new_target_codes, limit=limit) return sorted_indexer[np.argsort(target_order)] def get_loc(self, object key): diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 2a6080e38a732..6dfdc99f4fd9c 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -15,10 +15,7 @@ from pandas._typing import ArrayLike from pandas.core.dtypes.cast import maybe_promote -from pandas.core.dtypes.common import ( - ensure_int64, - ensure_platform_int, -) +from pandas.core.dtypes.common import ensure_platform_int from pandas.core.dtypes.missing import na_value_for_dtype from pandas.core.construction import ensure_wrapped_if_datetimelike @@ -201,7 +198,7 @@ def take_1d( def take_2d_multi( - arr: np.ndarray, indexer: np.ndarray, fill_value=np.nan + arr: np.ndarray, indexer: tuple[np.ndarray, np.ndarray], fill_value=np.nan ) -> np.ndarray: """ Specialized Cython take which sets NaN values in one pass. @@ -214,11 +211,9 @@ def take_2d_multi( row_idx, col_idx = indexer - row_idx = ensure_int64(row_idx) - col_idx = ensure_int64(col_idx) - # error: Incompatible types in assignment (expression has type "Tuple[Any, Any]", - # variable has type "ndarray") - indexer = row_idx, col_idx # type: ignore[assignment] + row_idx = ensure_platform_int(row_idx) + col_idx = ensure_platform_int(col_idx) + indexer = row_idx, col_idx mask_info = None # check for promotion based on types only (do this first because @@ -474,7 +469,7 @@ def _take_nd_object( if arr.dtype != out.dtype: arr = arr.astype(out.dtype) if arr.shape[axis] > 0: - arr.take(ensure_platform_int(indexer), axis=axis, out=out) + arr.take(indexer, axis=axis, out=out) if needs_masking: outindexer = [slice(None)] * arr.ndim outindexer[axis] = mask @@ -482,11 +477,15 @@ def _take_nd_object( def _take_2d_multi_object( - arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value, mask_info + arr: np.ndarray, + indexer: tuple[np.ndarray, np.ndarray], + out: np.ndarray, + fill_value, + mask_info, ) -> None: # this is not ideal, performance-wise, but it's better than raising # an exception (best to optimize in Cython to avoid getting here) - row_idx, col_idx = indexer + row_idx, col_idx = indexer # both np.intp if mask_info is not None: (row_mask, col_mask), (row_needs, col_needs) = mask_info else: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8d0c8e5f29413..94b4622fadb3d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4558,9 +4558,7 @@ def _reindex_multi(self, axes, copy: bool, fill_value) -> DataFrame: indexer = row_indexer, col_indexer # error: Argument 2 to "take_2d_multi" has incompatible type "Tuple[Any, # Any]"; expected "ndarray" - new_values = take_2d_multi( - self.values, indexer, fill_value=fill_value # type: ignore[arg-type] - ) + new_values = take_2d_multi(self.values, indexer, fill_value=fill_value) return self._constructor(new_values, index=new_index, columns=new_columns) else: return self._reindex_with_indexers( From 58ef501c35e4e237397d1d6022514ede8efce8e4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Mar 2021 18:36:27 -0700 Subject: [PATCH 2/2] update tests --- pandas/tests/test_algos.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cd800b3f3a452..33dfde7dfef61 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1784,19 +1784,19 @@ def test_pad_backfill_object_segfault(): new = np.array([datetime(2010, 12, 31)], dtype="O") result = libalgos.pad["object"](old, new) - expected = np.array([-1], dtype=np.int64) + expected = np.array([-1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) result = libalgos.pad["object"](new, old) - expected = np.array([], dtype=np.int64) + expected = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) result = libalgos.backfill["object"](old, new) - expected = np.array([-1], dtype=np.int64) + expected = np.array([-1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) result = libalgos.backfill["object"](new, old) - expected = np.array([], dtype=np.int64) + expected = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) @@ -1822,7 +1822,7 @@ def test_backfill(self): filler = libalgos.backfill["int64_t"](old.values, new.values) - expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64) + expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp) tm.assert_numpy_array_equal(filler, expect_filler) # corner case @@ -1830,7 +1830,7 @@ def test_backfill(self): new = Index(list(range(5, 10))) filler = libalgos.backfill["int64_t"](old.values, new.values) - expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(filler, expect_filler) def test_pad(self): @@ -1839,14 +1839,14 @@ def test_pad(self): filler = libalgos.pad["int64_t"](old.values, new.values) - expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64) + expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp) tm.assert_numpy_array_equal(filler, expect_filler) # corner case old = Index([5, 10]) new = Index(np.arange(5)) filler = libalgos.pad["int64_t"](old.values, new.values) - expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(filler, expect_filler)