pandas-dev · jreback · Mar 26, 2021 · Mar 25, 2021 · Mar 25, 2021
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -591,16 +591,17 @@ def validate_limit(nobs: int, limit=None) -> int:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
+    # -> ndarray[intp_t, ndim=1]
     cdef:
         Py_ssize_t i, j, nleft, nright
-        ndarray[int64_t, ndim=1] indexer
+        ndarray[intp_t, ndim=1] indexer
         algos_t cur, next_val
         int lim, fill_count = 0
 
     nleft = len(old)
     nright = len(new)
-    indexer = np.empty(nright, dtype=np.int64)
+    indexer = np.empty(nright, dtype=np.intp)
     indexer[:] = -1
 
     lim = validate_limit(nright, limit)
@@ -737,15 +738,16 @@ D
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
+    # -> ndarray[intp_t, ndim=1]
     cdef:
         Py_ssize_t i, j, nleft, nright
-        ndarray[int64_t, ndim=1] indexer
+        ndarray[intp_t, ndim=1] indexer
         algos_t cur, prev
         int lim, fill_count = 0
 
     nleft = len(old)
     nright = len(new)
-    indexer = np.empty(nright, dtype=np.int64)
+    indexer = np.empty(nright, dtype=np.intp)
     indexer[:] = -1
 
     lim = validate_limit(nright, limit)

diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
@@ -219,8 +219,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
                                     fill_value=np.nan):
     cdef:
         Py_ssize_t i, j, k, n, idx
-        ndarray[int64_t] idx0 = indexer[0]
-        ndarray[int64_t] idx1 = indexer[1]
+        ndarray[intp_t] idx0 = indexer[0]
+        ndarray[intp_t] idx1 = indexer[1]
         {{c_type_out}} fv
 
     n = len(idx0)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -458,19 +458,19 @@ cdef class DatetimeEngine(Int64Engine):
     def get_indexer(self, ndarray values):
         self._ensure_mapping_populated()
         if values.dtype != self._get_box_dtype():
-            return np.repeat(-1, len(values)).astype('i4')
+            return np.repeat(-1, len(values)).astype(np.intp)
         values = np.asarray(values).view('i8')
         return self.mapping.lookup(values)
 
     def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
         if other.dtype != self._get_box_dtype():
-            return np.repeat(-1, len(other)).astype('i4')
+            return np.repeat(-1, len(other)).astype(np.intp)
         other = np.asarray(other).view('i8')
         return algos.pad(self._get_index_values(), other, limit=limit)
 
     def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
         if other.dtype != self._get_box_dtype():
-            return np.repeat(-1, len(other)).astype('i4')
+            return np.repeat(-1, len(other)).astype(np.intp)
         other = np.asarray(other).view('i8')
         return algos.backfill(self._get_index_values(), other, limit=limit)
 
@@ -653,7 +653,7 @@ cdef class BaseMultiIndexCodesEngine:
             ndarray[int64_t, ndim=1] target_order
             ndarray[object, ndim=1] target_values
             ndarray[int64_t, ndim=1] new_codes, new_target_codes
-            ndarray[int64_t, ndim=1] sorted_indexer
+            ndarray[intp_t, ndim=1] sorted_indexer
 
         target_order = np.argsort(target).astype('int64')
         target_values = target[target_order]
@@ -694,9 +694,8 @@ cdef class BaseMultiIndexCodesEngine:
             next_code += 1
 
         # get the indexer, and undo the sorting of `target.values`
-        sorted_indexer = (
-            algos.backfill if method == "backfill" else algos.pad
-        )(new_codes, new_target_codes, limit=limit).astype('int64')
+        algo = algos.backfill if method == "backfill" else algos.pad
+        sorted_indexer = algo(new_codes, new_target_codes, limit=limit)
         return sorted_indexer[np.argsort(target_order)]
 
     def get_loc(self, object key):

diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py
@@ -15,10 +15,7 @@
 from pandas._typing import ArrayLike
 
 from pandas.core.dtypes.cast import maybe_promote
-from pandas.core.dtypes.common import (
-    ensure_int64,
-    ensure_platform_int,
-)
+from pandas.core.dtypes.common import ensure_platform_int
 from pandas.core.dtypes.missing import na_value_for_dtype
 
 from pandas.core.construction import ensure_wrapped_if_datetimelike
@@ -201,7 +198,7 @@ def take_1d(
 
 
 def take_2d_multi(
-    arr: np.ndarray, indexer: np.ndarray, fill_value=np.nan
+    arr: np.ndarray, indexer: tuple[np.ndarray, np.ndarray], fill_value=np.nan
 ) -> np.ndarray:
     """
     Specialized Cython take which sets NaN values in one pass.
@@ -214,11 +211,9 @@ def take_2d_multi(
 
     row_idx, col_idx = indexer
 
-    row_idx = ensure_int64(row_idx)
-    col_idx = ensure_int64(col_idx)
-    # error: Incompatible types in assignment (expression has type "Tuple[Any, Any]",
-    # variable has type "ndarray")
-    indexer = row_idx, col_idx  # type: ignore[assignment]
+    row_idx = ensure_platform_int(row_idx)
+    col_idx = ensure_platform_int(col_idx)
+    indexer = row_idx, col_idx
     mask_info = None
 
     # check for promotion based on types only (do this first because
@@ -474,19 +469,23 @@ def _take_nd_object(
     if arr.dtype != out.dtype:
         arr = arr.astype(out.dtype)
     if arr.shape[axis] > 0:
-        arr.take(ensure_platform_int(indexer), axis=axis, out=out)
+        arr.take(indexer, axis=axis, out=out)
     if needs_masking:
         outindexer = [slice(None)] * arr.ndim
         outindexer[axis] = mask
         out[tuple(outindexer)] = fill_value
 
 
 def _take_2d_multi_object(
-    arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value, mask_info
+    arr: np.ndarray,
+    indexer: tuple[np.ndarray, np.ndarray],
+    out: np.ndarray,
+    fill_value,
+    mask_info,
 ) -> None:
     # this is not ideal, performance-wise, but it's better than raising
     # an exception (best to optimize in Cython to avoid getting here)
-    row_idx, col_idx = indexer
+    row_idx, col_idx = indexer  # both np.intp
     if mask_info is not None:
         (row_mask, col_mask), (row_needs, col_needs) = mask_info
     else:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4558,9 +4558,5 @@ def _reindex_multi(self, axes, copy: bool, fill_value) -> DataFrame:
             indexer = row_indexer, col_indexer
-            # error: Argument 2 to "take_2d_multi" has incompatible type "Tuple[Any,
-            # Any]"; expected "ndarray"
-            new_values = take_2d_multi(
-                self.values, indexer, fill_value=fill_value  # type: ignore[arg-type]
-            )
+            new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
             return self._constructor(new_values, index=new_index, columns=new_columns)
         else:
             return self._reindex_with_indexers(

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1784,19 +1784,19 @@ def test_pad_backfill_object_segfault():
     new = np.array([datetime(2010, 12, 31)], dtype="O")
 
     result = libalgos.pad["object"](old, new)
-    expected = np.array([-1], dtype=np.int64)
+    expected = np.array([-1], dtype=np.intp)
     tm.assert_numpy_array_equal(result, expected)
 
     result = libalgos.pad["object"](new, old)
-    expected = np.array([], dtype=np.int64)
+    expected = np.array([], dtype=np.intp)
     tm.assert_numpy_array_equal(result, expected)
 
     result = libalgos.backfill["object"](old, new)
-    expected = np.array([-1], dtype=np.int64)
+    expected = np.array([-1], dtype=np.intp)
     tm.assert_numpy_array_equal(result, expected)
 
     result = libalgos.backfill["object"](new, old)
-    expected = np.array([], dtype=np.int64)
+    expected = np.array([], dtype=np.intp)
     tm.assert_numpy_array_equal(result, expected)
 
 
@@ -1822,15 +1822,15 @@ def test_backfill(self):
 
         filler = libalgos.backfill["int64_t"](old.values, new.values)
 
-        expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64)
+        expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp)
         tm.assert_numpy_array_equal(filler, expect_filler)
 
         # corner case
         old = Index([1, 4])
         new = Index(list(range(5, 10)))
         filler = libalgos.backfill["int64_t"](old.values, new.values)
 
-        expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
+        expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp)
         tm.assert_numpy_array_equal(filler, expect_filler)
 
     def test_pad(self):
@@ -1839,14 +1839,14 @@ def test_pad(self):
 
         filler = libalgos.pad["int64_t"](old.values, new.values)
 
-        expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64)
+        expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp)
         tm.assert_numpy_array_equal(filler, expect_filler)
 
         # corner case
         old = Index([5, 10])
         new = Index(np.arange(5))
         filler = libalgos.pad["int64_t"](old.values, new.values)
-        expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
+        expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp)
         tm.assert_numpy_array_equal(filler, expect_filler)