Skip to content

TYP: intp in libalgos #40623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on
Mar 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -591,16 +591,17 @@ def validate_limit(nobs: int, limit=None) -> int:

@cython.boundscheck(False)
@cython.wraparound(False)
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
# -> ndarray[intp_t, ndim=1]
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
ndarray[intp_t, ndim=1] indexer
algos_t cur, next_val
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
indexer = np.empty(nright, dtype=np.int64)
indexer = np.empty(nright, dtype=np.intp)
indexer[:] = -1

lim = validate_limit(nright, limit)
Expand Down Expand Up @@ -737,15 +738,16 @@ D
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
# -> ndarray[intp_t, ndim=1]
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
ndarray[intp_t, ndim=1] indexer
algos_t cur, prev
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
indexer = np.empty(nright, dtype=np.int64)
indexer = np.empty(nright, dtype=np.intp)
indexer[:] = -1

lim = validate_limit(nright, limit)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
fill_value=np.nan):
cdef:
Py_ssize_t i, j, k, n, idx
ndarray[int64_t] idx0 = indexer[0]
ndarray[int64_t] idx1 = indexer[1]
ndarray[intp_t] idx0 = indexer[0]
ndarray[intp_t] idx1 = indexer[1]
{{c_type_out}} fv

n = len(idx0)
Expand Down
13 changes: 6 additions & 7 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -458,19 +458,19 @@ cdef class DatetimeEngine(Int64Engine):
def get_indexer(self, ndarray values):
self._ensure_mapping_populated()
if values.dtype != self._get_box_dtype():
return np.repeat(-1, len(values)).astype('i4')
return np.repeat(-1, len(values)).astype(np.intp)
values = np.asarray(values).view('i8')
return self.mapping.lookup(values)

def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
return np.repeat(-1, len(other)).astype(np.intp)
other = np.asarray(other).view('i8')
return algos.pad(self._get_index_values(), other, limit=limit)

def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
return np.repeat(-1, len(other)).astype(np.intp)
other = np.asarray(other).view('i8')
return algos.backfill(self._get_index_values(), other, limit=limit)

Expand Down Expand Up @@ -653,7 +653,7 @@ cdef class BaseMultiIndexCodesEngine:
ndarray[int64_t, ndim=1] target_order
ndarray[object, ndim=1] target_values
ndarray[int64_t, ndim=1] new_codes, new_target_codes
ndarray[int64_t, ndim=1] sorted_indexer
ndarray[intp_t, ndim=1] sorted_indexer

target_order = np.argsort(target).astype('int64')
target_values = target[target_order]
Expand Down Expand Up @@ -694,9 +694,8 @@ cdef class BaseMultiIndexCodesEngine:
next_code += 1

# get the indexer, and undo the sorting of `target.values`
sorted_indexer = (
algos.backfill if method == "backfill" else algos.pad
)(new_codes, new_target_codes, limit=limit).astype('int64')
algo = algos.backfill if method == "backfill" else algos.pad
sorted_indexer = algo(new_codes, new_target_codes, limit=limit)
return sorted_indexer[np.argsort(target_order)]

def get_loc(self, object key):
Expand Down
25 changes: 12 additions & 13 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
from pandas._typing import ArrayLike

from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.common import (
ensure_int64,
ensure_platform_int,
)
from pandas.core.dtypes.common import ensure_platform_int
from pandas.core.dtypes.missing import na_value_for_dtype

from pandas.core.construction import ensure_wrapped_if_datetimelike
Expand Down Expand Up @@ -201,7 +198,7 @@ def take_1d(


def take_2d_multi(
arr: np.ndarray, indexer: np.ndarray, fill_value=np.nan
arr: np.ndarray, indexer: tuple[np.ndarray, np.ndarray], fill_value=np.nan
) -> np.ndarray:
"""
Specialized Cython take which sets NaN values in one pass.
Expand All @@ -214,11 +211,9 @@ def take_2d_multi(

row_idx, col_idx = indexer

row_idx = ensure_int64(row_idx)
col_idx = ensure_int64(col_idx)
# error: Incompatible types in assignment (expression has type "Tuple[Any, Any]",
# variable has type "ndarray")
indexer = row_idx, col_idx # type: ignore[assignment]
row_idx = ensure_platform_int(row_idx)
col_idx = ensure_platform_int(col_idx)
indexer = row_idx, col_idx
mask_info = None

# check for promotion based on types only (do this first because
Expand Down Expand Up @@ -474,19 +469,23 @@ def _take_nd_object(
if arr.dtype != out.dtype:
arr = arr.astype(out.dtype)
if arr.shape[axis] > 0:
arr.take(ensure_platform_int(indexer), axis=axis, out=out)
arr.take(indexer, axis=axis, out=out)
if needs_masking:
outindexer = [slice(None)] * arr.ndim
outindexer[axis] = mask
out[tuple(outindexer)] = fill_value


def _take_2d_multi_object(
arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value, mask_info
arr: np.ndarray,
indexer: tuple[np.ndarray, np.ndarray],
out: np.ndarray,
fill_value,
mask_info,
) -> None:
# this is not ideal, performance-wise, but it's better than raising
# an exception (best to optimize in Cython to avoid getting here)
row_idx, col_idx = indexer
row_idx, col_idx = indexer # both np.intp
if mask_info is not None:
(row_mask, col_mask), (row_needs, col_needs) = mask_info
else:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4558,9 +4558,7 @@ def _reindex_multi(self, axes, copy: bool, fill_value) -> DataFrame:
indexer = row_indexer, col_indexer
# error: Argument 2 to "take_2d_multi" has incompatible type "Tuple[Any,
# Any]"; expected "ndarray"
new_values = take_2d_multi(
self.values, indexer, fill_value=fill_value # type: ignore[arg-type]
)
new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
return self._constructor(new_values, index=new_index, columns=new_columns)
else:
return self._reindex_with_indexers(
Expand Down
16 changes: 8 additions & 8 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1784,19 +1784,19 @@ def test_pad_backfill_object_segfault():
new = np.array([datetime(2010, 12, 31)], dtype="O")

result = libalgos.pad["object"](old, new)
expected = np.array([-1], dtype=np.int64)
expected = np.array([-1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)

result = libalgos.pad["object"](new, old)
expected = np.array([], dtype=np.int64)
expected = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)

result = libalgos.backfill["object"](old, new)
expected = np.array([-1], dtype=np.int64)
expected = np.array([-1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)

result = libalgos.backfill["object"](new, old)
expected = np.array([], dtype=np.int64)
expected = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)


Expand All @@ -1822,15 +1822,15 @@ def test_backfill(self):

filler = libalgos.backfill["int64_t"](old.values, new.values)

expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64)
expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.intp)
tm.assert_numpy_array_equal(filler, expect_filler)

# corner case
old = Index([1, 4])
new = Index(list(range(5, 10)))
filler = libalgos.backfill["int64_t"](old.values, new.values)

expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(filler, expect_filler)

def test_pad(self):
Expand All @@ -1839,14 +1839,14 @@ def test_pad(self):

filler = libalgos.pad["int64_t"](old.values, new.values)

expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64)
expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.intp)
tm.assert_numpy_array_equal(filler, expect_filler)

# corner case
old = Index([5, 10])
new = Index(np.arange(5))
filler = libalgos.pad["int64_t"](old.values, new.values)
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64)
expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(filler, expect_filler)


Expand Down