Skip to content

PERF: declare contiguity in libgroupby #40295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def ensure_platform_int(object arr):
if (<ndarray>arr).descr.type_num == PLATFORM_INT:
return arr
else:
return arr.astype(np.intp)
# equiv: arr.astype(np.intp)
return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
else:
return np.array(arr, dtype=np.intp)

Expand All @@ -28,7 +29,8 @@ def ensure_object(object arr):
if (<ndarray>arr).descr.type_num == NPY_OBJECT:
return arr
else:
return arr.astype(np.object_)
# equiv: arr.astype(object)
return cnp.PyArray_Cast(<ndarray>arr, NPY_OBJECT)
else:
return np.array(arr, dtype=np.object_)

Expand Down
84 changes: 42 additions & 42 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(float64_t[:, :] out,
def group_cumprod_float64(float64_t[:, ::1] out,
const float64_t[:, :] values,
const int64_t[:] labels,
int ngroups,
Expand Down Expand Up @@ -200,7 +200,7 @@ def group_cumprod_float64(float64_t[:, :] out,
cdef:
Py_ssize_t i, j, N, K, size
float64_t val
float64_t[:, :] accum
float64_t[:, ::1] accum
int64_t lab

N, K = (<object>values).shape
Expand All @@ -226,7 +226,7 @@ def group_cumprod_float64(float64_t[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
def group_cumsum(numeric[:, ::1] out,
ndarray[numeric, ndim=2] values,
const int64_t[:] labels,
int ngroups,
Expand Down Expand Up @@ -257,7 +257,7 @@ def group_cumsum(numeric[:, :] out,
cdef:
Py_ssize_t i, j, N, K, size
numeric val, y, t
numeric[:, :] accum, compensation
numeric[:, ::1] accum, compensation
int64_t lab

N, K = (<object>values).shape
Expand Down Expand Up @@ -295,14 +295,14 @@ def group_cumsum(numeric[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
def group_shift_indexer(int64_t[::1] out, const int64_t[:] labels,
int ngroups, int periods):
cdef:
Py_ssize_t N, i, j, ii
int offset = 0, sign
int64_t lab, idxer, idxer_slot
int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
int64_t[:, :] label_indexer
int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64)
int64_t[:, ::1] label_indexer

N, = (<object>labels).shape

Expand Down Expand Up @@ -409,10 +409,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_any_all(uint8_t[:] out,
const uint8_t[:] values,
def group_any_all(uint8_t[::1] out,
const uint8_t[::1] values,
const int64_t[:] labels,
const uint8_t[:] mask,
const uint8_t[::1] mask,
object val_test,
bint skipna):
"""
Expand Down Expand Up @@ -478,8 +478,8 @@ ctypedef fused complexfloating_t:

@cython.wraparound(False)
@cython.boundscheck(False)
def _group_add(complexfloating_t[:, :] out,
int64_t[:] counts,
def _group_add(complexfloating_t[:, ::1] out,
int64_t[::1] counts,
ndarray[complexfloating_t, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
Expand All @@ -489,8 +489,8 @@ def _group_add(complexfloating_t[:, :] out,
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
complexfloating_t val, count, t, y
complexfloating_t[:, :] sumx, compensation
int64_t[:, :] nobs
complexfloating_t[:, ::1] sumx, compensation
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

if len_values != len_labels:
Expand Down Expand Up @@ -537,8 +537,8 @@ group_add_complex128 = _group_add['double complex']

@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
int64_t[:] counts,
def _group_prod(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
Expand All @@ -548,8 +548,8 @@ def _group_prod(floating[:, :] out,
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, count
floating[:, :] prodx
int64_t[:, :] nobs
floating[:, ::1] prodx
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

if len_values != len_labels:
Expand Down Expand Up @@ -590,17 +590,17 @@ group_prod_float64 = _group_prod['double']
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
int64_t[:] counts,
def _group_var(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1,
int64_t ddof=1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, ct, oldmean
floating[:, :] mean
int64_t[:, :] nobs
floating[:, ::1] mean
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

assert min_count == -1, "'min_count' only used in add and prod"
Expand Down Expand Up @@ -648,16 +648,16 @@ group_var_float64 = _group_var['double']

@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
int64_t[:] counts,
def _group_mean(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
const int64_t[::1] labels,
Py_ssize_t min_count=-1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, count, y, t
floating[:, :] sumx, compensation
int64_t[:, :] nobs
floating[:, ::1] sumx, compensation
int64_t[:, ::1] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

assert min_count == -1, "'min_count' only used in add and prod"
Expand Down Expand Up @@ -704,8 +704,8 @@ group_mean_float64 = _group_mean['double']

@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
int64_t[:] counts,
def _group_ohlc(floating[:, ::1] out,
int64_t[::1] counts,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
Expand Down Expand Up @@ -898,8 +898,8 @@ cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil:
# use `const rank_t[:, :] values`
@cython.wraparound(False)
@cython.boundscheck(False)
def group_last(rank_t[:, :] out,
int64_t[:] counts,
def group_last(rank_t[:, ::1] out,
int64_t[::1] counts,
ndarray[rank_t, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
Expand Down Expand Up @@ -990,8 +990,8 @@ def group_last(rank_t[:, :] out,
# use `const rank_t[:, :] values`
@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth(rank_t[:, :] out,
int64_t[:] counts,
def group_nth(rank_t[:, ::1] out,
int64_t[::1] counts,
ndarray[rank_t, ndim=2] values,
const int64_t[:] labels,
int64_t min_count=-1, int64_t rank=1
Expand Down Expand Up @@ -1083,7 +1083,7 @@ def group_nth(rank_t[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank(float64_t[:, :] out,
def group_rank(float64_t[:, ::1] out,
ndarray[rank_t, ndim=2] values,
const int64_t[:] labels,
int ngroups,
Expand Down Expand Up @@ -1154,8 +1154,8 @@ ctypedef fused groupby_t:

@cython.wraparound(False)
@cython.boundscheck(False)
def group_max(groupby_t[:, :] out,
int64_t[:] counts,
def group_max(groupby_t[:, ::1] out,
int64_t[::1] counts,
ndarray[groupby_t, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
Expand All @@ -1167,7 +1167,7 @@ def group_max(groupby_t[:, :] out,
groupby_t val, count, nan_val
ndarray[groupby_t, ndim=2] maxx
bint runtime_error = False
int64_t[:, :] nobs
int64_t[:, ::1] nobs

# TODO(cython 3.0):
# Instead of `labels.shape[0]` use `len(labels)`
Expand Down Expand Up @@ -1229,8 +1229,8 @@ def group_max(groupby_t[:, :] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_min(groupby_t[:, :] out,
int64_t[:] counts,
def group_min(groupby_t[:, ::1] out,
int64_t[::1] counts,
ndarray[groupby_t, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
Expand All @@ -1242,7 +1242,7 @@ def group_min(groupby_t[:, :] out,
groupby_t val, count, nan_val
ndarray[groupby_t, ndim=2] minx
bint runtime_error = False
int64_t[:, :] nobs
int64_t[:, ::1] nobs

# TODO(cython 3.0):
# Instead of `labels.shape[0]` use `len(labels)`
Expand Down Expand Up @@ -1302,7 +1302,7 @@ def group_min(groupby_t[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin(groupby_t[:, :] out,
def group_cummin(groupby_t[:, ::1] out,
ndarray[groupby_t, ndim=2] values,
const int64_t[:] labels,
int ngroups,
Expand Down Expand Up @@ -1362,7 +1362,7 @@ def group_cummin(groupby_t[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax(groupby_t[:, :] out,
def group_cummax(groupby_t[:, ::1] out,
ndarray[groupby_t, ndim=2] values,
const int64_t[:] labels,
int ngroups,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def _create_binary_propagating_op(name, is_divmod=False):
return method


def _create_unary_propagating_op(name):
def _create_unary_propagating_op(name: str):
def method(self):
return NA

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ def _cython_operation(
result = self._aggregate(result, counts, values, codes, func, min_count)
elif kind == "transform":
result = maybe_fill(
np.empty_like(values, dtype=out_dtype), fill_value=np.nan
np.empty(values.shape, dtype=out_dtype), fill_value=np.nan
)

# TODO: min_count
Expand Down