From 49f06ed19d07ff598ef181e524e7a91c2487fd64 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 08:20:31 -0700 Subject: [PATCH 01/12] use fused types for some sparse functions --- pandas/_libs/sparse_op_helper.pxi.in | 86 ++++++++++------------------ 1 file changed, 29 insertions(+), 57 deletions(-) diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 2843a3cf7dd28..9e60dbf495393 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -8,18 +8,12 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # Sparse op #---------------------------------------------------------------------- -{{py: - -# dtype, float_group -dtypes = [('float64', True), ('int64', False)] - -}} +ctypedef fused sparse_t: + float64_t + int64_t -{{for dtype, float_group in dtypes}} -{{if float_group}} - -cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): +cdef inline float64_t __div(sparse_t a, sparse_t b): if b == 0: if a > 0: return INF @@ -30,63 +24,41 @@ cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): else: return float(a) / b -cdef inline {{dtype}}_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): - return __div_{{dtype}}(a, b) -cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): - if b == 0: - # numpy >= 1.11 returns NaN - # for a // 0, rather than +-inf - if _np_version_under1p11: - if a > 0: - return INF - elif a < 0: - return -INF - return NaN - else: - return a // b +cdef inline float64_t __truediv(sparse_t a, sparse_t b): + return __div(a, b) -cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): - if b == 0: - return NaN - else: - return a % b - -{{else}} -cdef inline float64_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): +cdef inline sparse_t __mod(sparse_t a, sparse_t b): if b == 0: - if a > 0: - return INF - elif a < 0: - return -INF - else: + if sparse_t is float64_t: return NaN + else: + return 0 else: - return float(a) / b + return a % b -cdef inline float64_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): - return __div_{{dtype}}(a, b) -cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): +cdef inline sparse_t __floordiv(sparse_t a, sparse_t b): if b == 0: - return 0 + if sparse_t is float64_t: + # numpy >= 1.11 returns NaN + # for a // 0, rather than +-inf + if _np_version_under1p11: + if a > 0: + return INF + elif a < 0: + return -INF + return NaN + else: + return 0 else: return a // b -cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): - if b == 0: - return 0 - else: - return a % b -{{endif}} - -{{endfor}} - -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # sparse array op -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: @@ -106,10 +78,10 @@ def get_op(tup): ops_dict = {'add': '{0} + {1}', 'sub': '{0} - {1}', 'mul': '{0} * {1}', - 'div': '__div_{2}({0}, {1})', - 'mod': '__mod_{2}({0}, {1})', - 'truediv': '__truediv_{2}({0}, {1})', - 'floordiv': '__floordiv_{2}({0}, {1})', + 'div': '__div({0}, {1})', + 'mod': '__mod({0}, {1})', + 'truediv': '__truediv({0}, {1})', + 'floordiv': '__floordiv({0}, {1})', 'pow': '{0} ** {1}', 'eq': '{0} == {1}', 'ne': '{0} != {1}', From d24ec562ae2a6fd405f3d13c7ca7ace6f33abf77 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 10:36:28 -0700 Subject: [PATCH 02/12] use fused types in groupby_helper --- pandas/_libs/groupby_helper.pxi.in | 55 +++++++++++++++++++----------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 765381d89705d..e6ee8d345df55 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -600,7 +600,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, {{py: -# name, c_type, dest_type2, nan_val +# name, dest_type2, nan_val, inf_val dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'), ('float32', 'float32_t', 'NAN', 'np.inf'), ('int64', 'int64_t', 'iNaT', '_int64_max')] @@ -766,25 +766,36 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, accum[lab, j] = mval = val out[i, j] = mval +{{endfor}} + + +ctypedef fused groupby_t: + float64_t + float32_t + int64_t + @cython.boundscheck(False) @cython.wraparound(False) -def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, - ndarray[{{dest_type2}}, ndim=2] values, - ndarray[int64_t] labels, - bint is_datetimelike): +def group_cummax(ndarray[groupby_t, ndim=2] out, + ndarray[groupby_t, ndim=2] values, + ndarray[int64_t] labels, + bint is_datetimelike): """ Only transforms on axis=0 """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, mval - ndarray[{{dest_type2}}, ndim=2] accum + groupby_t val, mval + ndarray[groupby_t, ndim=2] accum int64_t lab N, K = ( values).shape accum = np.empty_like(values) - accum.fill(-{{inf_val}}) + if groupby_t is int64_t: + accum.fill(-_int64_max) + else: + accum.fill(-np.inf) with nogil: for i in range(N): @@ -795,16 +806,22 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, for j in range(K): val = values[i, j] - {{if name == 'int64'}} - if is_datetimelike and val == {{nan_val}}: - out[i, j] = {{nan_val}} + if groupby_t is int64_t: + if is_datetimelike and val == iNaT: + out[i, j] = iNaT + else: + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval else: - {{else}} - if val == val: - {{endif}} - mval = accum[lab, j] - if val > mval: - accum[lab, j] = mval = val - out[i, j] = mval + if val == val: + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval -{{endfor}} + +group_cummax_float64 = group_cummax["float64_t"] +group_cummax_float32 = group_cummax["float32_t"] +group_cummax_int64 = group_cummax["int64_t"] From 54520e2a565e484f36c8503816eeee96ec4e5be1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 11:38:59 -0700 Subject: [PATCH 03/12] Use fused types for more of groupby_helper --- pandas/_libs/groupby_helper.pxi.in | 59 ++++++++++++++++++------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index e6ee8d345df55..3042affa63960 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -725,24 +725,37 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, out[i, j] = minx[i, j] + +{{endfor}} + + +ctypedef fused groupby_t: + float64_t + float32_t + int64_t + + @cython.boundscheck(False) @cython.wraparound(False) -def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, - ndarray[{{dest_type2}}, ndim=2] values, - ndarray[int64_t] labels, - bint is_datetimelike): +def group_cummin(ndarray[groupby_t, ndim=2] out, + ndarray[groupby_t, ndim=2] values, + ndarray[int64_t] labels, + bint is_datetimelike): """ Only transforms on axis=0 """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, mval - ndarray[{{dest_type2}}, ndim=2] accum + groupby_t val, mval + ndarray[groupby_t, ndim=2] accum int64_t lab N, K = ( values).shape accum = np.empty_like(values) - accum.fill({{inf_val}}) + if groupby_t is int64_t: + accum.fill(_int64_max) + else: + accum.fill(np.inf) with nogil: for i in range(N): @@ -754,25 +767,25 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # val = nan - {{if name == 'int64'}} - if is_datetimelike and val == {{nan_val}}: - out[i, j] = {{nan_val}} + if groupby_t is int64_t: + if is_datetimelike and val == iNaT: + out[i, j] = iNaT + else: + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval else: - {{else}} - if val == val: - {{endif}} - mval = accum[lab, j] - if val < mval: - accum[lab, j] = mval = val - out[i, j] = mval - -{{endfor}} + if val == val: + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval -ctypedef fused groupby_t: - float64_t - float32_t - int64_t +group_cummin_float64 = group_cummin["float64_t"] +group_cummin_float32 = group_cummin["float32_t"] +group_cummin_int64 = group_cummin["int64_t"] @cython.boundscheck(False) From 1c7995804ee223d2d10280df46671e3a13460972 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 12:24:56 -0700 Subject: [PATCH 04/12] fuse more --- pandas/_libs/algos_common_helper.pxi.in | 32 +++++++++++-------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 9f531f36d1a64..92e8e9d61d251 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -16,33 +16,29 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in {{py: -# name, c_type, dest_type, dest_dtype -dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'), - ('float32', 'float32_t', 'float32_t', 'np.float32'), - ('int8', 'int8_t', 'float32_t', 'np.float32'), - ('int16', 'int16_t', 'float32_t', 'np.float32'), - ('int32', 'int32_t', 'float64_t', 'np.float64'), - ('int64', 'int64_t', 'float64_t', 'np.float64')] +# name, c_type, dest_type +dtypes = [('float64', 'float64_t', 'float64_t'), + ('float32', 'float32_t', 'float32_t'), + ('int8', 'int8_t', 'float32_t'), + ('int16', 'int16_t', 'float32_t'), + ('int32', 'int32_t', 'float64_t'), + ('int64', 'int64_t', 'float64_t')] def get_dispatch(dtypes): - for name, c_type, dest_type, dest_dtype, in dtypes: - - dest_type2 = dest_type - dest_type = dest_type.replace('_t', '') - - yield name, c_type, dest_type, dest_type2, dest_dtype + for name, c_type, dest_type, in dtypes: + yield name, c_type, dest_type }} -{{for name, c_type, dest_type, dest_type2, dest_dtype +{{for name, c_type, dest_type in get_dispatch(dtypes)}} @cython.boundscheck(False) @cython.wraparound(False) def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr, - ndarray[{{dest_type2}}, ndim=2] out, + ndarray[{{dest_type}}, ndim=2] out, Py_ssize_t periods, int axis): cdef: Py_ssize_t i, j, sx, sy @@ -84,9 +80,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr, out[i, j] = arr[i, j] - arr[i, j - periods] -def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, - ndarray[int64_t] indexer, Py_ssize_t loc, - ndarray[{{dest_type2}}] out): +def put2d_{{name}}_{{dest_type[:-2]}}(ndarray[{{c_type}}, ndim=2, cast=True] values, + ndarray[int64_t] indexer, Py_ssize_t loc, + ndarray[{{dest_type}}] out): cdef: Py_ssize_t i, j, k From e6002779e10a3bc738d9905bf415ee2f433e20bb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 12:26:19 -0700 Subject: [PATCH 05/12] remove unnecessary arg --- pandas/core/internals/blocks.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0e57dd33b1c4e..2b9f583cd2ebd 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1153,7 +1153,7 @@ def check_int_bool(self, inplace): inplace=inplace, limit=limit, fill_value=fill_value, coerce=coerce, - downcast=downcast, mgr=mgr) + downcast=downcast) # try an interp method try: m = missing.clean_interp_method(method, **kwargs) @@ -1169,13 +1169,14 @@ def check_int_bool(self, inplace): limit_direction=limit_direction, limit_area=limit_area, fill_value=fill_value, inplace=inplace, - downcast=downcast, mgr=mgr, **kwargs) + downcast=downcast, **kwargs) - raise ValueError("invalid method '{0}' to interpolate.".format(method)) + raise ValueError("invalid method '{method}' to interpolate." + .format(method=method)) def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, coerce=False, - downcast=None, mgr=None): + downcast=None): """ fillna but using the interpolate machinery """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1202,7 +1203,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, def _interpolate(self, method=None, index=None, values=None, fill_value=None, axis=0, limit=None, limit_direction='forward', limit_area=None, - inplace=False, downcast=None, mgr=None, **kwargs): + inplace=False, downcast=None, **kwargs): """ interpolate using scipy wrappers """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1219,8 +1220,8 @@ def _interpolate(self, method=None, index=None, values=None, if method in ('krogh', 'piecewise_polynomial', 'pchip'): if not index.is_monotonic: - raise ValueError("{0} interpolation requires that the " - "index be monotonic.".format(method)) + raise ValueError("{method} interpolation requires that the " + "index be monotonic.".format(method=method)) # process 1-d slices in the axis direction def func(x): From 89997ee412e12bd741893572d682ad52c86ba083 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 19:14:23 -0700 Subject: [PATCH 06/12] cleanup and fuse --- pandas/_libs/groupby_helper.pxi.in | 102 +++++++++++++-------------- pandas/_libs/join_func_helper.pxi.in | 68 ++++++++++++------ 2 files changed, 95 insertions(+), 75 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 3042affa63960..ccc5aad425cec 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -14,26 +14,22 @@ _int64_max = np.iinfo(np.int64).max {{py: -# name, c_type, dest_type, dest_dtype -dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'), - ('float32', 'float32_t', 'float32_t', 'np.float32')] +# name, c_type +dtypes = [('float64', 'float64_t'), + ('float32', 'float32_t')] def get_dispatch(dtypes): - for name, c_type, dest_type, dest_dtype in dtypes: - - dest_type2 = dest_type - dest_type = dest_type.replace('_t', '') - - yield name, c_type, dest_type, dest_type2, dest_dtype + for name, c_type in dtypes: + yield name, c_type }} -{{for name, c_type, dest_type, dest_type2, dest_dtype in get_dispatch(dtypes)}} +{{for name, c_type in get_dispatch(dtypes)}} @cython.wraparound(False) @cython.boundscheck(False) -def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, @@ -43,8 +39,8 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] sumx, nobs + {{c_type}} val, count + ndarray[{{c_type}}, ndim=2] sumx, nobs if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") @@ -80,7 +76,7 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, @@ -90,8 +86,8 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] prodx, nobs + {{c_type}} val, count + ndarray[{{c_type}}, ndim=2] prodx, nobs if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") @@ -127,15 +123,15 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision(True) -def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, + ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, Py_ssize_t min_count=-1): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, ct, oldmean - ndarray[{{dest_type2}}, ndim=2] nobs, mean + {{c_type}} val, ct, oldmean + ndarray[{{c_type}}, ndim=2] nobs, mean assert min_count == -1, "'min_count' only used in add and prod" @@ -179,15 +175,15 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, + ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, Py_ssize_t min_count=-1): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] sumx, nobs + {{c_type}} val, count + ndarray[{{c_type}}, ndim=2] sumx, nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -224,9 +220,9 @@ def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, + ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, Py_ssize_t min_count=-1): """ @@ -234,7 +230,7 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab - {{dest_type2}} val, count + {{c_type}} val, count Py_ssize_t ngroups = len(counts) assert min_count == -1, "'min_count' only used in add and prod" @@ -278,26 +274,26 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{py: -# name, c_type, dest_type2, nan_val -dtypes = [('float64', 'float64_t', 'float64_t', 'NAN'), - ('float32', 'float32_t', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'int64_t', 'iNaT'), - ('object', 'object', 'object', 'NAN')] +# name, c_type, nan_val +dtypes = [('float64', 'float64_t', 'NAN'), + ('float32', 'float32_t', 'NAN'), + ('int64', 'int64_t', 'iNaT'), + ('object', 'object', 'NAN')] def get_dispatch(dtypes): - for name, c_type, dest_type2, nan_val in dtypes: + for name, c_type, nan_val in dtypes: - yield name, c_type, dest_type2, nan_val + yield name, c_type, nan_val }} -{{for name, c_type, dest_type2, nan_val in get_dispatch(dtypes)}} +{{for name, c_type, nan_val in get_dispatch(dtypes)}} @cython.wraparound(False) @cython.boundscheck(False) -def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, @@ -307,8 +303,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val - ndarray[{{dest_type2}}, ndim=2] resx + {{c_type}} val + ndarray[{{c_type}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -353,7 +349,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, int64_t rank, @@ -363,8 +359,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val - ndarray[{{dest_type2}}, ndim=2] resx + {{c_type}} val + ndarray[{{c_type}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -600,26 +596,26 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, {{py: -# name, dest_type2, nan_val, inf_val +# name, c_type, nan_val, inf_val dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'), ('float32', 'float32_t', 'NAN', 'np.inf'), ('int64', 'int64_t', 'iNaT', '_int64_max')] def get_dispatch(dtypes): - for name, dest_type2, nan_val, inf_val in dtypes: - yield name, dest_type2, nan_val, inf_val + for name, c_type, nan_val, inf_val in dtypes: + yield name, c_type, nan_val, inf_val }} -{{for name, dest_type2, nan_val, inf_val in get_dispatch(dtypes)}} +{{for name, c_type, nan_val, inf_val in get_dispatch(dtypes)}} @cython.wraparound(False) @cython.boundscheck(False) -def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, + ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, Py_ssize_t min_count=-1): """ @@ -627,8 +623,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] maxx, nobs + {{c_type}} val, count + ndarray[{{c_type}}, ndim=2] maxx, nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -672,9 +668,9 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, +def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out, ndarray[int64_t] counts, - ndarray[{{dest_type2}}, ndim=2] values, + ndarray[{{c_type}}, ndim=2] values, ndarray[int64_t] labels, Py_ssize_t min_count=-1): """ @@ -682,8 +678,8 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count - ndarray[{{dest_type2}}, ndim=2] minx, nobs + {{c_type}} val, count + ndarray[{{c_type}}, ndim=2] minx, nobs assert min_count == -1, "'min_count' only used in add and prod" diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index a72b113a6fdb6..e579f43715315 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -211,34 +211,34 @@ def asof_join_nearest_{{on_dtype}}_by_{{by_dtype}}( {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # asof_join -#---------------------------------------------------------------------- - -{{py: - -# on_dtype -dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', - 'int8_t', 'int16_t', 'int32_t', 'int64_t', - 'float', 'double'] - -}} - -{{for on_dtype in dtypes}} - - -def asof_join_backward_{{on_dtype}}( - ndarray[{{on_dtype}}] left_values, - ndarray[{{on_dtype}}] right_values, - bint allow_exact_matches=1, - tolerance=None): +# ---------------------------------------------------------------------- + +ctypedef fused asof_t: + uint8_t + uint16_t + uint32_t + uint64_t + int8_t + int16_t + int32_t + int64_t + float + double + + +def asof_join_backward(ndarray[asof_t] left_values, + ndarray[asof_t] right_values, + bint allow_exact_matches=1, + tolerance=None): cdef: Py_ssize_t left_pos, right_pos, left_size, right_size ndarray[int64_t] left_indexer, right_indexer bint has_tolerance = 0 - {{on_dtype}} tolerance_ = 0 - {{on_dtype}} diff = 0 + asof_t tolerance_ = 0 + asof_t diff = 0 # if we are using tolerance, set our objects if tolerance is not None: @@ -281,6 +281,30 @@ def asof_join_backward_{{on_dtype}}( return left_indexer, right_indexer +asof_join_backward_uint8_t = asof_join_backward["uint8_t"] +asof_join_backward_uint16_t = asof_join_backward["uint16_t"] +asof_join_backward_uint32_t = asof_join_backward["uint32_t"] +asof_join_backward_uint64_t = asof_join_backward["uint64_t"] +asof_join_backward_int8_t = asof_join_backward["int8_t"] +asof_join_backward_int16_t = asof_join_backward["int16_t"] +asof_join_backward_int32_t = asof_join_backward["int32_t"] +asof_join_backward_int64_t = asof_join_backward["int64_t"] +asof_join_backward_float = asof_join_backward["float"] +asof_join_backward_double = asof_join_backward["double"] + + +{{py: + +# on_dtype +dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', + 'int8_t', 'int16_t', 'int32_t', 'int64_t', + 'float', 'double'] + +}} + +{{for on_dtype in dtypes}} + + def asof_join_forward_{{on_dtype}}( ndarray[{{on_dtype}}] left_values, ndarray[{{on_dtype}}] right_values, From b13317b8ac32ce515a9b7dad212b391f72e1a60d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 20:13:10 -0700 Subject: [PATCH 07/12] revert non-central changes --- pandas/core/internals/blocks.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2b9f583cd2ebd..0e57dd33b1c4e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1153,7 +1153,7 @@ def check_int_bool(self, inplace): inplace=inplace, limit=limit, fill_value=fill_value, coerce=coerce, - downcast=downcast) + downcast=downcast, mgr=mgr) # try an interp method try: m = missing.clean_interp_method(method, **kwargs) @@ -1169,14 +1169,13 @@ def check_int_bool(self, inplace): limit_direction=limit_direction, limit_area=limit_area, fill_value=fill_value, inplace=inplace, - downcast=downcast, **kwargs) + downcast=downcast, mgr=mgr, **kwargs) - raise ValueError("invalid method '{method}' to interpolate." - .format(method=method)) + raise ValueError("invalid method '{0}' to interpolate.".format(method)) def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, coerce=False, - downcast=None): + downcast=None, mgr=None): """ fillna but using the interpolate machinery """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1203,7 +1202,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, def _interpolate(self, method=None, index=None, values=None, fill_value=None, axis=0, limit=None, limit_direction='forward', limit_area=None, - inplace=False, downcast=None, **kwargs): + inplace=False, downcast=None, mgr=None, **kwargs): """ interpolate using scipy wrappers """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1220,8 +1219,8 @@ def _interpolate(self, method=None, index=None, values=None, if method in ('krogh', 'piecewise_polynomial', 'pchip'): if not index.is_monotonic: - raise ValueError("{method} interpolation requires that the " - "index be monotonic.".format(method=method)) + raise ValueError("{0} interpolation requires that the " + "index be monotonic.".format(method)) # process 1-d slices in the axis direction def func(x): From db9d796f9d670bef642f142997700f29f23b2db2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 7 Oct 2018 09:14:35 -0700 Subject: [PATCH 08/12] fuse more things --- pandas/_libs/algos_rank_helper.pxi.in | 56 +++++------- pandas/_libs/groupby_helper.pxi.in | 123 +++++++++++++------------- pandas/_libs/join_func_helper.pxi.in | 68 +++++++------- 3 files changed, 121 insertions(+), 126 deletions(-) diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 130276ae0e73c..d7b08b0548810 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -131,45 +131,20 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', argsorted = _as.astype('i8') {{if dtype == 'object'}} - for i in range(n): - sum_ranks += i + 1 - dups += 1 - isnan = sorted_mask[i] - val = util.get_value_at(sorted_data, i) - - if isnan and keep_na: - ranks[argsorted[i]] = nan - continue - count += 1.0 - - if (i == n - 1 or - are_diff(util.get_value_at(sorted_data, i + 1), val) or - i == non_na_idx): - if tiebreak == TIEBREAK_AVERAGE: - for j in range(i - dups + 1, i + 1): - ranks[argsorted[j]] = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - for j in range(i - dups + 1, i + 1): - ranks[argsorted[j]] = i - dups + 2 - elif tiebreak == TIEBREAK_MAX: - for j in range(i - dups + 1, i + 1): - ranks[argsorted[j]] = i + 1 - elif tiebreak == TIEBREAK_FIRST: - raise ValueError('first not supported for non-numeric data') - elif tiebreak == TIEBREAK_FIRST_DESCENDING: - for j in range(i - dups + 1, i + 1): - ranks[argsorted[j]] = 2 * i - j - dups + 2 - elif tiebreak == TIEBREAK_DENSE: - total_tie_count += 1 - for j in range(i - dups + 1, i + 1): - ranks[argsorted[j]] = total_tie_count - sum_ranks = dups = 0 + if True: {{else}} with nogil: + {{endif}} + # TODO: why does the 2d version not have a nogil block? for i in range(n): sum_ranks += i + 1 dups += 1 + + {{if dtype == 'object'}} + val = util.get_value_at(sorted_data, i) + {{else}} val = sorted_data[i] + {{endif}} {{if dtype != 'uint64'}} isnan = sorted_mask[i] @@ -180,8 +155,14 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', count += 1.0 - if (i == n - 1 or sorted_data[i + 1] != val or - i == non_na_idx): + {{if dtype == 'object'}} + if (i == n - 1 or + are_diff(util.get_value_at(sorted_data, i + 1), val) or + i == non_na_idx): + {{else}} + if (i == n - 1 or sorted_data[i + 1] != val or i == non_na_idx): + {{endif}} + if tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = sum_ranks / dups @@ -192,8 +173,12 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: + {{if dtype == 'object'}} + raise ValueError('first not supported for non-numeric data') + {{else}} for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = j + 1 + {{endif}} elif tiebreak == TIEBREAK_FIRST_DESCENDING: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = 2 * i - j - dups + 2 @@ -202,7 +187,6 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = total_tie_count sum_ranks = dups = 0 - {{endif}} if pct: if tiebreak == TIEBREAK_DENSE: return ranks / total_tie_count diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index ccc5aad425cec..859bb66249c3b 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -466,7 +466,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # with mask, without obfuscating location of missing data # in values array masked_vals = np.array(values[:, 0], copy=True) - {{if name=='int64'}} + {{if name == 'int64'}} mask = (masked_vals == {{nan_val}}).astype(np.uint8) {{else}} mask = np.isnan(masked_vals).astype(np.uint8) @@ -590,41 +590,30 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_min, group_max -#---------------------------------------------------------------------- - -{{py: - -# name, c_type, nan_val, inf_val -dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'), - ('float32', 'float32_t', 'NAN', 'np.inf'), - ('int64', 'int64_t', 'iNaT', '_int64_max')] - -def get_dispatch(dtypes): - - for name, c_type, nan_val, inf_val in dtypes: - yield name, c_type, nan_val, inf_val -}} +# ---------------------------------------------------------------------- - -{{for name, c_type, nan_val, inf_val in get_dispatch(dtypes)}} +ctypedef fused groupby_t: + float64_t + float32_t + int64_t @cython.wraparound(False) @cython.boundscheck(False) -def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): +def group_max(ndarray[groupby_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[groupby_t, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{c_type}} val, count - ndarray[{{c_type}}, ndim=2] maxx, nobs + groupby_t val, count, nan_val + ndarray[groupby_t, ndim=2] maxx, nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -634,7 +623,12 @@ def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out, nobs = np.zeros_like(out) maxx = np.empty_like(out) - maxx.fill(-{{inf_val}}) + if groupby_t is int64_t: + maxx.fill(-_int64_max) + nan_val = iNaT + else: + maxx.fill(-np.inf) + nan_val = NAN N, K = ( values).shape @@ -649,37 +643,44 @@ def group_max_{{name}}(ndarray[{{c_type}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} - if val == val and val != {{nan_val}}: - {{endif}} - nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val + if groupby_t is int64_t: + if val != nan_val: + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val + else: + if val == val and val != nan_val: + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: - out[i, j] = {{nan_val}} + out[i, j] = nan_val else: out[i, j] = maxx[i, j] +group_max_float64 = group_max["float64_t"] +group_max_float32 = group_max["float32_t"] +group_max_int64 = group_max["int64_t"] + + @cython.wraparound(False) @cython.boundscheck(False) -def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): +def group_min(ndarray[groupby_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[groupby_t, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{c_type}} val, count - ndarray[{{c_type}}, ndim=2] minx, nobs + groupby_t val, count, nan_val + ndarray[groupby_t, ndim=2] minx, nobs assert min_count == -1, "'min_count' only used in add and prod" @@ -689,7 +690,12 @@ def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out, nobs = np.zeros_like(out) minx = np.empty_like(out) - minx.fill({{inf_val}}) + if groupby_t is int64_t: + minx.fill(_int64_max) + nan_val = iNaT + else: + minx.fill(np.inf) + nan_val = NAN N, K = ( values).shape @@ -704,31 +710,28 @@ def group_min_{{name}}(ndarray[{{c_type}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} - if val == val and val != {{nan_val}}: - {{endif}} - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val + if groupby_t is int64_t: + if val != nan_val: + nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val + else: + if val == val and val != nan_val: + nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val for i in range(ncounts): for j in range(K): if nobs[i, j] == 0: - out[i, j] = {{nan_val}} + out[i, j] = nan_val else: out[i, j] = minx[i, j] - -{{endfor}} - - -ctypedef fused groupby_t: - float64_t - float32_t - int64_t +group_min_float64 = group_min["float64_t"] +group_min_float32 = group_min["float32_t"] +group_min_int64 = group_min["int64_t"] @cython.boundscheck(False) diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index e579f43715315..c7369c6b18093 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -293,30 +293,17 @@ asof_join_backward_float = asof_join_backward["float"] asof_join_backward_double = asof_join_backward["double"] -{{py: - -# on_dtype -dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', - 'int8_t', 'int16_t', 'int32_t', 'int64_t', - 'float', 'double'] - -}} - -{{for on_dtype in dtypes}} - - -def asof_join_forward_{{on_dtype}}( - ndarray[{{on_dtype}}] left_values, - ndarray[{{on_dtype}}] right_values, - bint allow_exact_matches=1, - tolerance=None): +def asof_join_forward(ndarray[asof_t] left_values, + ndarray[asof_t] right_values, + bint allow_exact_matches=1, + tolerance=None): cdef: Py_ssize_t left_pos, right_pos, left_size, right_size ndarray[int64_t] left_indexer, right_indexer bint has_tolerance = 0 - {{on_dtype}} tolerance_ = 0 - {{on_dtype}} diff = 0 + asof_t tolerance_ = 0 + asof_t diff = 0 # if we are using tolerance, set our objects if tolerance is not None: @@ -360,16 +347,27 @@ def asof_join_forward_{{on_dtype}}( return left_indexer, right_indexer -def asof_join_nearest_{{on_dtype}}( - ndarray[{{on_dtype}}] left_values, - ndarray[{{on_dtype}}] right_values, - bint allow_exact_matches=1, - tolerance=None): +asof_join_forward_uint8_t = asof_join_forward["uint8_t"] +asof_join_forward_uint16_t = asof_join_forward["uint16_t"] +asof_join_forward_uint32_t = asof_join_forward["uint32_t"] +asof_join_forward_uint64_t = asof_join_forward["uint64_t"] +asof_join_forward_int8_t = asof_join_forward["int8_t"] +asof_join_forward_int16_t = asof_join_forward["int16_t"] +asof_join_forward_int32_t = asof_join_forward["int32_t"] +asof_join_forward_int64_t = asof_join_forward["int64_t"] +asof_join_forward_float = asof_join_forward["float"] +asof_join_forward_double = asof_join_forward["double"] + + +def asof_join_nearest(ndarray[asof_t] left_values, + ndarray[asof_t] right_values, + bint allow_exact_matches=1, + tolerance=None): cdef: Py_ssize_t left_size, right_size, i ndarray[int64_t] left_indexer, right_indexer, bli, bri, fli, fri - {{on_dtype}} bdiff, fdiff + asof_t bdiff, fdiff left_size = len(left_values) right_size = len(right_values) @@ -378,10 +376,10 @@ def asof_join_nearest_{{on_dtype}}( right_indexer = np.empty(left_size, dtype=np.int64) # search both forward and backward - bli, bri = asof_join_backward_{{on_dtype}}(left_values, right_values, - allow_exact_matches, tolerance) - fli, fri = asof_join_forward_{{on_dtype}}(left_values, right_values, - allow_exact_matches, tolerance) + bli, bri = asof_join_backward(left_values, right_values, + allow_exact_matches, tolerance) + fli, fri = asof_join_forward(left_values, right_values, + allow_exact_matches, tolerance) for i in range(len(bri)): # choose timestamp from right with smaller difference @@ -395,4 +393,14 @@ def asof_join_nearest_{{on_dtype}}( return left_indexer, right_indexer -{{endfor}} + +asof_join_nearest_uint8_t = asof_join_nearest["uint8_t"] +asof_join_nearest_uint16_t = asof_join_nearest["uint16_t"] +asof_join_nearest_uint32_t = asof_join_nearest["uint32_t"] +asof_join_nearest_uint64_t = asof_join_nearest["uint64_t"] +asof_join_nearest_int8_t = asof_join_nearest["int8_t"] +asof_join_nearest_int16_t = asof_join_nearest["int16_t"] +asof_join_nearest_int32_t = asof_join_nearest["int32_t"] +asof_join_nearest_int64_t = asof_join_nearest["int64_t"] +asof_join_nearest_float = asof_join_nearest["float"] +asof_join_nearest_double = asof_join_nearest["double"] From a69438bcfaed0547acf0ac46054146a392aa0b95 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 11:03:48 -0700 Subject: [PATCH 09/12] nicer names --- pandas/_libs/sparse_op_helper.pxi.in | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 9e60dbf495393..d02a985de1d61 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -13,7 +13,7 @@ ctypedef fused sparse_t: int64_t -cdef inline float64_t __div(sparse_t a, sparse_t b): +cdef inline float64_t __div__(sparse_t a, sparse_t b): if b == 0: if a > 0: return INF @@ -25,11 +25,11 @@ cdef inline float64_t __div(sparse_t a, sparse_t b): return float(a) / b -cdef inline float64_t __truediv(sparse_t a, sparse_t b): - return __div(a, b) +cdef inline float64_t __truediv__(sparse_t a, sparse_t b): + return __div__(a, b) -cdef inline sparse_t __mod(sparse_t a, sparse_t b): +cdef inline sparse_t __mod__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: return NaN @@ -39,7 +39,7 @@ cdef inline sparse_t __mod(sparse_t a, sparse_t b): return a % b -cdef inline sparse_t __floordiv(sparse_t a, sparse_t b): +cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: # numpy >= 1.11 returns NaN @@ -78,10 +78,10 @@ def get_op(tup): ops_dict = {'add': '{0} + {1}', 'sub': '{0} - {1}', 'mul': '{0} * {1}', - 'div': '__div({0}, {1})', - 'mod': '__mod({0}, {1})', - 'truediv': '__truediv({0}, {1})', - 'floordiv': '__floordiv({0}, {1})', + 'div': '__div__({0}, {1})', + 'mod': '__mod__({0}, {1})', + 'truediv': '__truediv__({0}, {1})', + 'floordiv': '__floordiv__({0}, {1})', 'pow': '{0} ** {1}', 'eq': '{0} == {1}', 'ne': '{0} != {1}', From cdcde6c3c4dc7c1cbb3e76b38d4188b4091f0dd4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 11:08:01 -0700 Subject: [PATCH 10/12] requested comments/cleanups --- pandas/_libs/algos_common_helper.pxi.in | 11 ++++++----- pandas/_libs/groupby_helper.pxi.in | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 92e8e9d61d251..3e9670b140361 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -27,11 +27,12 @@ dtypes = [('float64', 'float64_t', 'float64_t'), def get_dispatch(dtypes): for name, c_type, dest_type, in dtypes: - yield name, c_type, dest_type + dest_name = dest_type[:-2] # i.e. strip "_t" + yield name, c_type, dest_type, dest_name }} -{{for name, c_type, dest_type +{{for name, c_type, dest_type, dest_name in get_dispatch(dtypes)}} @@ -80,9 +81,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr, out[i, j] = arr[i, j] - arr[i, j - periods] -def put2d_{{name}}_{{dest_type[:-2]}}(ndarray[{{c_type}}, ndim=2, cast=True] values, - ndarray[int64_t] indexer, Py_ssize_t loc, - ndarray[{{dest_type}}] out): +def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, + ndarray[int64_t] indexer, Py_ssize_t loc, + ndarray[{{dest_type}}] out): cdef: Py_ssize_t i, j, k diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 859bb66249c3b..84c5ccfe9c65a 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -594,6 +594,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # group_min, group_max # ---------------------------------------------------------------------- +# TODO: consider implementing for more dtypes ctypedef fused groupby_t: float64_t float32_t @@ -624,6 +625,7 @@ def group_max(ndarray[groupby_t, ndim=2] out, maxx = np.empty_like(out) if groupby_t is int64_t: + # Note: evaluated at compile-time maxx.fill(-_int64_max) nan_val = iNaT else: From adbc67ce1a0e10d33a7d5a66d8601980410a8a4e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 11:50:03 -0700 Subject: [PATCH 11/12] Dummy commit to force CI From dc76269dca591f60cb1e4fa39114f4611eb4acc5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Oct 2018 13:33:29 -0700 Subject: [PATCH 12/12] wrap long line --- pandas/_libs/algos_rank_helper.pxi.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index d7b08b0548810..bb4aec75ed567 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -174,7 +174,8 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: {{if dtype == 'object'}} - raise ValueError('first not supported for non-numeric data') + raise ValueError('first not supported for ' + 'non-numeric data') {{else}} for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = j + 1