diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index e5e64f8dc7b5f..be57d2cd5f844 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -17,7 +17,7 @@ dtypes = [('Complex128', 'complex128', 'complex128', ('UInt32', 'uint32', 'uint32', 'uint32_t', ''), ('UInt16', 'uint16', 'uint16', 'uint16_t', ''), ('UInt8', 'uint8', 'uint8', 'uint8_t', ''), - ('Object', 'object', 'pymap', 'object', ''), + ('Object', 'object', 'pymap', 'object', ''), ('Int64', 'int64', 'int64', 'int64_t', ''), ('Int32', 'int32', 'int32', 'int32_t', ''), ('Int16', 'int16', 'int16', 'int16_t', ''), @@ -61,11 +61,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna): for i in range(n): val = values[i] if not dropna or not checknull(val): - k = kh_get_{{ttype}}(table, val) + k = kh_get_{{ttype}}(table, {{to_c_type}}val) if k != table.n_buckets: table.vals[k] += 1 else: - k = kh_put_{{ttype}}(table, val, &ret) + k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret) table.vals[k] = 1 result_keys.append(val) {{else}} @@ -110,6 +110,8 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'): int ret = 0 {{if dtype != 'object'}} {{c_type}} value + {{else}} + PyObject* value {{endif}} Py_ssize_t i, n = len(values) khiter_t k @@ -123,44 +125,33 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'): if keep == 'last': {{if dtype == 'object'}} - for i in range(n - 1, -1, -1): - # equivalent: range(n)[::-1], which cython doesn't like in nogil - kh_put_{{ttype}}(table, values[i], &ret) - out[i] = ret == 0 + if True: {{else}} with nogil: + {{endif}} for i in range(n - 1, -1, -1): # equivalent: range(n)[::-1], which cython doesn't like in nogil value = {{to_c_type}}(values[i]) kh_put_{{ttype}}(table, value, &ret) out[i] = ret == 0 - {{endif}} + elif keep == 'first': {{if dtype == 'object'}} - for i in range(n): - kh_put_{{ttype}}(table, values[i], &ret) - out[i] = ret == 0 + if True: {{else}} with nogil: + {{endif}} for i in range(n): value = {{to_c_type}}(values[i]) kh_put_{{ttype}}(table, value, &ret) out[i] = ret == 0 - {{endif}} + else: {{if dtype == 'object'}} - for i in range(n): - value = values[i] - k = kh_get_{{ttype}}(table, value) - if k != table.n_buckets: - out[table.vals[k]] = 1 - out[i] = 1 - else: - k = kh_put_{{ttype}}(table, value, &ret) - table.vals[k] = i - out[i] = 0 + if True: {{else}} with nogil: + {{endif}} for i in range(n): value = {{to_c_type}}(values[i]) k = kh_get_{{ttype}}(table, value) @@ -171,7 +162,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'): k = kh_put_{{ttype}}(table, value, &ret) table.vals[k] = i out[i] = 0 - {{endif}} + kh_destroy_{{ttype}}(table) return out @@ -206,7 +197,13 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): khiter_t k int ret = 0 ndarray[uint8_t] result + + {{if dtype == "object"}} + PyObject* val + {{else}} {{c_type}} val + {{endif}} + kh_{{ttype}}_t *table = kh_init_{{ttype}}() # construct the table @@ -214,31 +211,27 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): kh_resize_{{ttype}}(table, n) {{if dtype == 'object'}} - for i in range(n): - kh_put_{{ttype}}(table, values[i], &ret) + if True: {{else}} with nogil: + {{endif}} for i in range(n): val = {{to_c_type}}(values[i]) kh_put_{{ttype}}(table, val, &ret) - {{endif}} # test membership n = len(arr) result = np.empty(n, dtype=np.uint8) {{if dtype == 'object'}} - for i in range(n): - val = arr[i] - k = kh_get_{{ttype}}(table, val) - result[i] = (k != table.n_buckets) + if True: {{else}} with nogil: + {{endif}} for i in range(n): val = {{to_c_type}}(arr[i]) k = kh_get_{{ttype}}(table, val) result[i] = (k != table.n_buckets) - {{endif}} kh_destroy_{{ttype}}(table) return result.view(np.bool_)