Skip to content

Commit eaefc5c

Browse files
authored
REF: share hashtable_func_helper code (#46090)
* REF: hashtable_func_helper
* REF: de-duplicate using to_c_type pattern
1 parent 2fe0c70 commit eaefc5c

File tree

1 file changed

+24
-31
lines changed

1 file changed

+24
-31
lines changed

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 24 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ dtypes = [('Complex128', 'complex128', 'complex128',
1717
('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
1818
('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
1919
('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
20-
('Object', 'object', 'pymap', 'object', ''),
20+
('Object', 'object', 'pymap', 'object', '<PyObject*>'),
2121
('Int64', 'int64', 'int64', 'int64_t', ''),
2222
('Int32', 'int32', 'int32', 'int32_t', ''),
2323
('Int16', 'int16', 'int16', 'int16_t', ''),
@@ -61,11 +61,11 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
6161
for i in range(n):
6262
val = values[i]
6363
if not dropna or not checknull(val):
64-
k = kh_get_{{ttype}}(table, <PyObject*>val)
64+
k = kh_get_{{ttype}}(table, {{to_c_type}}val)
6565
if k != table.n_buckets:
6666
table.vals[k] += 1
6767
else:
68-
k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
68+
k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret)
6969
table.vals[k] = 1
7070
result_keys.append(val)
7171
{{else}}
@@ -110,6 +110,8 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
110110
int ret = 0
111111
{{if dtype != 'object'}}
112112
{{c_type}} value
113+
{{else}}
114+
PyObject* value
113115
{{endif}}
114116
Py_ssize_t i, n = len(values)
115117
khiter_t k
@@ -123,44 +125,33 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
123125

124126
if keep == 'last':
125127
{{if dtype == 'object'}}
126-
for i in range(n - 1, -1, -1):
127-
# equivalent: range(n)[::-1], which cython doesn't like in nogil
128-
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
129-
out[i] = ret == 0
128+
if True:
130129
{{else}}
131130
with nogil:
131+
{{endif}}
132132
for i in range(n - 1, -1, -1):
133133
# equivalent: range(n)[::-1], which cython doesn't like in nogil
134134
value = {{to_c_type}}(values[i])
135135
kh_put_{{ttype}}(table, value, &ret)
136136
out[i] = ret == 0
137-
{{endif}}
137+
138138
elif keep == 'first':
139139
{{if dtype == 'object'}}
140-
for i in range(n):
141-
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
142-
out[i] = ret == 0
140+
if True:
143141
{{else}}
144142
with nogil:
143+
{{endif}}
145144
for i in range(n):
146145
value = {{to_c_type}}(values[i])
147146
kh_put_{{ttype}}(table, value, &ret)
148147
out[i] = ret == 0
149-
{{endif}}
148+
150149
else:
151150
{{if dtype == 'object'}}
152-
for i in range(n):
153-
value = values[i]
154-
k = kh_get_{{ttype}}(table, <PyObject*>value)
155-
if k != table.n_buckets:
156-
out[table.vals[k]] = 1
157-
out[i] = 1
158-
else:
159-
k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
160-
table.vals[k] = i
161-
out[i] = 0
151+
if True:
162152
{{else}}
163153
with nogil:
154+
{{endif}}
164155
for i in range(n):
165156
value = {{to_c_type}}(values[i])
166157
k = kh_get_{{ttype}}(table, value)
@@ -171,7 +162,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
171162
k = kh_put_{{ttype}}(table, value, &ret)
172163
table.vals[k] = i
173164
out[i] = 0
174-
{{endif}}
165+
175166
kh_destroy_{{ttype}}(table)
176167
return out
177168

@@ -206,39 +197,41 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
206197
khiter_t k
207198
int ret = 0
208199
ndarray[uint8_t] result
200+
201+
{{if dtype == "object"}}
202+
PyObject* val
203+
{{else}}
209204
{{c_type}} val
205+
{{endif}}
206+
210207
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
211208

212209
# construct the table
213210
n = len(values)
214211
kh_resize_{{ttype}}(table, n)
215212

216213
{{if dtype == 'object'}}
217-
for i in range(n):
218-
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
214+
if True:
219215
{{else}}
220216
with nogil:
217+
{{endif}}
221218
for i in range(n):
222219
val = {{to_c_type}}(values[i])
223220
kh_put_{{ttype}}(table, val, &ret)
224-
{{endif}}
225221

226222
# test membership
227223
n = len(arr)
228224
result = np.empty(n, dtype=np.uint8)
229225

230226
{{if dtype == 'object'}}
231-
for i in range(n):
232-
val = arr[i]
233-
k = kh_get_{{ttype}}(table, <PyObject*>val)
234-
result[i] = (k != table.n_buckets)
227+
if True:
235228
{{else}}
236229
with nogil:
230+
{{endif}}
237231
for i in range(n):
238232
val = {{to_c_type}}(arr[i])
239233
k = kh_get_{{ttype}}(table, val)
240234
result[i] = (k != table.n_buckets)
241-
{{endif}}
242235

243236
kh_destroy_{{ttype}}(table)
244237
return result.view(np.bool_)

0 commit comments

Comments
 (0)