Skip to content

Commit 1e62a59

Browse files
committed
getting rid of complex_group/float_group
1 parent 6846643 commit 1e62a59

File tree

4 files changed

+79
-52
lines changed

4 files changed

+79
-52
lines changed

pandas/_libs/hashtable.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ cnp.import_array()
1515
from pandas._libs cimport util
1616
from pandas._libs.khash cimport (
1717
KHASH_TRACE_DOMAIN,
18-
are_equal_khcomplex64_t,
19-
are_equal_khcomplex128_t,
18+
are_equivalent_float32_t,
19+
are_equivalent_float64_t,
20+
are_equivalent_khcomplex64_t,
21+
are_equivalent_khcomplex128_t,
2022
kh_str_t,
2123
khcomplex64_t,
2224
khcomplex128_t,

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,44 @@ cdef bint is_nan_kh{{name}}_t(kh{{name}}_t val) nogil:
3030
{{endfor}}
3131

3232

33+
{{py:
34+
35+
# c_type
36+
float_types = ['float64_t',
37+
'float32_t']
38+
}}
39+
40+
{{for c_type in float_types}}
41+
42+
cdef bint is_nan_{{c_type}}({{c_type}} val) nogil:
43+
return val != val
44+
{{endfor}}
45+
46+
47+
{{py:
48+
49+
50+
# c_type
51+
int_types = ['int64_t',
52+
'int32_t',
53+
'int16_t',
54+
'int8_t',
55+
'uint64_t',
56+
'uint32_t',
57+
'uint16_t',
58+
'uint8_t',]
59+
}}
60+
61+
{{for c_type in int_types}}
62+
63+
cdef bint is_nan_{{c_type}}({{c_type}} val) nogil:
64+
return False
65+
66+
cdef bint are_equivalent_{{c_type}}({{c_type}} val1, {{c_type}} val2) nogil:
67+
return val1 == val2
68+
{{endfor}}
69+
70+
3371
{{py:
3472

3573
# name
@@ -336,24 +374,24 @@ cdef class HashTable:
336374

337375
{{py:
338376

339-
# name, dtype, c_type, float_group, complex_group, to_c_type
340-
dtypes = [('Complex128', 'complex128', 'khcomplex128_t', True, True, "to_khcomplex128_t"),
341-
('Float64', 'float64', 'float64_t', True, False, ""),
342-
('UInt64', 'uint64', 'uint64_t', False, False, ""),
343-
('Int64', 'int64', 'int64_t', False, False, ""),
344-
('Complex64', 'complex64', 'khcomplex64_t', True, True, "to_khcomplex64_t"),
345-
('Float32', 'float32', 'float32_t', True, False, ""),
346-
('UInt32', 'uint32', 'uint32_t', False, False, ""),
347-
('Int32', 'int32', 'int32_t', False, False, ""),
348-
('UInt16', 'uint16', 'uint16_t', False, False, ""),
349-
('Int16', 'int16', 'int16_t', False, False, ""),
350-
('UInt8', 'uint8', 'uint8_t', False, False, ""),
351-
('Int8', 'int8', 'int8_t', False, False, "")]
377+
# name, dtype, c_type, to_c_type
378+
dtypes = [('Complex128', 'complex128', 'khcomplex128_t', "to_khcomplex128_t"),
379+
('Float64', 'float64', 'float64_t', ""),
380+
('UInt64', 'uint64', 'uint64_t', ""),
381+
('Int64', 'int64', 'int64_t', ""),
382+
('Complex64', 'complex64', 'khcomplex64_t', "to_khcomplex64_t"),
383+
('Float32', 'float32', 'float32_t', ""),
384+
('UInt32', 'uint32', 'uint32_t', ""),
385+
('Int32', 'int32', 'int32_t', ""),
386+
('UInt16', 'uint16', 'uint16_t', ""),
387+
('Int16', 'int16', 'int16_t', ""),
388+
('UInt8', 'uint8', 'uint8_t', ""),
389+
('Int8', 'int8', 'int8_t', "")]
352390

353391
}}
354392

355393

356-
{{for name, dtype, c_type, float_group, complex_group, to_c_type in dtypes}}
394+
{{for name, dtype, c_type, to_c_type in dtypes}}
357395

358396
cdef class {{name}}HashTable(HashTable):
359397

@@ -539,14 +577,8 @@ cdef class {{name}}HashTable(HashTable):
539577
labels[i] = na_sentinel
540578
continue
541579
elif ignore_na and (
542-
{{if complex_group}}
543-
not is_nan_{{c_type}}(val) or
544-
(use_na_value and are_equal_{{c_type}}(val,na_value2))
545-
{{elif float_group}}
546-
val != val or (use_na_value and val == na_value2)
547-
{{else}}
548-
(use_na_value and val == na_value2)
549-
{{endif}}
580+
is_nan_{{c_type}}(val) or
581+
(use_na_value and are_equivalent_{{c_type}}(val,na_value2))
550582
):
551583
# if missing values do not count as unique values (i.e. if
552584
# ignore_na is True), skip the hashtable entry for them,

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,24 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
66

77
{{py:
88

9-
# dtype, ttype, c_type, complex_group, to_c_type
10-
dtypes = [('complex128', 'complex128', 'khcomplex128_t', True, "to_khcomplex128_t"),
11-
('complex64', 'complex64', 'khcomplex64_t', True, "to_khcomplex64_t"),
12-
('float64', 'float64', 'float64_t', False, ""),
13-
('float32', 'float32', 'float32_t', False, ""),
14-
('uint64', 'uint64', 'uint64_t', False, ""),
15-
('uint32', 'uint32', 'uint32_t', False, ""),
16-
('uint16', 'uint16', 'uint16_t', False, ""),
17-
('uint8', 'uint8', 'uint8_t', False, ""),
18-
('object', 'pymap', 'object', False, ""),
19-
('int64', 'int64', 'int64_t', False, ""),
20-
('int32', 'int32', 'int32_t', False, ""),
21-
('int16', 'int16', 'int16_t', False, ""),
22-
('int8', 'int8', 'int8_t', False, "")]
9+
# dtype, ttype, c_type, to_c_type, to_dtype
10+
dtypes = [('complex128', 'complex128', 'khcomplex128_t', "to_khcomplex128_t", "to_complex128"),
11+
('complex64', 'complex64', 'khcomplex64_t', "to_khcomplex64_t", "to_complex64"),
12+
('float64', 'float64', 'float64_t', "", ""),
13+
('float32', 'float32', 'float32_t', "", ""),
14+
('uint64', 'uint64', 'uint64_t', "", ""),
15+
('uint32', 'uint32', 'uint32_t', "", ""),
16+
('uint16', 'uint16', 'uint16_t', "", ""),
17+
('uint8', 'uint8', 'uint8_t', "", ""),
18+
('object', 'pymap', 'object', "", ""),
19+
('int64', 'int64', 'int64_t', "", ""),
20+
('int32', 'int32', 'int32_t', "", ""),
21+
('int16', 'int16', 'int16_t', "", ""),
22+
('int8', 'int8', 'int8_t', "", "")]
2323

2424
}}
2525

26-
{{for dtype, ttype, c_type, complex_group, to_c_type in dtypes}}
26+
{{for dtype, ttype, c_type, to_c_type, to_dtype in dtypes}}
2727

2828

2929
@cython.wraparound(False)
@@ -62,13 +62,7 @@ cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
6262
for i in range(n):
6363
val = {{to_c_type}}(values[i])
6464

65-
{{if dtype == 'float64' or dtype == 'float32'}}
66-
if val == val or not dropna:
67-
{{elif complex_group}}
6865
if not is_nan_{{c_type}}(val) or not dropna:
69-
{{else}}
70-
if True:
71-
{{endif}}
7266
k = kh_get_{{ttype}}(table, val)
7367
if k != table.n_buckets:
7468
table.vals[k] += 1
@@ -117,11 +111,7 @@ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
117111
with nogil:
118112
for k in range(table.n_buckets):
119113
if kh_exist_{{ttype}}(table, k):
120-
{{if complex_group}}
121-
result_keys[i] = to_{{dtype}}(table.keys[k])
122-
{{else}}
123-
result_keys[i] = table.keys[k]
124-
{{endif}}
114+
result_keys[i] = {{to_dtype}}(table.keys[k])
125115
result_counts[i] = table.vals[k]
126116
i += 1
127117
{{endif}}

pandas/_libs/khash.pxd

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@ cdef extern from "khash_python.h":
2525
double real
2626
double imag
2727

28-
bint are_equal_khcomplex128_t "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil
28+
bint are_equivalent_khcomplex128_t "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil
2929

3030
ctypedef struct khcomplex64_t:
3131
float real
3232
float imag
3333

34-
bint are_equal_khcomplex64_t "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil
34+
bint are_equivalent_khcomplex64_t "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil
35+
36+
bint are_equivalent_float64_t "kh_floats_hash_equal" (float64_t a, float64_t b) nogil
37+
bint are_equivalent_float32_t "kh_floats_hash_equal" (float32_t a, float32_t b) nogil
3538

3639
ctypedef struct kh_pymap_t:
3740
khint_t n_buckets, size, n_occupied, upper_bound

0 commit comments

Comments
 (0)