@@ -35,23 +35,6 @@ cdef extern from "Python.h":
35
35
36
36
cdef size_t _INIT_VEC_CAP = 32
37
37
38
- def list_to_object_array (list obj ):
39
- '''
40
- Convert list to object ndarray. Seriously can't believe I had to write this
41
- function
42
- '''
43
- cdef:
44
- Py_ssize_t i, n
45
- ndarray[object ] arr
46
-
47
- n = len (obj)
48
- arr = np.empty(n, dtype = object )
49
-
50
- for i from 0 <= i < n:
51
- arr[i] = obj[i]
52
-
53
- return arr
54
-
55
38
cdef class Vector:
56
39
pass
57
40
@@ -68,6 +51,9 @@ cdef class ObjectVector(Vector):
68
51
self .ao = np.empty(_INIT_VEC_CAP, dtype = object )
69
52
self .data = < PyObject** > self .ao.data
70
53
54
+ def __len__ (self ):
55
+ return self .n
56
+
71
57
cdef inline append(self , object o):
72
58
if self .n == self .m:
73
59
self .m = max (self .m * 2 , _INIT_VEC_CAP)
@@ -132,37 +118,53 @@ cdef class Int64Vector:
132
118
133
119
Int64VectorData_append(self .data, x)
134
120
121
+ ctypedef struct Float64VectorData:
122
+ float64_t * data
123
+ size_t n, m
124
+
125
+ cdef uint8_t Float64VectorData_needs_resize(Float64VectorData * data) nogil:
126
+ return data.n == data.m
127
+
128
+ cdef void Float64VectorData_append(Float64VectorData * data, float64_t x) nogil:
129
+
130
+ data.data[data.n] = x
131
+ data.n += 1
132
+
135
133
cdef class Float64Vector(Vector):
136
134
137
135
cdef:
138
- float64_t * data
139
- size_t n, m
136
+ Float64VectorData * data
140
137
ndarray ao
141
138
142
139
def __cinit__ (self ):
143
- self .n = 0
144
- self .m = _INIT_VEC_CAP
145
- self .ao = np.empty(_INIT_VEC_CAP, dtype = np.float64)
146
- self .data = < float64_t* > self .ao.data
140
+ self .data = < Float64VectorData * > PyMem_Malloc(sizeof(Float64VectorData))
141
+ self .data.n = 0
142
+ self .data.m = _INIT_VEC_CAP
143
+ self .ao = np.empty(self .data.m, dtype = np.float64)
144
+ self .data.data = < float64_t* > self .ao.data
147
145
148
146
cdef resize(self ):
149
- self .m = max (self .m * 2 , _INIT_VEC_CAP)
150
- self .ao.resize(self .m)
151
- self .data = < float64_t* > self .ao.data
147
+ self .data. m = max (self .data. m * 4 , _INIT_VEC_CAP)
148
+ self .ao.resize(self .data. m)
149
+ self .data.data = < float64_t* > self .ao.data
152
150
153
- cdef inline void append(self , float64_t x) nogil:
154
- if self .n == self .m:
155
- with gil:
156
- self .resize()
151
+ def __dealloc__ (self ):
152
+ PyMem_Free(self .data)
157
153
158
- self .data[ self .n] = x
159
- self .n += 1
154
+ def __len__ ( self ):
155
+ return self .data.n
160
156
161
157
def to_array (self ):
162
- self .ao.resize(self .n)
163
- self .m = self .n
158
+ self .ao.resize(self .data. n)
159
+ self .data. m = self .data .n
164
160
return self .ao
165
161
162
+ cdef inline void append(self , float64_t x):
163
+
164
+ if Float64VectorData_needs_resize(self .data):
165
+ self .resize()
166
+
167
+ Float64VectorData_append(self .data, x)
166
168
167
169
cdef class HashTable:
168
170
pass
@@ -459,13 +461,21 @@ cdef class Int64HashTable(HashTable):
459
461
int64_t val
460
462
khiter_t k
461
463
Int64Vector uniques = Int64Vector()
464
+ Int64VectorData * ud
462
465
463
- for i in range (n):
464
- val = values[i]
465
- k = kh_get_int64(self .table, val)
466
- if k == self .table.n_buckets:
467
- kh_put_int64(self .table, val, & ret)
468
- uniques.append(val)
466
+ ud = uniques.data
467
+
468
+ with nogil:
469
+ for i in range (n):
470
+ val = values[i]
471
+ k = kh_get_int64(self .table, val)
472
+ if k == self .table.n_buckets:
473
+ kh_put_int64(self .table, val, & ret)
474
+
475
+ if Int64VectorData_needs_resize(ud):
476
+ with gil:
477
+ uniques.resize()
478
+ Int64VectorData_append(ud, val)
469
479
470
480
result = uniques.to_array()
471
481
@@ -526,26 +536,33 @@ cdef class Float64HashTable(HashTable):
526
536
int ret = 0
527
537
float64_t val
528
538
khiter_t k
539
+ Float64VectorData * ud
529
540
530
541
labels = np.empty(n, dtype = np.int64)
542
+ ud = uniques.data
531
543
532
- for i in range (n):
533
- val = values[i]
544
+ with nogil:
545
+ for i in range (n):
546
+ val = values[i]
534
547
535
- if val != val:
536
- labels[i] = na_sentinel
537
- continue
548
+ if val != val:
549
+ labels[i] = na_sentinel
550
+ continue
538
551
539
- k = kh_get_float64(self .table, val)
540
- if k != self .table.n_buckets:
541
- idx = self .table.vals[k]
542
- labels[i] = idx
543
- else :
544
- k = kh_put_float64(self .table, val, & ret)
545
- self .table.vals[k] = count
546
- uniques.append(val)
547
- labels[i] = count
548
- count += 1
552
+ k = kh_get_float64(self .table, val)
553
+ if k != self .table.n_buckets:
554
+ idx = self .table.vals[k]
555
+ labels[i] = idx
556
+ else :
557
+ k = kh_put_float64(self .table, val, & ret)
558
+ self .table.vals[k] = count
559
+
560
+ if Float64VectorData_needs_resize(ud):
561
+ with gil:
562
+ uniques.resize()
563
+ Float64VectorData_append(ud, val)
564
+ labels[i] = count
565
+ count += 1
549
566
550
567
return labels
551
568
@@ -588,20 +605,33 @@ cdef class Float64HashTable(HashTable):
588
605
int ret = 0
589
606
float64_t val
590
607
khiter_t k
591
- Float64Vector uniques = Float64Vector()
592
608
bint seen_na = 0
609
+ Float64Vector uniques = Float64Vector()
610
+ Float64VectorData * ud
593
611
594
- for i in range (n):
595
- val = values[i]
612
+ ud = uniques.data
596
613
597
- if val == val:
598
- k = kh_get_float64(self .table, val)
599
- if k == self .table.n_buckets:
600
- kh_put_float64(self .table, val, & ret)
601
- uniques.append(val)
602
- elif not seen_na:
603
- seen_na = 1
604
- uniques.append(NAN)
614
+ with nogil:
615
+ for i in range (n):
616
+ val = values[i]
617
+
618
+ if val == val:
619
+ k = kh_get_float64(self .table, val)
620
+ if k == self .table.n_buckets:
621
+ kh_put_float64(self .table, val, & ret)
622
+
623
+ if Float64VectorData_needs_resize(ud):
624
+ with gil:
625
+ uniques.resize()
626
+ Float64VectorData_append(ud, val)
627
+
628
+ elif not seen_na:
629
+ seen_na = 1
630
+
631
+ if Float64VectorData_needs_resize(ud):
632
+ with gil:
633
+ uniques.resize()
634
+ Float64VectorData_append(ud, NAN)
605
635
606
636
return uniques.to_array()
607
637
0 commit comments