1
- from cpython cimport PyObject
1
+ from cpython cimport PyObject, Py_INCREF
2
2
3
3
from khash cimport *
4
4
from numpy cimport *
@@ -29,144 +29,96 @@ def list_to_object_array(list obj):
29
29
return arr
30
30
31
31
32
- cdef extern from " kvec.h" :
33
-
34
- ctypedef struct kv_int64_t:
35
- size_t n, m
36
- int64_t* a
37
-
38
- ctypedef struct kv_double:
39
- size_t n, m
40
- double * a
41
-
42
- ctypedef struct kv_object_t:
43
- size_t n, m
44
- PyObject** a
45
-
46
- inline void kv_object_push(kv_object_t * v, PyObject* x)
47
- inline void kv_object_destroy(kv_object_t * v)
48
- inline void kv_int64_push(kv_int64_t * v, int64_t x)
49
- inline void kv_double_push(kv_double * v, double x)
50
-
32
+ cdef size_t _INIT_VEC_CAP = 32
51
33
52
34
cdef class ObjectVector:
    """
    Append-only, growable vector of Python objects.

    Storage is a NumPy object array (``ao``); ``data`` is a raw pointer
    into that array's buffer so that ``append`` can store items without
    going through Python-level indexing.
    """

    cdef:
        size_t n, m        # n: items stored so far, m: allocated capacity
        ndarray ao         # backing array that owns the buffer
        PyObject **data    # raw pointer to ao's buffer; refreshed on resize

    def __cinit__(self):
        self.n = 0
        self.m = _INIT_VEC_CAP
        self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
        self.data = <PyObject**> self.ao.data

    def __len__(self):
        """Return the number of items appended so far."""
        return self.n

    def to_array(self):
        """
        Trim the backing array to the stored length and return it.

        The returned object is the vector's own backing array, not a copy.
        """
        self.ao.resize(self.n)
        # The buffer now holds exactly ``n`` slots. Record that as the new
        # capacity and re-read the buffer pointer; otherwise a later
        # ``append`` would skip the regrow and write past the shrunken
        # buffer.
        self.m = self.n
        self.data = <PyObject**> self.ao.data
        return self.ao

    cdef inline append(self, object o):
        """Store ``o`` at the end of the vector, growing the buffer if full."""
        if self.n == self.m:
            # Double the capacity, but never below the initial capacity so
            # that a vector trimmed to zero length can still grow.
            self.m = max(self.m * 2, _INIT_VEC_CAP)
            self.ao.resize(self.m)
            # resize may move the buffer, so the cached pointer is re-read.
            self.data = <PyObject**> self.ao.data

        # The raw pointer store bypasses automatic reference counting, so
        # the reference now held by the array is taken explicitly.
        Py_INCREF(o)
        self.data[self.n] = <PyObject*> o
        self.n += 1
93
63
94
64
95
65
cdef class Int64Vector:
    """
    Append-only, growable vector of int64 values.

    Storage is a NumPy int64 array (``ao``); ``data`` is a raw pointer into
    that array's buffer used by ``append`` for direct stores.
    """

    cdef:
        size_t n, m        # n: items stored so far, m: allocated capacity
        ndarray ao         # backing array that owns the buffer
        int64_t *data      # raw pointer to ao's buffer; refreshed on resize

    def __cinit__(self):
        self.n = 0
        self.m = _INIT_VEC_CAP
        self.ao = np.empty(_INIT_VEC_CAP, dtype=np.int64)
        self.data = <int64_t*> self.ao.data

    def __len__(self):
        """Return the number of items appended so far."""
        return self.n

    def to_array(self):
        """
        Trim the backing array to the stored length and return it.

        The returned object is the vector's own backing array, not a copy.
        """
        self.ao.resize(self.n)
        # The buffer now holds exactly ``n`` slots. Record that as the new
        # capacity and re-read the buffer pointer; otherwise a later
        # ``append`` would skip the regrow and write past the shrunken
        # buffer.
        self.m = self.n
        self.data = <int64_t*> self.ao.data
        return self.ao

    cdef inline append(self, int64_t x):
        """Store ``x`` at the end of the vector, growing the buffer if full."""
        if self.n == self.m:
            # Double the capacity, but never below the initial capacity so
            # that a vector trimmed to zero length can still grow.
            self.m = max(self.m * 2, _INIT_VEC_CAP)
            self.ao.resize(self.m)
            # resize may move the buffer, so the cached pointer is re-read.
            self.data = <int64_t*> self.ao.data

        self.data[self.n] = x
        self.n += 1
132
93
133
94
cdef class Float64Vector:
    """
    Append-only, growable vector of float64 values.

    Storage is a NumPy float64 array (``ao``); ``data`` is a raw pointer
    into that array's buffer used by ``append`` for direct stores.
    """

    cdef:
        size_t n, m        # n: items stored so far, m: allocated capacity
        ndarray ao         # backing array that owns the buffer
        float64_t *data    # raw pointer to ao's buffer; refreshed on resize

    def __cinit__(self):
        self.n = 0
        self.m = _INIT_VEC_CAP
        self.ao = np.empty(_INIT_VEC_CAP, dtype=np.float64)
        self.data = <float64_t*> self.ao.data

    def __len__(self):
        """Return the number of items appended so far."""
        return self.n

    def to_array(self):
        """
        Trim the backing array to the stored length and return it.

        The returned object is the vector's own backing array, not a copy.
        """
        self.ao.resize(self.n)
        # The buffer now holds exactly ``n`` slots. Record that as the new
        # capacity and re-read the buffer pointer; otherwise a later
        # ``append`` would skip the regrow and write past the shrunken
        # buffer.
        self.m = self.n
        self.data = <float64_t*> self.ao.data
        return self.ao

    cdef inline append(self, float64_t x):
        """Store ``x`` at the end of the vector, growing the buffer if full."""
        if self.n == self.m:
            # Double the capacity, but never below the initial capacity so
            # that a vector trimmed to zero length can still grow.
            self.m = max(self.m * 2, _INIT_VEC_CAP)
            self.ao.resize(self.m)
            # resize may move the buffer, so the cached pointer is re-read.
            self.data = <float64_t*> self.ao.data

        self.data[self.n] = x
        self.n += 1
170
122
171
123
172
124
cdef class HashTable:
@@ -262,7 +214,7 @@ cdef class StringHashTable(HashTable):
262
214
uniques.append(val)
263
215
264
216
# return None
265
- return uniques.to_array(xfer_data = True )
217
+ return uniques.to_array()
266
218
267
219
def factorize (self , ndarray[object] values ):
268
220
cdef:
@@ -573,7 +525,7 @@ cdef class Int64HashTable(HashTable):
573
525
labels[i] = count
574
526
count += 1
575
527
576
- arr_uniques = uniques.to_array(xfer_data = True )
528
+ arr_uniques = uniques.to_array()
577
529
578
530
return labels, arr_uniques
579
531
@@ -587,8 +539,6 @@ cdef class Int64HashTable(HashTable):
587
539
khiter_t k
588
540
Int64Vector uniques = Int64Vector()
589
541
590
- # TODO: kvec
591
-
592
542
for i in range (n):
593
543
val = values[i]
594
544
k = kh_get_int64(self .table, val)
@@ -597,11 +547,7 @@ cdef class Int64HashTable(HashTable):
597
547
uniques.append(val)
598
548
count += 1
599
549
600
- result = uniques.to_array(xfer_data = True )
601
-
602
- # result = np.array(uniques, copy=False)
603
- # result.base = <PyObject*> uniques
604
- # Py_INCREF(uniques)
550
+ result = uniques.to_array()
605
551
606
552
return result
607
553
@@ -625,7 +571,7 @@ cdef class Float64HashTable(HashTable):
625
571
def factorize (self , ndarray[float64_t] values ):
626
572
uniques = Float64Vector()
627
573
labels = self .get_labels(values, uniques, 0 , - 1 )
628
- return uniques.to_array(xfer_data = True ), labels
574
+ return uniques.to_array(), labels
629
575
630
576
cpdef get_labels(self , ndarray[float64_t] values,
631
577
Float64Vector uniques,
@@ -698,8 +644,6 @@ cdef class Float64HashTable(HashTable):
698
644
Float64Vector uniques = Float64Vector()
699
645
bint seen_na = 0
700
646
701
- # TODO: kvec
702
-
703
647
for i in range (n):
704
648
val = values[i]
705
649
@@ -713,7 +657,7 @@ cdef class Float64HashTable(HashTable):
713
657
seen_na = 1
714
658
uniques.append(ONAN)
715
659
716
- return uniques.to_array(xfer_data = True )
660
+ return uniques.to_array()
717
661
718
662
cdef class PyObjectHashTable(HashTable):
719
663
cdef kh_pymap_t * table
@@ -839,11 +783,7 @@ cdef class PyObjectHashTable(HashTable):
839
783
seen_na = 1
840
784
uniques.append(ONAN)
841
785
842
- result = uniques.to_array(xfer_data = True )
843
-
844
- # result = np.array(uniques, copy=False)
845
- # result.base = <PyObject*> uniques
846
- # Py_INCREF(uniques)
786
+ result = uniques.to_array()
847
787
848
788
return result
849
789
@@ -903,7 +843,7 @@ cdef class Factorizer:
903
843
if labels.dtype != np.int_:
904
844
labels = labels.astype(np.int_)
905
845
906
- sorter = self .uniques.to_array(xfer_data = False ).argsort()
846
+ sorter = self .uniques.to_array().argsort()
907
847
reverse_indexer = np.empty(len (sorter), dtype = np.int_)
908
848
reverse_indexer.put(sorter, np.arange(len (sorter)))
909
849
@@ -940,7 +880,7 @@ cdef class Int64Factorizer:
940
880
if labels.dtype != np.int_:
941
881
labels = labels.astype(np.int_)
942
882
943
- sorter = self .uniques.to_array(xfer_data = False ).argsort()
883
+ sorter = self .uniques.to_array().argsort()
944
884
reverse_indexer = np.empty(len (sorter), dtype = np.int_)
945
885
reverse_indexer.put(sorter, np.arange(len (sorter)))
946
886
0 commit comments