@@ -125,7 +125,7 @@ def get_mask(self, value):
125
125
return res
126
126
return self ._val_bitarrs [value ].copy ()
127
127
128
- def num_indices (self , value , mask = None ):
128
+ def count (self , value , mask = None ):
129
129
'''Number of indices for the given `value`'''
130
130
if mask is not None :
131
131
if len (mask ) != self .n_input :
@@ -136,15 +136,15 @@ def num_indices(self, value, mask=None):
136
136
if mask is None :
137
137
return self ._n_input
138
138
return mask .count ()
139
- unique_idx = self ._unique_vals .get (_NoValue )
139
+ unique_idx = self ._unique_vals .get (value , _NoValue )
140
140
if unique_idx is not _NoValue :
141
141
if mask is not None :
142
142
if mask [unique_idx ]:
143
143
return 1
144
144
return 0
145
145
return 1
146
146
if mask is not None :
147
- return (self ._val_bitarrs [value ] & mask ).count
147
+ return (self ._val_bitarrs [value ] & mask ).count ()
148
148
return self ._val_bitarrs [value ].count ()
149
149
150
150
def get_value (self , idx ):
@@ -169,14 +169,14 @@ def to_list(self):
169
169
170
170
def extend (self , values ):
171
171
'''Add more values to the end of any existing ones'''
172
- curr_size = self ._n_input
172
+ init_size = self ._n_input
173
173
if isinstance (values , ValueIndices ):
174
174
other_is_vi = True
175
175
other_size = values ._n_input
176
176
else :
177
177
other_is_vi = False
178
178
other_size = len (values )
179
- final_size = curr_size + other_size
179
+ final_size = init_size + other_size
180
180
for ba in self ._val_bitarrs .values ():
181
181
ba .extend (zeros (other_size ))
182
182
if other_is_vi :
@@ -185,7 +185,7 @@ def extend(self, values):
185
185
self ._extend_const (values )
186
186
return
187
187
else :
188
- self ._rm_const ()
188
+ self ._rm_const (final_size )
189
189
elif values ._const_val is not _NoValue :
190
190
cval = values ._const_val
191
191
other_unique = {}
@@ -199,40 +199,49 @@ def extend(self, values):
199
199
other_unique = values ._unique_vals
200
200
other_bitarrs = values ._val_bitarrs
201
201
for val , other_idx in other_unique .items ():
202
- self ._ingest_single (val , final_size , curr_size , other_idx )
202
+ self ._ingest_single (val , final_size , init_size , other_idx )
203
203
for val , other_ba in other_bitarrs .items ():
204
204
curr_ba = self ._val_bitarrs .get (val )
205
205
if curr_ba is None :
206
206
curr_idx = self ._unique_vals .get (val )
207
207
if curr_idx is None :
208
- if curr_size == 0 :
208
+ if init_size == 0 :
209
209
new_ba = other_ba .copy ()
210
210
else :
211
- new_ba = zeros (curr_size )
211
+ new_ba = zeros (init_size )
212
212
new_ba .extend (other_ba )
213
213
else :
214
- new_ba = zeros (curr_size )
214
+ new_ba = zeros (init_size )
215
215
new_ba [curr_idx ] = True
216
216
new_ba .extend (other_ba )
217
217
del self ._unique_vals [val ]
218
218
self ._val_bitarrs [val ] = new_ba
219
219
else :
220
- curr_ba [curr_size :] |= other_ba
220
+ curr_ba [init_size :] |= other_ba
221
+ self ._n_input += other_ba .count ()
221
222
else :
222
223
for other_idx , val in enumerate (values ):
223
- self ._ingest_single (val , final_size , curr_size , other_idx )
224
- self ._n_input = final_size
224
+ self ._ingest_single (val , final_size , init_size , other_idx )
225
+ assert self ._n_input == final_size
225
226
226
227
def append (self , value ):
227
228
'''Append another value as input'''
228
229
if self ._const_val == value :
229
230
self ._n_input += 1
230
231
return
231
232
elif self ._const_val is not _NoValue :
232
- self ._rm_const ()
233
+ self ._rm_const (self ._n_input + 1 )
234
+ self ._unique_vals [value ] = self ._n_input
235
+ self ._n_input += 1
236
+ return
237
+ if self ._n_input == 0 :
238
+ self ._const_val = value
239
+ self ._n_input += 1
240
+ return
233
241
curr_size = self ._n_input
234
242
found = False
235
243
for val , bitarr in self ._val_bitarrs .items ():
244
+ assert len (bitarr ) == self ._n_input
236
245
if val == value :
237
246
found = True
238
247
bitarr .append (True )
@@ -318,7 +327,7 @@ def get_block_size(self):
318
327
if rem != 0 :
319
328
return None
320
329
for val in self .values ():
321
- if self .num_indices (val ) != block_size :
330
+ if self .count (val ) != block_size :
322
331
return None
323
332
return block_size
324
333
@@ -335,32 +344,43 @@ def _extract_indices(self, ba):
335
344
except ValueError :
336
345
return
337
346
yield curr_idx
338
- start = curr_idx
347
+ start = curr_idx + 1
339
348
340
- def _ingest_single (self , val , final_size , curr_size , other_idx ):
349
+ def _ingest_single (self , val , final_size , init_size , other_idx ):
341
350
'''Helper to ingest single value from another collection'''
351
+ if val == self ._const_val :
352
+ self ._n_input += 1
353
+ return
354
+ elif self ._const_val is not _NoValue :
355
+ self ._rm_const (final_size )
356
+ if self ._n_input == 0 :
357
+ self ._const_val = val
358
+ self ._n_input += 1
359
+ return
360
+
342
361
curr_ba = self ._val_bitarrs .get (val )
343
362
if curr_ba is None :
344
363
curr_idx = self ._unique_vals .get (val )
345
364
if curr_idx is None :
346
- self ._unique_vals [val ] = curr_size + other_idx
365
+ self ._unique_vals [val ] = init_size + other_idx
347
366
else :
348
367
new_ba = zeros (final_size )
349
368
new_ba [curr_idx ] = True
350
- new_ba [curr_size + other_idx ] = True
369
+ new_ba [init_size + other_idx ] = True
351
370
self ._val_bitarrs [val ] = new_ba
352
371
del self ._unique_vals [val ]
353
372
else :
354
- curr_ba [curr_size + other_idx ] = True
373
+ curr_ba [init_size + other_idx ] = True
374
+ self ._n_input += 1
355
375
356
- def _rm_const (self ):
376
+ def _rm_const (self , final_size ):
357
377
assert self ._const_val is not _NoValue
358
378
if self ._n_input == 1 :
359
379
self ._unique_vals [self ._const_val ] = 0
360
380
else :
361
- self ._val_bitarrs [self ._const_val ] = bitarray ( self . _n_input )
362
- self ._val_bitarrs [self ._const_val ]. setall ( 1 )
363
- self ._const_val == _NoValue
381
+ self ._val_bitarrs [self ._const_val ] = zeros ( final_size )
382
+ self ._val_bitarrs [self ._const_val ][: self . _n_input ] = True
383
+ self ._const_val = _NoValue
364
384
365
385
def _extend_const (self , other ):
366
386
if self ._const_val != other ._const_val :
0 commit comments