@@ -279,12 +279,12 @@ def bucketcat(series, cats):
279
279
cats = np .asarray (cats )
280
280
281
281
unique_labels = np .unique (cats )
282
- unique_labels = unique_labels [notnull (unique_labels )]
282
+ unique_labels = unique_labels [com . notnull (unique_labels )]
283
283
284
284
# group by
285
285
data = {}
286
286
287
- for i , label in enumerate ( unique_labels ) :
287
+ for label in unique_labels :
288
288
data [label ] = series [cats == label ]
289
289
290
290
return DataFrame (data , columns = unique_labels )
@@ -331,15 +331,12 @@ def _bucketpanel_by(series, xby, yby, xbins, ybins):
331
331
xby = xby .reindex (series .index )
332
332
yby = yby .reindex (series .index )
333
333
334
- n = len (series )
335
- # indices = np.arange(n)
336
-
337
334
xlabels = _bucket_labels (xby .reindex (series .index ), xbins )
338
335
ylabels = _bucket_labels (yby .reindex (series .index ), ybins )
339
336
340
337
labels = _uniquify (xlabels , ylabels , xbins , ybins )
341
338
342
- mask = isnull (labels )
339
+ mask = com . isnull (labels )
343
340
labels [mask ] = - 1
344
341
345
342
unique_labels = np .unique (labels )
@@ -354,8 +351,8 @@ def relabel(key):
354
351
xlab = xlabels [pos ]
355
352
ylab = ylabels [pos ]
356
353
357
- return '%sx%s' % (int (xlab ) if notnull (xlab ) else 'NULL' ,
358
- int (ylab ) if notnull (ylab ) else 'NULL' )
354
+ return '%sx%s' % (int (xlab ) if com . notnull (xlab ) else 'NULL' ,
355
+ int (ylab ) if com . notnull (ylab ) else 'NULL' )
359
356
360
357
return bucketed .rename (columns = relabel )
361
358
@@ -372,7 +369,7 @@ def _bucketpanel_cat(series, xcat, ycat):
372
369
sorted_ylabels = ylabels .take (sorter )
373
370
374
371
unique_labels = np .unique (labels )
375
- unique_labels = unique_labels [notnull (unique_labels )]
372
+ unique_labels = unique_labels [com . notnull (unique_labels )]
376
373
377
374
locs = sorted_labels .searchsorted (unique_labels )
378
375
xkeys = sorted_xlabels .take (locs )
@@ -394,8 +391,6 @@ def _intern(values):
394
391
labels = uniqued .searchsorted (values )
395
392
return labels , uniqued
396
393
397
- def _intern_fast (values ):
398
- pass
399
394
400
395
def _uniquify (xlabels , ylabels , xbins , ybins ):
401
396
# encode the stuff, create unique label
@@ -405,19 +400,6 @@ def _uniquify(xlabels, ylabels, xbins, ybins):
405
400
406
401
return _xpiece + _ypiece
407
402
408
- def _cat_labels (labels ):
409
- # group by
410
- data = {}
411
-
412
- unique_labels = np .unique (labels )
413
- unique_labels = unique_labels [notnull (unique_labels )]
414
-
415
- for label in unique_labels :
416
- mask = labels == label
417
- data [stringified ] = series [mask ]
418
-
419
- return DataFrame (data , index = series .index )
420
-
421
403
def _bucket_labels (series , k ):
422
404
arr = np .asarray (series )
423
405
mask = np .isfinite (arr )
@@ -426,43 +408,12 @@ def _bucket_labels(series, k):
426
408
427
409
split = np .array_split (np .arange (n )[mask ].take (order ), k )
428
410
429
- bucketsize = n / k
430
-
431
411
mat = np .empty (n , dtype = float ) * np .NaN
432
412
for i , v in enumerate (split ):
433
413
mat [v ] = i
434
414
435
415
return mat + 1
436
416
437
- def makeQuantiles (series , n ):
438
- """
439
- Compute quantiles of input series.
440
-
441
- Parameters
442
- ----------
443
- series: Series
444
- Must have 'order' method and index
445
- n: int
446
- Number of quantile buckets
447
-
448
- Returns
449
- -------
450
- (edges, quantiles)
451
- edges: ith bucket --> (left edge, right edge)
452
- quantiles: ith bucket --> set of values
453
- """
454
- series = remove_na (series ).copy ()
455
- series = series .order ()
456
- quantiles = {}
457
- edges = {}
458
- T = float (len (series ))
459
- inc = T / n
460
- for i in range (n ):
461
- theSlice = series [inc * i :(i + 1 )* inc ]
462
- quantiles [i + 1 ] = theSlice
463
- edges [i + 1 ] = theSlice [0 ], theSlice [- 1 ]
464
- return edges , quantiles
465
-
466
417
def quantileTS (frame , percentile ):
467
418
"""
468
419
Return score at percentile for each point in time (cross-section)
@@ -477,10 +428,12 @@ def quantileTS(frame, percentile):
477
428
-------
478
429
Series (or TimeSeries)
479
430
"""
431
+ from pandas .compat .scipy import scoreatpercentile
432
+
480
433
def func(x):
    """Return the score at ``percentile`` for a single cross-section.

    Applied by the enclosing function to each row of ``frame``.

    Parameters
    ----------
    x : Series
        One row of the frame. ``x.valid()`` is expected to return only
        the non-missing observations -- TODO confirm against the Series
        API in use.

    Returns
    -------
    float
        ``scoreatpercentile(x, percentile)`` over the remaining values,
        or ``np.nan`` when no values remain.
    """
    x = np.asarray(x.valid())
    # Test for emptiness rather than truthiness: ``x.any()`` is False for
    # a row consisting solely of zeros, which made such rows yield NaN
    # even though their percentile score is well defined (0).
    if x.size:
        return scoreatpercentile(x, percentile)
    return np.nan
486
439
return frame .apply (func , axis = 1 )
0 commit comments