@@ -484,3 +484,78 @@ def f(g):
484
484
485
485
# Aggregation benchmarks that pass the Python builtins sum/min/max as a list to
# .agg(): once grouping by a single key ('jim') and once by two keys
# ('jim', 'joe').
# NOTE(review): `setup` is defined above this excerpt -- confirm it builds a
# `df` that has 'jim' and 'joe' columns.
groupby_agg_builtins1 = Benchmark ("df.groupby('jim').agg([sum, min, max])" , setup )
486
486
groupby_agg_builtins2 = Benchmark ("df.groupby(['jim', 'joe']).agg([sum, min, max])" , setup )
487
+
488
+ #----------------------------------------------------------------------
489
+ # groupby with a variable value for ngroups
490
+
491
+
492
# Group-count parameters to benchmark.
ngroups_list = [100, 10000]

# Names of groupby methods that are benchmarked by calling them with no
# arguments.  The order is significant only in that it is the order in which
# the benchmarks get created.
no_arg_func_list = [
    'all', 'any', 'count',
    'cumcount', 'cummax', 'cummin', 'cumprod', 'cumsum',
    'describe', 'diff',
    'first', 'head', 'last',
    'mad', 'max', 'mean', 'median', 'min',
    'nunique', 'pct_change', 'prod', 'rank',
    'sem', 'size', 'skew', 'std', 'sum',
    'tail', 'unique', 'var', 'value_counts',
]
526
+
527
+
528
# Templates shared by the variable-ngroups benchmarks.
#   _stmt_template:  the timed statement; its %s is filled with the text of a
#                    method call that is applied to the 'timestamp' column of
#                    `df` grouped by 'value'.
#   _setup_template: common_setup followed by code that seeds NumPy's RNG with
#                    1234 and builds `df` with size = 2 * ngroups rows; its
#                    single %s is filled with the ngroups value.
#   START_DATE:      datetime(2011, 7, 1), passed to Benchmark as start_date.
+ _stmt_template = "df.groupby('value')['timestamp'].%s"
529
+ _setup_template = common_setup + """
530
+ np.random.seed(1234)
531
+ ngroups = %s
532
+ size = ngroups * 2
533
+ rng = np.arange(ngroups)
534
+ df = DataFrame(dict(
535
+ timestamp=rng.take(np.random.randint(0, ngroups, size=size)),
536
+ value=np.random.randint(0, size, size=size)
537
+ ))
538
+ """
539
+ START_DATE = datetime (2011 , 7 , 1 )
540
+
541
+
542
def make_large_ngroups_bmark(ngroups, func_name, func_args=''):
    """Build a named Benchmark for one groupby method at one group count.

    Parameters
    ----------
    ngroups : value substituted into ``_setup_template`` as ``ngroups``
    func_name : name of the groupby method to call in the timed statement
    func_args : text placed between the parentheses of the method call
        (empty by default, i.e. a no-argument call)

    Returns
    -------
    The Benchmark, with ``name`` set to
    ``'groupby_ngroups_<ngroups>_<func_name>'``.
    """
    name = 'groupby_ngroups_%s_%s' % (ngroups, func_name)
    call_text = '%s(%s)' % (func_name, func_args)
    benchmark = Benchmark(
        _stmt_template % call_text,
        _setup_template % ngroups,
        start_date=START_DATE,
    )
    # The name MUST be set explicitly: inject_bmark_into_globals refuses
    # benchmarks that do not carry one.
    benchmark.name = name
    return benchmark
550
+
551
+
552
def inject_bmark_into_globals(bmark):
    """Bind *bmark* in this module's globals under the key ``bmark.name``.

    Raises
    ------
    AssertionError
        If ``bmark.name`` is falsy (e.g. ``None`` or an empty string).
    """
    if bmark.name:
        module_namespace = globals()
        module_namespace[bmark.name] = bmark
        return
    raise AssertionError('benchmark must have a name')
556
+
557
+
558
# Create one benchmark per (ngroups, method) pair and register each as a
# module-level global under its own name.
#
# The Benchmark is passed straight to inject_bmark_into_globals rather than
# being bound to a temporary module-level variable first.  A leftover
# temporary would stay in this module's globals after the loop and would
# itself be a Benchmark instance, i.e. the last benchmark would be reachable
# under two global names.
# NOTE(review): this matters if benchmarks are discovered by scanning module
# globals for Benchmark instances (the last one would be collected twice) --
# confirm against the suite runner.
for ngroups in ngroups_list:
    for func_name in no_arg_func_list:
        inject_bmark_into_globals(
            make_large_ngroups_bmark(ngroups, func_name))
0 commit comments