@@ -722,35 +722,52 @@ def interweave(list_obj):
722
722
723
723
724
724
@pytest.mark.parametrize("test_series", [True, False])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("periods,fill_method,limit", [
    (1, 'ffill', None), (1, 'ffill', 1),
    (1, 'bfill', None), (1, 'bfill', 1),
    (-1, 'ffill', None), (-1, 'ffill', 1),
    (-1, 'bfill', None), (-1, 'bfill', 1)])
def test_pct_change(test_series, shuffle, periods, fill_method, limit):
    """Check groupby ``pct_change`` against a per-key manual computation.

    Parameters
    ----------
    test_series : bool
        If True, exercise the single-column (``grp['vals']``) path and
        compare as a Series; otherwise compare the frame result.
    shuffle : bool
        If True, randomly permute the rows so the keys are interleaved
        instead of contiguous.  Per the original author's note, groupby
        ``pct_change`` takes different code paths for monotonic and
        non-monotonic groupings; this parameter is meant to hit both.
    periods, fill_method, limit
        Forwarded unchanged to ``pct_change`` for both the expected and
        the actual computation.
    """
    vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
    keys = ['a', 'b']
    # One contiguous run of len(vals) rows per key: a, a, ..., b, b, ...
    df = DataFrame({'key': [k for k in keys for _ in vals],
                    'vals': vals * 2})
    if shuffle:
        df = df.reindex(np.random.permutation(len(df))).reset_index(drop=True)

    # Expected values: run pct_change independently on each key's values
    # (in their current row order) and stack the pieces in key order.
    manual_apply = [
        Series(df.loc[df.key == k, 'vals'].values).pct_change(
            periods=periods,
            fill_method=fill_method,
            limit=limit)
        for k in keys
    ]
    exp_vals = pd.concat(manual_apply).reset_index(drop=True)
    exp = pd.DataFrame(exp_vals, columns=['_pct_change'])
    grp = df.groupby('key')

    def get_result(grp_obj):
        return grp_obj.pct_change(periods=periods,
                                  fill_method=fill_method,
                                  limit=limit)

    if test_series:
        exp = exp.loc[:, '_pct_change']
        grp = grp['vals']
        result = get_result(grp)
        # Regroup rows by key so they line up with the expected values.
        # The sort must be stable (mergesort): the default quicksort does
        # not guarantee that rows sharing a key keep their relative order,
        # which the per-key expected values depend on.
        df.insert(0, '_pct_change', result)
        result = df.sort_values(by='key', kind='mergesort')
        result = result.loc[:, '_pct_change']
        result = result.reset_index(drop=True)
        tm.assert_series_equal(result, exp)
    else:
        result = get_result(grp)
        # NOTE(review): unlike the Series branch, this result is compared
        # positionally without being regrouped by key -- confirm that is
        # the intended behaviour when ``shuffle`` is True.
        result.reset_index(drop=True, inplace=True)
        result.columns = ['_pct_change']
        tm.assert_frame_equal(result, exp)
755
772
756
773
0 commit comments