@@ -2914,7 +2914,7 @@ def test_groupby_with_timegrouper(self):
2914
2914
# TimeGrouper requires a sorted index
2915
2915
# also verifies that the resultant index has the correct name
2916
2916
import datetime as DT
2917
- df = DataFrame ({
2917
+ df_original = DataFrame ({
2918
2918
'Buyer' : 'Carl Carl Carl Carl Joe Carl' .split (),
2919
2919
'Quantity' : [18 ,3 ,5 ,1 ,9 ,3 ],
2920
2920
'Date' : [
@@ -2925,29 +2925,34 @@ def test_groupby_with_timegrouper(self):
2925
2925
DT .datetime (2013 ,12 ,2 ,12 ,0 ),
2926
2926
DT .datetime (2013 ,9 ,2 ,14 ,0 ),
2927
2927
]})
2928
- df = df .set_index (['Date' ])
2928
+
2929
+ # GH 6908 change target column's order
2930
+ df_reordered = df_original .sort (columns = 'Quantity' )
2929
2931
2930
- expected = DataFrame ({ 'Quantity' : np .nan },
2931
- index = date_range ('20130901 13:00:00' ,'20131205 13:00:00' ,
2932
- freq = '5D' ,name = 'Date' ,closed = 'left' ))
2933
- expected .iloc [[0 ,6 ,18 ],0 ] = np .array ([24. ,6. ,9. ],dtype = 'float64' )
2932
+ for df in [df_original , df_reordered ]:
2933
+ df = df .set_index (['Date' ])
2934
2934
2935
- result1 = df .resample ('5D' ,how = sum )
2936
- assert_frame_equal (result1 , expected )
2935
+ expected = DataFrame ({ 'Quantity' : np .nan },
2936
+ index = date_range ('20130901 13:00:00' ,'20131205 13:00:00' ,
2937
+ freq = '5D' ,name = 'Date' ,closed = 'left' ))
2938
+ expected .iloc [[0 ,6 ,18 ],0 ] = np .array ([24. ,6. ,9. ],dtype = 'float64' )
2937
2939
2938
- df_sorted = df .sort_index ()
2939
- result2 = df_sorted .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2940
- assert_frame_equal (result2 , expected )
2940
+ result1 = df .resample ('5D' ,how = sum )
2941
+ assert_frame_equal (result1 , expected )
2941
2942
2942
- result3 = df .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2943
- assert_frame_equal (result3 , expected )
2943
+ df_sorted = df .sort_index ()
2944
+ result2 = df_sorted .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2945
+ assert_frame_equal (result2 , expected )
2946
+
2947
+ result3 = df .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2948
+ assert_frame_equal (result3 , expected )
2944
2949
2945
2950
def test_groupby_with_timegrouper_methods (self ):
2946
2951
# GH 3881
2947
2952
# make sure API of timegrouper conforms
2948
2953
2949
2954
import datetime as DT
2950
- df = pd .DataFrame ({
2955
+ df_original = pd .DataFrame ({
2951
2956
'Branch' : 'A A A A A B' .split (),
2952
2957
'Buyer' : 'Carl Mark Carl Joe Joe Carl' .split (),
2953
2958
'Quantity' : [1 ,3 ,5 ,8 ,9 ,3 ],
@@ -2960,13 +2965,16 @@ def test_groupby_with_timegrouper_methods(self):
2960
2965
DT .datetime (2013 ,12 ,2 ,14 ,0 ),
2961
2966
]})
2962
2967
2963
- df = df .set_index ('Date' , drop = False )
2964
- g = df .groupby (pd .TimeGrouper ('6M' ))
2965
- self .assertTrue (g .group_keys )
2966
- self .assertTrue (isinstance (g .grouper ,pd .core .groupby .BinGrouper ))
2967
- groups = g .groups
2968
- self .assertTrue (isinstance (groups ,dict ))
2969
- self .assertTrue (len (groups ) == 3 )
2968
+ df_sorted = df_original .sort (columns = 'Quantity' , ascending = False )
2969
+
2970
+ for df in [df_original , df_sorted ]:
2971
+ df = df .set_index ('Date' , drop = False )
2972
+ g = df .groupby (pd .TimeGrouper ('6M' ))
2973
+ self .assertTrue (g .group_keys )
2974
+ self .assertTrue (isinstance (g .grouper ,pd .core .groupby .BinGrouper ))
2975
+ groups = g .groups
2976
+ self .assertTrue (isinstance (groups ,dict ))
2977
+ self .assertTrue (len (groups ) == 3 )
2970
2978
2971
2979
def test_timegrouper_with_reg_groups (self ):
2972
2980
@@ -2975,7 +2983,7 @@ def test_timegrouper_with_reg_groups(self):
2975
2983
2976
2984
import datetime as DT
2977
2985
2978
- df = DataFrame ({
2986
+ df_original = DataFrame ({
2979
2987
'Branch' : 'A A A A A A A B' .split (),
2980
2988
'Buyer' : 'Carl Mark Carl Carl Joe Joe Joe Carl' .split (),
2981
2989
'Quantity' : [1 ,3 ,5 ,1 ,8 ,1 ,9 ,3 ],
@@ -2990,32 +2998,34 @@ def test_timegrouper_with_reg_groups(self):
2990
2998
DT .datetime (2013 ,12 ,2 ,14 ,0 ),
2991
2999
]}).set_index ('Date' )
2992
3000
2993
- expected = DataFrame ({
2994
- 'Buyer' : 'Carl Joe Mark' .split (),
2995
- 'Quantity' : [10 ,18 ,3 ],
2996
- 'Date' : [
2997
- DT .datetime (2013 ,12 ,31 ,0 ,0 ),
2998
- DT .datetime (2013 ,12 ,31 ,0 ,0 ),
2999
- DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3000
- ]}).set_index (['Date' ,'Buyer' ])
3001
-
3002
- result = df .groupby ([pd .Grouper (freq = 'A' ),'Buyer' ]).sum ()
3003
- assert_frame_equal (result ,expected )
3004
-
3005
- expected = DataFrame ({
3006
- 'Buyer' : 'Carl Mark Carl Joe' .split (),
3007
- 'Quantity' : [1 ,3 ,9 ,18 ],
3008
- 'Date' : [
3009
- DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3010
- DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3011
- DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3012
- DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3013
- ]}).set_index (['Date' ,'Buyer' ])
3014
-
3015
- result = df .groupby ([pd .Grouper (freq = '6MS' ),'Buyer' ]).sum ()
3016
- assert_frame_equal (result ,expected )
3017
-
3018
- df = DataFrame ({
3001
+ df_sorted = df_original .sort (columns = 'Quantity' , ascending = False )
3002
+
3003
+ for df in [df_original , df_sorted ]:
3004
+ expected = DataFrame ({
3005
+ 'Buyer' : 'Carl Joe Mark' .split (),
3006
+ 'Quantity' : [10 ,18 ,3 ],
3007
+ 'Date' : [
3008
+ DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3009
+ DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3010
+ DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3011
+ ]}).set_index (['Date' ,'Buyer' ])
3012
+
3013
+ result = df .groupby ([pd .Grouper (freq = 'A' ),'Buyer' ]).sum ()
3014
+ assert_frame_equal (result ,expected )
3015
+
3016
+ expected = DataFrame ({
3017
+ 'Buyer' : 'Carl Mark Carl Joe' .split (),
3018
+ 'Quantity' : [1 ,3 ,9 ,18 ],
3019
+ 'Date' : [
3020
+ DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3021
+ DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3022
+ DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3023
+ DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3024
+ ]}).set_index (['Date' ,'Buyer' ])
3025
+ result = df .groupby ([pd .Grouper (freq = '6MS' ),'Buyer' ]).sum ()
3026
+ assert_frame_equal (result ,expected )
3027
+
3028
+ df_original = DataFrame ({
3019
3029
'Branch' : 'A A A A A A A B' .split (),
3020
3030
'Buyer' : 'Carl Mark Carl Carl Joe Joe Joe Carl' .split (),
3021
3031
'Quantity' : [1 ,3 ,5 ,1 ,8 ,1 ,9 ,3 ],
@@ -3030,81 +3040,105 @@ def test_timegrouper_with_reg_groups(self):
3030
3040
DT .datetime (2013 ,10 ,2 ,14 ,0 ),
3031
3041
]}).set_index ('Date' )
3032
3042
3033
- expected = DataFrame ({
3034
- 'Buyer' : 'Carl Joe Mark Carl Joe' .split (),
3035
- 'Quantity' : [6 ,8 ,3 ,4 ,10 ],
3036
- 'Date' : [
3037
- DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3038
- DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3039
- DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3040
- DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3041
- DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3042
- ]}).set_index (['Date' ,'Buyer' ])
3043
-
3044
- result = df .groupby ([pd .Grouper (freq = '1D' ),'Buyer' ]).sum ()
3045
- assert_frame_equal (result ,expected )
3046
-
3047
- result = df .groupby ([pd .Grouper (freq = '1M' ),'Buyer' ]).sum ()
3048
- expected = DataFrame ({
3049
- 'Buyer' : 'Carl Joe Mark' .split (),
3050
- 'Quantity' : [10 ,18 ,3 ],
3051
- 'Date' : [
3052
- DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3053
- DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3054
- DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3055
- ]}).set_index (['Date' ,'Buyer' ])
3056
- assert_frame_equal (result ,expected )
3057
-
3058
- # passing the name
3059
- df = df .reset_index ()
3060
- result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3061
- assert_frame_equal (result ,expected )
3062
-
3063
- self .assertRaises (KeyError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'foo' ),'Buyer' ]).sum ())
3064
-
3065
- # passing the level
3066
- df = df .set_index ('Date' )
3067
- result = df .groupby ([pd .Grouper (freq = '1M' ,level = 'Date' ),'Buyer' ]).sum ()
3068
- assert_frame_equal (result ,expected )
3069
- result = df .groupby ([pd .Grouper (freq = '1M' ,level = 0 ),'Buyer' ]).sum ()
3070
- assert_frame_equal (result ,expected )
3071
-
3072
- self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,level = 'foo' ),'Buyer' ]).sum ())
3073
-
3074
- # multi names
3075
- df = df .copy ()
3076
- df ['Date' ] = df .index + pd .offsets .MonthEnd (2 )
3077
- result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3078
- expected = DataFrame ({
3079
- 'Buyer' : 'Carl Joe Mark' .split (),
3080
- 'Quantity' : [10 ,18 ,3 ],
3081
- 'Date' : [
3082
- DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3083
- DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3084
- DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3085
- ]}).set_index (['Date' ,'Buyer' ])
3086
- assert_frame_equal (result ,expected )
3087
-
3088
- # error as we have both a level and a name!
3089
- self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ,level = 'Date' ),'Buyer' ]).sum ())
3090
-
3043
+ df_sorted = df_original .sort (columns = 'Quantity' , ascending = False )
3044
+ for df in [df_original , df_sorted ]:
3045
+
3046
+ expected = DataFrame ({
3047
+ 'Buyer' : 'Carl Joe Mark Carl Joe' .split (),
3048
+ 'Quantity' : [6 ,8 ,3 ,4 ,10 ],
3049
+ 'Date' : [
3050
+ DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3051
+ DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3052
+ DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3053
+ DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3054
+ DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3055
+ ]}).set_index (['Date' ,'Buyer' ])
3056
+
3057
+ result = df .groupby ([pd .Grouper (freq = '1D' ),'Buyer' ]).sum ()
3058
+ assert_frame_equal (result ,expected )
3059
+
3060
+ result = df .groupby ([pd .Grouper (freq = '1M' ),'Buyer' ]).sum ()
3061
+ expected = DataFrame ({
3062
+ 'Buyer' : 'Carl Joe Mark' .split (),
3063
+ 'Quantity' : [10 ,18 ,3 ],
3064
+ 'Date' : [
3065
+ DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3066
+ DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3067
+ DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3068
+ ]}).set_index (['Date' ,'Buyer' ])
3069
+ assert_frame_equal (result ,expected )
3070
+
3071
+ # passing the name
3072
+ df = df .reset_index ()
3073
+ result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3074
+ assert_frame_equal (result ,expected )
3075
+
3076
+ self .assertRaises (KeyError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'foo' ),'Buyer' ]).sum ())
3077
+
3078
+ # passing the level
3079
+ df = df .set_index ('Date' )
3080
+ result = df .groupby ([pd .Grouper (freq = '1M' ,level = 'Date' ),'Buyer' ]).sum ()
3081
+ assert_frame_equal (result ,expected )
3082
+ result = df .groupby ([pd .Grouper (freq = '1M' ,level = 0 ),'Buyer' ]).sum ()
3083
+ assert_frame_equal (result ,expected )
3084
+
3085
+ self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,level = 'foo' ),'Buyer' ]).sum ())
3086
+
3087
+ # multi names
3088
+ df = df .copy ()
3089
+ df ['Date' ] = df .index + pd .offsets .MonthEnd (2 )
3090
+ result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3091
+ expected = DataFrame ({
3092
+ 'Buyer' : 'Carl Joe Mark' .split (),
3093
+ 'Quantity' : [10 ,18 ,3 ],
3094
+ 'Date' : [
3095
+ DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3096
+ DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3097
+ DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3098
+ ]}).set_index (['Date' ,'Buyer' ])
3099
+ assert_frame_equal (result ,expected )
3100
+
3101
+ # error as we have both a level and a name!
3102
+ self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ,level = 'Date' ),'Buyer' ]).sum ())
3103
+
3104
+
3105
+ # single groupers
3106
+ expected = DataFrame ({ 'Quantity' : [31 ],
3107
+ 'Date' : [DT .datetime (2013 ,10 ,31 ,0 ,0 )] }).set_index ('Date' )
3108
+ result = df .groupby (pd .Grouper (freq = '1M' )).sum ()
3109
+ assert_frame_equal (result , expected )
3091
3110
3092
- # single groupers
3093
- expected = DataFrame ({ 'Quantity' : [31 ],
3094
- 'Date' : [DT .datetime (2013 ,10 ,31 ,0 ,0 )] }).set_index ('Date' )
3095
- result = df .groupby (pd .Grouper (freq = '1M' )).sum ()
3096
- assert_frame_equal (result , expected )
3111
+ result = df .groupby ([pd .Grouper (freq = '1M' )]).sum ()
3112
+ assert_frame_equal (result , expected )
3097
3113
3098
- result = df .groupby ([pd .Grouper (freq = '1M' )]).sum ()
3099
- assert_frame_equal (result , expected )
3114
+ expected = DataFrame ({ 'Quantity' : [31 ],
3115
+ 'Date' : [DT .datetime (2013 ,11 ,30 ,0 ,0 )] }).set_index ('Date' )
3116
+ result = df .groupby (pd .Grouper (freq = '1M' ,key = 'Date' )).sum ()
3117
+ assert_frame_equal (result , expected )
3100
3118
3101
- expected = DataFrame ({ 'Quantity' : [31 ],
3102
- 'Date' : [DT .datetime (2013 ,11 ,30 ,0 ,0 )] }).set_index ('Date' )
3103
- result = df .groupby (pd .Grouper (freq = '1M' ,key = 'Date' )).sum ()
3104
- assert_frame_equal (result , expected )
3119
+ result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' )]).sum ()
3120
+ assert_frame_equal (result , expected )
3105
3121
3106
- result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' )]).sum ()
3107
- assert_frame_equal (result , expected )
3122
+ # GH 6764 multiple grouping with/without sort
3123
+ df = DataFrame ({
3124
+ 'date' : pd .to_datetime ([
3125
+ '20121002' ,'20121007' ,'20130130' ,'20130202' ,'20130305' ,'20121002' ,
3126
+ '20121207' ,'20130130' ,'20130202' ,'20130305' ,'20130202' ,'20130305' ]),
3127
+ 'user_id' : [1 ,1 ,1 ,1 ,1 ,3 ,3 ,3 ,5 ,5 ,5 ,5 ],
3128
+ 'whole_cost' : [1790 ,364 ,280 ,259 ,201 ,623 ,90 ,312 ,359 ,301 ,359 ,801 ],
3129
+ 'cost1' : [12 ,15 ,10 ,24 ,39 ,1 ,0 ,90 ,45 ,34 ,1 ,12 ] }).set_index ('date' )
3130
+
3131
+ for freq in ['D' , 'M' , 'A' , 'Q-APR' ]:
3132
+ expected = df .groupby ('user_id' )['whole_cost' ].resample (
3133
+ freq , how = 'sum' ).dropna ().reorder_levels (
3134
+ ['date' ,'user_id' ]).sortlevel ().astype ('int64' )
3135
+ expected .name = 'whole_cost'
3136
+
3137
+ result1 = df .sort_index ().groupby ([pd .TimeGrouper (freq = freq ), 'user_id' ])['whole_cost' ].sum ()
3138
+ assert_series_equal (result1 , expected )
3139
+
3140
+ result2 = df .groupby ([pd .TimeGrouper (freq = freq ), 'user_id' ])['whole_cost' ].sum ()
3141
+ assert_series_equal (result2 , expected )
3108
3142
3109
3143
def test_cumcount (self ):
3110
3144
df = DataFrame ([['a' ], ['a' ], ['a' ], ['b' ], ['a' ]], columns = ['A' ])
0 commit comments