@@ -3706,292 +3706,6 @@ def test_index_label_overlaps_location(self):
3706
3706
expected = ser .take ([1 , 3 , 4 ])
3707
3707
assert_series_equal (actual , expected )
3708
3708
3709
def test_groupby_selection_with_methods(self):
    # Selecting column 'C' after grouping by 'A' must give the same results
    # as grouping the already-selected frame by the 'A' column.

    # some of the methods exercised below require a DatetimeIndex
    self.df.index = pd.date_range('2014', periods=len(self.df))

    selected = self.df.groupby(['A'])[['C']]
    reference = self.df[['C']].groupby(self.df['A'])
    # TODO check groupby with > 1 col ?

    # methods that can be invoked with no arguments, i.e. ``.foo()``
    no_arg_methods = (
        'count',
        'corr',
        'cummax',
        'cummin',
        'cumprod',
        'describe',
        'rank',
        'quantile',
        'diff',
        'shift',
        'all',
        'any',
        'idxmin',
        'idxmax',
        'ffill',
        'bfill',
        'pct_change',
        'tshift',
    )
    for name in no_arg_methods:
        res = getattr(selected, name)()
        exp = getattr(reference, name)()
        assert_frame_equal(res, exp)  # should always be frames!

    # methods which aren't just ``.foo()``: they take arguments or are
    # plain attributes
    assert_frame_equal(selected.fillna(0), reference.fillna(0))
    assert_frame_equal(selected.dtypes, reference.dtypes)
    assert_frame_equal(selected.apply(lambda x: x.sum()),
                       reference.apply(lambda x: x.sum()))

    # daily resampling followed by each aggregation
    for how in ('mean', 'ohlc'):
        assert_frame_equal(getattr(selected.resample('D'), how)(),
                           getattr(reference.resample('D'), how)())

    assert_frame_equal(selected.filter(lambda x: len(x) == 3),
                       reference.filter(lambda x: len(x) == 3))
3755
-
3756
# Attribute names for which the method object obtained through the attribute
# carries a ``__name__`` different from the attribute name itself.
# TODO: Fix these inconsistencies
S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE = frozenset(('bfill', 'ffill'))
DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE = frozenset(
    ('boxplot', 'bfill', 'ffill'))
3768
-
3769
def test_groupby_whitelist(self):
    # The ``_apply_whitelist`` of DataFrame and Series groupby objects must
    # equal the sets spelled out here, and each whitelisted attribute that
    # exposes ``__name__`` / ``__qualname__`` must be named consistently
    # (except for the known mismatches recorded on the class).
    from string import ascii_lowercase
    N = 10
    alphabet = np.array(list(ascii_lowercase))
    random_letters = alphabet.take(np.random.randint(0, 26, N))
    df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
                    'letters': Series(random_letters)})
    s = df.floats

    # entries present in both the DataFrame and the Series whitelist
    shared = frozenset([
        'last',
        'first',
        'mean',
        'sum',
        'min',
        'max',
        'head',
        'tail',
        'cumcount',
        'resample',
        'rank',
        'quantile',
        'fillna',
        'mad',
        'any',
        'all',
        'take',
        'idxmax',
        'idxmin',
        'shift',
        'tshift',
        'ffill',
        'bfill',
        'pct_change',
        'skew',
        'plot',
        'hist',
        'median',
        'corr',
        'cov',
        'diff',
    ])
    df_whitelist = shared | frozenset([
        'boxplot',
        'dtypes',
        'corrwith',
    ])
    s_whitelist = shared | frozenset([
        'dtype',
        'unique',
        'nlargest',
        'nsmallest',
    ])

    cases = (
        (df, df_whitelist, self.DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE),
        (s, s_whitelist, self.S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE),
    )
    missing = object()  # sentinel: attribute not present on the object

    for obj, whitelist, names_dont_match in cases:
        gb = obj.groupby(df.letters)

        assert whitelist == gb._apply_whitelist
        for attr in whitelist:
            f = getattr(type(gb), attr)
            must_match = attr not in names_dont_match

            # name: objects without __name__ are skipped entirely
            plain_name = getattr(f, '__name__', missing)
            if plain_name is missing:
                continue
            if must_match:
                assert plain_name == attr

            # qualname: only checked when compat.PY3 is true
            if not compat.PY3:
                continue
            qualified = getattr(f, '__qualname__', missing)
            if qualified is missing:
                continue
            if must_match:
                assert qualified.endswith(attr)
3882
-
3883
def test_groupby_method_names_that_dont_match_attribute(self):
    # For every attribute recorded in the
    # *_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE sets, confirm that the method
    # fetched from a groupby object really has a __name__ that differs from
    # the attribute name, for both a DataFrame and a Series groupby.
    from string import ascii_lowercase
    letters = np.array(list(ascii_lowercase))
    N = 10
    random_letters = letters.take(np.random.randint(0, 26, N))
    df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
                    'letters': Series(random_letters)})
    s = df.floats

    names_dont_match_pair = (
        self.DF_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE,
        self.S_METHOD_NAMES_THAT_DONT_MATCH_ATTRIBUTE)
    for obj, names_dont_match in zip((df, s), names_dont_match_pair):
        # the groupby object is built here, once per object under test; no
        # separate groupby is created ahead of the loop since it would be
        # overwritten before ever being used
        gb = obj.groupby(df.letters)
        for m in names_dont_match:
            f = getattr(gb, m)
            self.assertNotEqual(f.__name__, m)
3901
-
3902
# Names of the aggregation methods looked up on both the groupby object and
# the frame in test_regression_whitelist_methods.
AGG_FUNCTIONS = [
    'sum',
    'prod',
    'min',
    'max',
    'median',
    'mean',
    'skew',
    'mad',
    'std',
    'var',
    'sem',
]
# Subset of AGG_FUNCTIONS that is called with an explicit ``skipna`` keyword.
AGG_FUNCTIONS_WITH_SKIPNA = [
    'skew',
    'mad',
]
3905
-
3906
def test_regression_whitelist_methods(self):
    # GH6944
    # explicitly test the whitelisted methods: for each aggregation, level
    # and axis, grouping by the level and aggregating must equal calling
    # the same method on the frame with ``level=``.
    level_values = [['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']]
    level_codes = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=level_values, labels=level_codes,
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    # put missing values into two rows
    raw_frame.iloc[1, [1, 2]] = np.nan
    raw_frame.iloc[7, [0, 1]] = np.nan

    combinations = cart_product(self.AGG_FUNCTIONS,
                                lrange(2), lrange(2),
                                [True, False])
    for op, level, axis, skipna in combinations:
        # for axis 1 the MultiIndex has to be on the columns
        frame = raw_frame if axis == 0 else raw_frame.T

        # only some aggregations are called with an explicit skipna
        if op in self.AGG_FUNCTIONS_WITH_SKIPNA:
            extra = {'skipna': skipna}
        else:
            extra = {}

        grouped = frame.groupby(level=level, axis=axis)
        result = getattr(grouped, op)(**extra)
        expected = getattr(frame, op)(level=level, axis=axis, **extra)
        assert_frame_equal(result, expected)
3940
-
3941
def test_groupby_blacklist(self):
    # Accessing any blacklisted attribute (plus every ``to_*`` attribute of
    # the frame) on a DataFrame or Series groupby must raise AttributeError
    # with one of the two messages matched below.
    from string import ascii_lowercase
    N = 10
    alphabet = np.array(list(ascii_lowercase))
    random_letters = alphabet.take(np.random.randint(0, 26, N))
    df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
                    'letters': Series(random_letters)})
    s = df.floats

    blacklist = [
        'eval', 'query', 'abs', 'where',
        'mask', 'align', 'groupby', 'clip', 'astype',
        'at', 'combine', 'consolidate', 'convert_objects',
    ]
    # every attribute of the frame whose name starts with ``to_``
    blacklist += [attr for attr in dir(df) if attr.startswith('to_')]

    # e.g., to_csv
    defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the "
                               "'apply' method$)")

    # e.g., query, eval
    not_defined = "(?:^{1!r} object has no attribute {0!r}$)"

    # either message is acceptable
    pattern = '|'.join((defined_but_not_allowed, not_defined))

    for banned in blacklist:
        for obj in (df, s):
            gb = obj.groupby(df.letters)
            expected_msg = pattern.format(banned, type(gb).__name__)
            with tm.assertRaisesRegexp(AttributeError, expected_msg):
                getattr(gb, banned)
3972
-
3973
def test_tab_completion(self):
    # The public (non-underscore) names reported by dir() on a groupby of
    # self.mframe must be exactly the expected set below.
    grp = self.mframe.groupby(level='second')
    # feed set() directly instead of building an intermediate list
    results = set(v for v in dir(grp) if not v.startswith('_'))
    # each name appears exactly once; 'ffill' and 'bfill' used to be listed
    # twice, which had no effect on the set but obscured its real contents
    expected = set(
        ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter',
         'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max',
         'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot',
         'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count',
         'nunique', 'head', 'describe', 'cummax', 'quantile',
         'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna',
         'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill',
         'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
         'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
         'pad', 'backfill', 'rolling', 'expanding'])
    self.assertEqual(results, expected)
3988
-
3989
def test_groupby_function_rename(self):
    # Each of these methods, fetched from a groupby object, must report a
    # __name__ equal to the attribute name used to fetch it.
    grouped = self.mframe.groupby(level='second')
    for expected_name in ('sum', 'prod', 'min', 'max', 'first', 'last'):
        method = getattr(grouped, expected_name)
        self.assertEqual(method.__name__, expected_name)
3994
-
3995
3709
def test_lower_int_prec_count (self ):
3996
3710
df = DataFrame ({'a' : np .array (
3997
3711
[0 , 1 , 2 , 100 ], np .int8 ),
0 commit comments