@@ -694,34 +694,24 @@ def test_is_monotonic_decreasing(in_vals, out_vals):
694
694
695
695
# describe
696
696
# --------------------------------
697
-
698
697
def test_describe():
    """Check ``describe`` on a frame grouped by column ``A``.

    Group ``A == 1`` holds a single non-NaN ``B`` value (2) and group
    ``A == 3`` holds none, so the expected frame has a count of 1 with every
    order statistic equal to 2 for the first group, and a count of 0 with
    NaN everywhere else for the second.  The expected frame only carries
    statistics for ``B``.
    """
    df = DataFrame([
        [1, 2, 'foo'],
        [1, np.nan, 'bar'],
        [3, np.nan, 'baz']
    ], columns=['A', 'B', 'C'])
    grp = df.groupby('A')

    # One row per group key, one column per (source column, statistic) pair.
    index = pd.Index([1, 3], name='A')
    columns = pd.MultiIndex.from_product([
        ['B'], ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']])

    expected = pd.DataFrame([
        # std is NaN here: only one observation in the group.
        [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
        [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]
    ], index=index, columns=columns)

    result = grp.describe()
    tm.assert_frame_equal(result, expected)
726
716
727
717
@@ -1089,7 +1079,7 @@ def test_size(df):
1089
1079
# --------------------------------
1090
1080
@pytest .mark .parametrize ("interpolation" , [
1091
1081
"linear" , "lower" , "higher" , "nearest" , "midpoint" ])
1092
- @pytest .mark .parametrize ("bar_vals,foo_vals " , [
1082
+ @pytest .mark .parametrize ("a_vals,b_vals " , [
1093
1083
# Ints
1094
1084
([1 , 2 , 3 , 4 , 5 ], [5 , 4 , 3 , 2 , 1 ]),
1095
1085
([1 , 2 , 3 , 4 ], [4 , 3 , 2 , 1 ]),
@@ -1101,35 +1091,33 @@ def test_size(df):
1101
1091
([np .nan , 4. , np .nan , 2. , np .nan ], [np .nan , 4. , np .nan , 2. , np .nan ]),
1102
1092
# Timestamps
1103
1093
([x for x in pd .date_range ('1/1/18' , freq = 'D' , periods = 5 )],
1104
- [x for x in pd .date_range ('1/1/18' , freq = 'D' , periods = 5 )[::- 1 ]])
1094
+ [x for x in pd .date_range ('1/1/18' , freq = 'D' , periods = 5 )][::- 1 ]),
1095
+ # All NA
1096
+ ([np .nan ] * 5 , [np .nan ] * 5 ),
1105
1097
])
1106
1098
@pytest .mark .parametrize ('q' , [0 , .25 , .5 , .75 , 1 ])
1107
- def test_quantile (interpolation , bar_vals , foo_vals , q ):
1108
- # Fringe test case was not working as expected?
1109
- if (interpolation == 'nearest' and q == 0.5 and foo_vals == [
1110
- 4 , 3 , 2 , 1 ]):
1099
+ def test_quantile (interpolation , a_vals , b_vals , q ):
1100
+ if interpolation == 'nearest' and q == 0.5 and b_vals == [4 , 3 , 2 , 1 ]:
1111
1101
pytest .skip ("Unclear numpy expectation for nearest result with "
1112
1102
"equidistant data" )
1113
- bar_ser = pd .Series (bar_vals )
1114
- bar_exp = bar_ser .quantile (q , interpolation = interpolation )
1115
- foo_ser = pd .Series (foo_vals )
1116
- foo_exp = foo_ser .quantile (q , interpolation = interpolation )
1103
+
1104
+ a_expected = pd .Series (a_vals ).quantile (q , interpolation = interpolation )
1105
+ b_expected = pd .Series (b_vals ).quantile (q , interpolation = interpolation )
1117
1106
1118
1107
df = pd .DataFrame ({
1119
- 'key' : ['bar ' ] * len (bar_vals ) + ['foo ' ] * len (foo_vals ),
1120
- 'val' : bar_vals + foo_vals })
1108
+ 'key' : ['a ' ] * len (a_vals ) + ['b ' ] * len (b_vals ),
1109
+ 'val' : a_vals + b_vals })
1121
1110
1122
- exp = DataFrame ([bar_exp , foo_exp ], columns = ['val' ],
1123
- index = Index (['bar' , 'foo' ], name = 'key' ))
1124
- res = df .groupby ('key' ).quantile (q , interpolation = interpolation )
1125
- tm .assert_frame_equal (exp , res )
1111
+ expected = DataFrame ([a_expected , b_expected ], columns = ['val' ],
1112
+ index = Index (['a' , 'b' ], name = 'key' ))
1113
+ result = df .groupby ('key' ).quantile (q , interpolation = interpolation )
1114
+
1115
+ tm .assert_frame_equal (result , expected )
1126
1116
1127
1117
1128
1118
def test_quantile_raises ():
1129
- df = pd .DataFrame (
1130
- [['foo' , 'a' ],
1131
- ['foo' , 'b' ],
1132
- ['foo' , 'c' ]], columns = ['key' , 'val' ])
1119
+ df = pd .DataFrame ([
1120
+ ['foo' , 'a' ], ['foo' , 'b' ], ['foo' , 'c' ]], columns = ['key' , 'val' ])
1133
1121
1134
1122
with tm .assert_raises_regex (TypeError , "cannot be performed against "
1135
1123
"'object' dtypes" ):
0 commit comments