9
9
import numpy as np
10
10
import pytest
11
11
12
- from pandas import DataFrame , Grouper , MultiIndex , Series , date_range , to_datetime
12
+ from pandas import DataFrame , Grouper , MultiIndex , Series , cut , date_range , to_datetime
13
13
import pandas ._testing as tm
14
14
15
15
@@ -41,13 +41,12 @@ def seed_df(seed_nans, n, m):
41
41
ids = []
42
42
for seed_nans in [True , False ]:
43
43
for n , m in product ((100 , 1000 ), (5 , 20 )):
44
-
45
44
df = seed_df (seed_nans , n , m )
46
45
bins = None , np .arange (0 , max (5 , df ["3rd" ].max ()) + 1 , 2 )
47
46
keys = "1st" , "2nd" , ["1st" , "2nd" ]
48
47
for k , b in product (keys , bins ):
49
48
binned .append ((df , k , b , n , m ))
50
- ids .append (f"{ k } -{ n } -{ m } " )
49
+ ids .append (f"{ k } -{ n } -{ m } - { seed_nans } " )
51
50
52
51
53
52
@pytest .mark .slow
@@ -71,6 +70,7 @@ def rebuild_index(df):
71
70
72
71
gr = df .groupby (keys , sort = isort )
73
72
left = gr ["3rd" ].value_counts (** kwargs )
73
+ left .index .names = left .index .names [:- 1 ] + ["3rd" ]
74
74
75
75
gr = df .groupby (keys , sort = isort )
76
76
right = gr ["3rd" ].apply (Series .value_counts , ** kwargs )
@@ -81,6 +81,22 @@ def rebuild_index(df):
81
81
tm .assert_series_equal (left .sort_index (), right .sort_index ())
82
82
83
83
84
+ def test_groubpy_value_counts_bins ():
85
+ # GH32471
86
+ BINS = [0 , 20 , 80 , 100 ]
87
+ df = DataFrame (
88
+ [[0 , 0 ], [1 , 100 ], [0 , 100 ], [2 , 0 ], [3 , 100 ]], columns = ["key" , "score" ]
89
+ )
90
+ result = df .groupby ("key" )["score" ].value_counts (bins = BINS )
91
+ result .sort_index (inplace = True )
92
+ intervals = cut (Series ([0 ]), bins = BINS , include_lowest = True ).cat .categories
93
+ index = MultiIndex .from_product (
94
+ [[0 , 1 , 2 , 3 ], sorted (intervals )], names = ("key" , None )
95
+ )
96
+ expected = Series ([1 , 0 , 1 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 1 ], index , name = "score" )
97
+ tm .assert_series_equal (result , expected )
98
+
99
+
84
100
def test_series_groupby_value_counts_with_grouper ():
85
101
# GH28479
86
102
df = DataFrame (
0 commit comments