Skip to content

Commit c34a863

Browse files
committed
added specific test for groupby valuecount interval fix
1 parent 86fe7f9 commit c34a863

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

doc/source/whatsnew/v1.0.3.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ Fixed regressions
2222

2323
Bug fixes
2424
~~~~~~~~~
25-
:issue: `25970` Fixed Series.value_counts so that normalize excludes NA values when dropna=False.
26-
25+
Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
26+
Fixed DataFrame groupby value_counts with bins (:issue:`32471`)
2727
Contributors
2828
~~~~~~~~~~~~
2929

pandas/tests/groupby/test_value_counts.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
12+
from pandas import DataFrame, Grouper, MultiIndex, Series, cut, date_range, to_datetime
1313
import pandas._testing as tm
1414

1515

@@ -41,13 +41,12 @@ def seed_df(seed_nans, n, m):
4141
ids = []
4242
for seed_nans in [True, False]:
4343
for n, m in product((100, 1000), (5, 20)):
44-
4544
df = seed_df(seed_nans, n, m)
4645
bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2)
4746
keys = "1st", "2nd", ["1st", "2nd"]
4847
for k, b in product(keys, bins):
4948
binned.append((df, k, b, n, m))
50-
ids.append(f"{k}-{n}-{m}")
49+
ids.append(f"{k}-{n}-{m}-{seed_nans} ")
5150

5251

5352
@pytest.mark.slow
@@ -71,6 +70,7 @@ def rebuild_index(df):
7170

7271
gr = df.groupby(keys, sort=isort)
7372
left = gr["3rd"].value_counts(**kwargs)
73+
left.index.names = left.index.names[:-1] + ["3rd"]
7474

7575
gr = df.groupby(keys, sort=isort)
7676
right = gr["3rd"].apply(Series.value_counts, **kwargs)
@@ -81,6 +81,22 @@ def rebuild_index(df):
8181
tm.assert_series_equal(left.sort_index(), right.sort_index())
8282

8383

84+
def test_groubpy_value_counts_bins():
85+
# GH32471
86+
BINS = [0, 20, 80, 100]
87+
df = DataFrame(
88+
[[0, 0], [1, 100], [0, 100], [2, 0], [3, 100]], columns=["key", "score"]
89+
)
90+
result = df.groupby("key")["score"].value_counts(bins=BINS)
91+
result.sort_index(inplace=True)
92+
intervals = cut(Series([0]), bins=BINS, include_lowest=True).cat.categories
93+
index = MultiIndex.from_product(
94+
[[0, 1, 2, 3], sorted(intervals)], names=("key", None)
95+
)
96+
expected = Series([1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1], index, name="score")
97+
tm.assert_series_equal(result, expected)
98+
99+
84100
def test_series_groupby_value_counts_with_grouper():
85101
# GH28479
86102
df = DataFrame(

0 commit comments

Comments
 (0)