Skip to content

Commit bd9011a

Browse files
committed
made nan count when dropna=False
1 parent 3a5ae50 commit bd9011a

File tree

4 files changed

+13
-4
lines changed

4 files changed

+13
-4
lines changed

doc/source/whatsnew/v1.0.3.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Fixed regressions
2222

2323
Bug fixes
2424
~~~~~~~~~
25+
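- Fixed bug in :meth:`Series.value_counts` with ``bins`` where ``normalize=True`` divided by the total length regardless of ``dropna``; NA values are now excluded from the normalization when ``dropna=True`` and counted when ``dropna=False`` (:issue:`25970`)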
2526

2627
Contributors
2728
~~~~~~~~~~~~

pandas/core/algorithms.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,16 +695,15 @@ def value_counts(
695695

696696
# count, remove nulls (from the index), and but the bins
697697
result = ii.value_counts(dropna=dropna)
698-
result = result[result.index.notna()]
699698
result.index = result.index.astype("interval")
700699
result = result.sort_index()
701700

702701
# if we are dropna and we have NO values
703702
if dropna and (result._values == 0).all():
704703
result = result.iloc[0:0]
705704

706-
# normalizing is by len of all (regardless of dropna)
707-
counts = np.array([len(ii)])
705+
# normalizing is by len of what gets included in the bins
706+
counts = result._values
708707

709708
else:
710709

pandas/core/groupby/generic.py

100644100755
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ def value_counts(
675675
from pandas.core.reshape.tile import cut
676676
from pandas.core.reshape.merge import _get_join_indexers
677677

678-
if bins is not None and not np.iterable(bins):
678+
if bins is not None:# and not np.iterable(bins):
679679
# scalar bins cannot be done at top level
680680
# in a backward compatible way
681681
return self.apply(
@@ -684,6 +684,7 @@ def value_counts(
684684
sort=sort,
685685
ascending=ascending,
686686
bins=bins,
687+
dropna=dropna
687688
)
688689

689690
ids, _, _ = self.grouper.group_info

pandas/tests/base/test_value_counts.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,14 @@ def test_value_counts_bins(index_or_series):
190190

191191
assert s.nunique() == 0
192192

193+
# handle normalizing bins with NA's properly
194+
# see GH25970
195+
s2 = Series([1,2,2,3,3,3, np.nan, np.nan, 4, 5])
196+
intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
197+
expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1,0,2]))
198+
expected_keepna_vals = np.array([0.3, 0.3, 0.2, 0.2])
199+
tm.assert_series_equal(s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna)
200+
tm.assert_numpy_array_equal(s2.value_counts(dropna=False, normalize=True, bins=3).values, expected_keepna_vals)
193201

194202
def test_value_counts_datetime64(index_or_series):
195203
klass = index_or_series

0 commit comments

Comments
 (0)