Commit d2399ea

speedups 1
1 parent 7ae1280 commit d2399ea

2 files changed: +7 -21 lines changed

pandas/core/groupby/generic.py

Lines changed: 7 additions & 6 deletions

@@ -684,6 +684,7 @@ def value_counts(
 
         from pandas.core.reshape.tile import cut
 
+        """
         if bins is not None and not np.iterable(bins):
             # scalar bins cannot be done at top level
             # in a backward compatible way
@@ -694,6 +695,7 @@ def value_counts(
                 ascending=ascending,
                 bins=bins,
             )
+        """
         ids, _, _ = self.grouper.group_info
         val = self.obj._values
         codes = self.grouper.reconstructed_codes  # this will track the groups
@@ -735,14 +737,9 @@ def value_counts(
         val_lab = val_lab[changes]
         ids = ids[changes]
         cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
-
         idx = np.r_[0, 1 + np.nonzero(change_ids)[0]]
+        # how many times each index gets repeated
         rep = partial(np.repeat, repeats=np.add.reduceat(changes, idx))
-        num_repeats = np.diff(np.nonzero(np.r_[True, change_ids, True]))[0]
-
-        change_ids = np.r_[  # need to update now that we removed full repeats
-            ids[1:] != ids[:-1], True
-        ]
 
         if (not dropna) and (-1 in val_lab):
             # in this case we need to explicitly add NaN as a level
@@ -755,6 +752,7 @@ def value_counts(
         names = self.grouper.names + [self._selection_name]
 
         if normalize:
+            num_repeats = np.diff(idx, append=len(ids))
             cts = cts.astype("float")
             cts /= rep(
                 num_repeats
@@ -784,6 +782,9 @@ def value_counts(
         ncat = len(codes[0])
         fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
         id = 0
+        change_ids = np.r_[  # need to update now that we removed full repeats
+            ids[1:] != ids[:-1], True
+        ]
         for i, ct in enumerate(cts):  # fill in nonzero values of fout
            fout[id * nbin + val_lab[i]] = cts[i]
            id += change_ids[i]
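
For reference, the hunks above rely on a few NumPy run-length idioms. The following standalone sketch (not from the commit) illustrates them on made-up arrays; the names mirror the diff (changes, idx, cts, rep), but the values and the integer cast are illustrative assumptions.

# Standalone sketch of the run-length idioms used above; arrays are made up.
from functools import partial

import numpy as np

# Boolean mask marking the first row of each run of identical keys
# (the role played by ``changes`` in the diff).
changes = np.array([True, False, False, True, True, False])

# Run lengths: distances between consecutive run starts; a sentinel True
# is appended so the final run is closed.
cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
assert (cts == [3, 1, 2]).all()

# Positions where a new group starts (the role of ``idx`` in the diff).
idx = np.array([0, 4])

# Runs per group: sum the run-start mask over each group's slice, then
# build a repeater that expands one value per group into one value per run.
# The mask is cast to integers here so the summed counts are explicit.
runs_per_group = np.add.reduceat(changes.astype(np.intp), idx)
rep = partial(np.repeat, repeats=runs_per_group)
assert (rep(np.array([10, 20])) == [10, 10, 20]).all()

# Elements per group: differences between group starts with the total
# length appended, the same pattern as ``np.diff(idx, append=len(ids))``.
num_repeats = np.diff(idx, append=len(changes))
assert (num_repeats == [4, 2]).all()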

pandas/tests/groupby/test_value_counts.py

Lines changed: 0 additions & 15 deletions

@@ -68,16 +68,13 @@ def rebuild_index(df):
         normalize=normalize, sort=sort, ascending=ascending, dropna=dropna, bins=bins
     )
 
-    print(f"{df=}")
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
     left.index.names = left.index.names[:-1] + ["3rd"]
 
     # gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
     right.index.names = right.index.names[:-1] + ["3rd"]
-    print(f"{left=}")
-    print(f"{right=}")
 
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
@@ -101,18 +98,6 @@ def test_groubpy_value_counts_bins():
     ]
     df = DataFrame(values, columns=["key1", "key2", "score"])
     result = df.groupby(["key1", "key2"])["score"].value_counts(bins=BINS)
-    print(f"{result=}")
-    print(
-        df.groupby(["key1", "key2"])["score"].apply(
-            Series.value_counts,
-            bins=BINS,
-            sort=True,
-            normalize=True,
-            ascending=True,
-            dropna=True,
-        )
-    )
-
     result.sort_index(inplace=True)
     expected = Series(
         [1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1], result.index, name="score"
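
For reference, the grouped value_counts call this test asserts on can be sketched as below; the data and bin edges are made up for illustration (the test uses its own BINS constant and fixture data).

import pandas as pd

df = pd.DataFrame(
    {
        "key1": ["a", "a", "b", "b"],
        "key2": ["x", "y", "x", "y"],
        "score": [5, 25, 90, 60],
    }
)

# Grouped value_counts with interval bins, the call pattern exercised above;
# the edges here are hypothetical, not the test's BINS.
counts = df.groupby(["key1", "key2"])["score"].value_counts(bins=[0, 20, 80, 100])
print(counts.sort_index())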
