Commit d2399ea

speedups 1
1 parent 7ae1280 commit d2399ea

2 files changed: +7 -21 lines changed

pandas/core/groupby/generic.py

Lines changed: 7 additions & 6 deletions

@@ -684,6 +684,7 @@ def value_counts(
 
         from pandas.core.reshape.tile import cut
 
+        """
         if bins is not None and not np.iterable(bins):
             # scalar bins cannot be done at top level
             # in a backward compatible way
@@ -694,6 +695,7 @@ def value_counts(
                 ascending=ascending,
                 bins=bins,
             )
+        """
         ids, _, _ = self.grouper.group_info
         val = self.obj._values
         codes = self.grouper.reconstructed_codes  # this will track the groups
@@ -735,14 +737,9 @@ def value_counts(
         val_lab = val_lab[changes]
         ids = ids[changes]
         cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
-
         idx = np.r_[0, 1 + np.nonzero(change_ids)[0]]
+        # how many times each index gets repeated
         rep = partial(np.repeat, repeats=np.add.reduceat(changes, idx))
-        num_repeats = np.diff(np.nonzero(np.r_[True, change_ids, True]))[0]
-
-        change_ids = np.r_[  # need to update now that we removed full repeats
-            ids[1:] != ids[:-1], True
-        ]
 
         if (not dropna) and (-1 in val_lab):
             # in this case we need to explicitly add NaN as a level
@@ -755,6 +752,7 @@ def value_counts(
         names = self.grouper.names + [self._selection_name]
 
         if normalize:
+            num_repeats = np.diff(idx, append=len(ids))
             cts = cts.astype("float")
             cts /= rep(
                 num_repeats
@@ -784,6 +782,9 @@ def value_counts(
         ncat = len(codes[0])
         fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
         id = 0
+        change_ids = np.r_[  # need to update now that we removed full repeats
+            ids[1:] != ids[:-1], True
+        ]
         for i, ct in enumerate(cts):  # fill in nonzero values of fout
            fout[id * nbin + val_lab[i]] = cts[i]
            id += change_ids[i]
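
For reference, the hunks above rely on a few NumPy run-length idioms. The following standalone sketch (not from the commit) illustrates them on made-up arrays; the names mirror the diff (changes, idx, cts, rep), but the values and the integer cast are illustrative assumptions.

# Standalone sketch of the run-length idioms used above; arrays are made up.
from functools import partial

import numpy as np

# Boolean mask marking the first row of each run of identical keys
# (the role played by ``changes`` in the diff).
changes = np.array([True, False, False, True, True, False])

# Run lengths: distances between consecutive run starts; a sentinel True
# is appended so the final run is closed.
cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
assert (cts == [3, 1, 2]).all()

# Positions where a new group starts (the role of ``idx`` in the diff).
idx = np.array([0, 4])

# Runs per group: sum the run-start mask over each group's slice, then
# build a repeater that expands one value per group into one value per run.
# The mask is cast to integers here so the summed counts are explicit.
runs_per_group = np.add.reduceat(changes.astype(np.intp), idx)
rep = partial(np.repeat, repeats=runs_per_group)
assert (rep(np.array([10, 20])) == [10, 10, 20]).all()

# Elements per group: differences between group starts with the total
# length appended, the same pattern as ``np.diff(idx, append=len(ids))``.
num_repeats = np.diff(idx, append=len(changes))
assert (num_repeats == [4, 2]).all()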

pandas/tests/groupby/test_value_counts.py

Lines changed: 0 additions & 15 deletions

@@ -68,16 +68,13 @@ def rebuild_index(df):
         normalize=normalize, sort=sort, ascending=ascending, dropna=dropna, bins=bins
     )
 
-    print(f"{df=}")
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
     left.index.names = left.index.names[:-1] + ["3rd"]
 
     # gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
     right.index.names = right.index.names[:-1] + ["3rd"]
-    print(f"{left=}")
-    print(f"{right=}")
 
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
@@ -101,18 +98,6 @@ def test_groubpy_value_counts_bins():
     ]
     df = DataFrame(values, columns=["key1", "key2", "score"])
     result = df.groupby(["key1", "key2"])["score"].value_counts(bins=BINS)
-    print(f"{result=}")
-    print(
-        df.groupby(["key1", "key2"])["score"].apply(
-            Series.value_counts,
-            bins=BINS,
-            sort=True,
-            normalize=True,
-            ascending=True,
-            dropna=True,
-        )
-    )
-
     result.sort_index(inplace=True)
     expected = Series(
         [1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1], result.index, name="score"
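
For reference, the grouped value_counts call this test asserts on can be sketched as below; the data and bin edges are made up for illustration (the test uses its own BINS constant and fixture data).

import pandas as pd

df = pd.DataFrame(
    {
        "key1": ["a", "a", "b", "b"],
        "key2": ["x", "y", "x", "y"],
        "score": [5, 25, 90, 60],
    }
)

# Grouped value_counts with interval bins, the call pattern exercised above;
# the edges here are hypothetical, not the test's BINS.
counts = df.groupby(["key1", "key2"])["score"].value_counts(bins=[0, 20, 80, 100])
print(counts.sort_index())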
