
Commit 5f8eb1d

updated value_count docstrings
1 parent 9c1c269 commit 5f8eb1d


3 files changed: +38 additions, -139 deletions


pandas/core/algorithms.py

Lines changed: 9 additions & 5 deletions
@@ -663,12 +663,16 @@ def value_counts(
     ascending : bool, default False
         Sort in ascending order
     normalize: bool, default False
-        If True then compute a relative histogram
-    bins : integer, optional
-        Rather than count values, group them into half-open bins,
-        convenience for pd.cut, only works with numeric data
+        If True, then compute a relative histogram that outputs the
+        proportion of each value.
+    bins : integer or iterable of numeric, optional
+        Rather than count values, group them into half-open bins.
+        Only works with numeric data.
+        If int, interpreted as number of bins and will use pd.cut.
+        If iterable of numeric, will use provided numbers as bin endpoints.
     dropna : bool, default True
-        Don't include counts of NaN
+        Don't include counts of NaN.
+        If False and NaNs are present, NaN will be a key in the output.
 
     Returns
     -------
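For reference, a minimal usage sketch of the behaviour this docstring now describes (the example series below is an assumption for illustration, not part of this commit):

    import pandas as pd

    s = pd.Series([3, 1, 2, 3, 4])

    # normalize=True returns the proportion of each value instead of raw counts
    s.value_counts(normalize=True)

    # an int `bins` splits the data range into that many half-open intervals via pd.cut
    s.value_counts(bins=3)

    # an iterable of numerics is used directly as the interval endpoints
    s.value_counts(bins=[0, 2, 4, 9])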

pandas/core/base.py

Lines changed: 21 additions & 7 deletions
@@ -1176,17 +1176,20 @@ def value_counts(
         Parameters
         ----------
         normalize : bool, default False
-            If True then the object returned will contain the relative
-            frequencies of the unique values.
+            If True, outputs the relative frequencies of the unique values.
         sort : bool, default True
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : int, optional
-            Rather than count values, group them into half-open bins,
-            a convenience for ``pd.cut``, only works with numeric data.
+        bins : integer or iterable of numeric, optional
+            Rather than count individual values, group them into half-open bins.
+            Only works with numeric data.
+            If int, interpreted as number of bins and will use ``pd.cut``.
+            If iterable of numeric, will use provided numbers as bin endpoints.
+
         dropna : bool, default True
             Don't include counts of NaN.
+            If False and NaNs are present, NaN will be a key in the output.
 
         Returns
         -------
@@ -1223,15 +1226,26 @@ def value_counts(
 
         Bins can be useful for going from a continuous variable to a
         categorical variable; instead of counting unique
-        apparitions of values, divide the index in the specified
-        number of half-open bins.
+        instances of values, count the number of values that fall
+        into half-open intervals.
+
+        Bins can be an int.
 
         >>> s.value_counts(bins=3)
         (2.0, 3.0]      2
         (0.996, 2.0]    2
         (3.0, 4.0]      1
         dtype: int64
 
+        Bins can also be an iterable of numbers. These numbers are treated
+        as endpoints for the intervals.
+
+        >>> s.value_counts(bins=[0,2,4,9])
+        (2.0, 4.0]       3
+        (-0.001, 2.0]    2
+        (4.0, 9.0]       0
+        dtype: int64
+
         **dropna**
 
         With `dropna` set to `False` we can also see NaN index values.
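A brief sketch of the `dropna` note added above (the series here is assumed for illustration and is not one of the commit's doctests):

    import numpy as np
    import pandas as pd

    s = pd.Series([3, 1, 2, 3, 4, np.nan])

    # with dropna=False, NaN appears as its own key in the resulting index
    s.value_counts(dropna=False)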

pandas/core/groupby/generic.py

Lines changed: 8 additions & 127 deletions
@@ -7,7 +7,6 @@
 """
 from collections import abc, namedtuple
 import copy
-from functools import partial
 from textwrap import dedent
 import typing
 from typing import (
@@ -41,11 +40,8 @@
     maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
-    ensure_int64,
     ensure_platform_int,
     is_bool,
-    is_integer_dtype,
-    is_interval_dtype,
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
@@ -671,129 +667,14 @@ def describe(self, **kwargs):
     def value_counts(
         self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
     ):
-
-        from pandas.core.reshape.tile import cut
-        from pandas.core.reshape.merge import _get_join_indexers
-
-        if bins is not None:  # and not np.iterable(bins):
-            # scalar bins cannot be done at top level
-            # in a backward compatible way
-            return self.apply(
-                Series.value_counts,
-                normalize=normalize,
-                sort=sort,
-                ascending=ascending,
-                bins=bins,
-                dropna=dropna
-            )
-
-        ids, _, _ = self.grouper.group_info
-        val = self.obj._values
-
-        # groupby removes null keys from groupings
-        mask = ids != -1
-        ids, val = ids[mask], val[mask]
-
-        if bins is None:
-            lab, lev = algorithms.factorize(val, sort=True)
-            llab = lambda lab, inc: lab[inc]
-        else:
-
-            # lab is a Categorical with categories an IntervalIndex
-            lab = cut(Series(val), bins, include_lowest=True)
-            lev = lab.cat.categories
-            lab = lev.take(lab.cat.codes)
-            llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
-
-        if is_interval_dtype(lab):
-            # TODO: should we do this inside II?
-            sorter = np.lexsort((lab.left, lab.right, ids))
-        else:
-            sorter = np.lexsort((lab, ids))
-
-        ids, lab = ids[sorter], lab[sorter]
-
-        # group boundaries are where group ids change
-        idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
-
-        # new values are where sorted labels change
-        lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
-        inc = np.r_[True, lchanges]
-        inc[idx] = True  # group boundaries are also new values
-        out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
-
-        # num. of times each group should be repeated
-        rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
-
-        # multi-index components
-        codes = self.grouper.reconstructed_codes
-        codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
-        levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
-        names = self.grouper.names + [self._selection_name]
-
-        if dropna:
-            mask = codes[-1] != -1
-            if mask.all():
-                dropna = False
-            else:
-                out, codes = out[mask], [level_codes[mask] for level_codes in codes]
-
-        if normalize:
-            out = out.astype("float")
-            d = np.diff(np.r_[idx, len(ids)])
-            if dropna:
-                m = ids[lab == -1]
-                np.add.at(d, m, -1)
-                acc = rep(d)[mask]
-            else:
-                acc = rep(d)
-            out /= acc
-
-        if sort and bins is None:
-            cat = ids[inc][mask] if dropna else ids[inc]
-            sorter = np.lexsort((out if ascending else -out, cat))
-            out, codes[-1] = out[sorter], codes[-1][sorter]
-
-        if bins is None:
-            mi = MultiIndex(
-                levels=levels, codes=codes, names=names, verify_integrity=False
-            )
-
-            if is_integer_dtype(out):
-                out = ensure_int64(out)
-            return Series(out, index=mi, name=self._selection_name)
-
-        # for compat. with libgroupby.value_counts need to ensure every
-        # bin is present at every index level, null filled with zeros
-        diff = np.zeros(len(out), dtype="bool")
-        for level_codes in codes[:-1]:
-            diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]
-
-        ncat, nbin = diff.sum(), len(levels[-1])
-
-        left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
-
-        right = [diff.cumsum() - 1, codes[-1]]
-
-        _, idx = _get_join_indexers(left, right, sort=False, how="left")
-        out = np.where(idx != -1, out[idx], 0)
-
-        if sort:
-            sorter = np.lexsort((out if ascending else -out, left[0]))
-            out, left[-1] = out[sorter], left[-1][sorter]
-
-        # build the multi-index w/ full levels
-        def build_codes(lev_codes: np.ndarray) -> np.ndarray:
-            return np.repeat(lev_codes[diff], nbin)
-
-        codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
-        codes.append(left[-1])
-
-        mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
-
-        if is_integer_dtype(out):
-            out = ensure_int64(out)
-        return Series(out, index=mi, name=self._selection_name)
+        return self.apply(
+            Series.value_counts,
+            normalize=normalize,
+            sort=sort,
+            ascending=ascending,
+            bins=bins,
+            dropna=dropna,
+        )
 
     def count(self) -> Series:
         """
