From 0830e36583a8ccf7a31479c22eca40b03769b108 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Wed, 22 Jan 2020 22:18:09 -0600
Subject: [PATCH 01/26] Add value_counts tests

---
 pandas/tests/frame/test_value_counts.py | 111 ++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 pandas/tests/frame/test_value_counts.py

diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
new file mode 100644
index 0000000000000..565ff4fcdf3b8
--- /dev/null
+++ b/pandas/tests/frame/test_value_counts.py
@@ -0,0 +1,111 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+import pandas._testing as tm
+
+
+def test_data_frame_value_counts_unsorted():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    result = df.value_counts(sort=False)
+    expected = pd.Series(
+        data=[1, 2, 1],
+        index=pd.MultiIndex.from_arrays(
+            [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"]
+        ),
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_ascending():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    result = df.value_counts(ascending=True)
+    expected = pd.Series(
+        data=[1, 1, 2],
+        index=pd.MultiIndex.from_arrays(
+            [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"]
+        ),
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_default():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    result = df.value_counts()
+    expected = pd.Series(
+        data=[2, 1, 1],
+        index=pd.MultiIndex.from_arrays(
+            [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
+        ),
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_normalize():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    result = df.value_counts(normalize=True)
+    expected = pd.Series(
+        data=[0.5, 0.25, 0.25],
+        index=pd.MultiIndex.from_arrays(
+            [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
+        ),
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_dropna_not_supported_yet():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    with pytest.raises(NotImplementedError, match="not yet supported"):
+        df.value_counts(dropna=False)
+
+
+def test_data_frame_value_counts_bins_not_supported():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    with pytest.raises(NotImplementedError, match="not yet supported"):
+        df.value_counts(bins=2)
+ 
+ 
+def test_data_frame_value_counts_single_col_default():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+    df_single_col = df[["num_legs"]]
+    result = df_single_col.value_counts()
+    expected = pd.Series(
+        data=[2, 1, 1],
+        index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]),
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_empty():
+    df_no_cols = pd.DataFrame()
+    result = df_no_cols.value_counts()
+    expected = pd.Series([], dtype=np.int64)
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_empty_normalize():
+    df_no_cols = pd.DataFrame()
+    result = df_no_cols.value_counts(normalize=True)
+    expected = pd.Series([], dtype=np.float64)
+    tm.assert_series_equal(result, expected)

From d946e93db1638f31791ea4120cc270b597c0a420 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Wed, 22 Jan 2020 22:18:33 -0600
Subject: [PATCH 02/26] Update docs

---
 doc/source/reference/frame.rst | 1 +
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index 01aa6c60e3b2f..f9aa87e0235da 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -168,6 +168,7 @@ Computations / descriptive stats
    DataFrame.std
    DataFrame.var
    DataFrame.nunique
+   DataFrame.value_counts
 
 Reindexing / selection / label manipulation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index c8e811ce82b1f..5bd00f3429687 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -54,7 +54,7 @@ Other API changes
 
 - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
   will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
--
+- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
 -
 
 .. ---------------------------------------------------------------------------

From 7d9306de423dd2286c8b3fb3e89c694e3f72330c Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Wed, 22 Jan 2020 22:21:05 -0600
Subject: [PATCH 03/26] Start implementing value_counts

---
 pandas/core/frame.py | 117 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 116 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4257083cc8dc5..267440a9c94e1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -106,7 +106,7 @@
 from pandas.core.indexes import base as ibase
 from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
 from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.core.indexes.multi import maybe_droplevels
+from pandas.core.indexes.multi import MultiIndex, maybe_droplevels
 from pandas.core.indexes.period import PeriodIndex
 from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
 from pandas.core.internals import BlockManager
@@ -5064,6 +5064,121 @@ def sort_index(
         else:
             return self._constructor(new_data).__finalize__(self)
 
+    def value_counts(
+        self,
+        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+        normalize: bool = False,
+        sort: bool = True,
+        ascending: bool = False,
+        bins=None,
+        dropna: bool = True,
+    ):
+        """
+        Return a Series containing counts of unique rows in the DataFrame.
+        .. versionadded:: 1.0.0
+        The returned Series will have a MultiIndex with one level per input
+        column.
+        By default, rows that contain any NaN value are omitted from the
+        results.
+        By default, the resulting series will be in descending order so that the
+        first element is the most frequently-occurring row.
+        Parameters
+        ----------
+        subset : list-like, default self.columns
+            Columns to use when counting unique combinations.
+        normalize : boolean, default False
+            Return proportions rather than frequencies.
+        sort : boolean, default True
+            Sort by frequencies.
+        ascending : boolean, default False
+            Sort in ascending order.
+        bins : integer, optional
+            This parameter is not yet supported and must be set to None (the
+            default value). It exists to ensure compatibiliy with
+            `Series.value_counts`.
+            Rather than count values, group them into half-open bins,
+            a convenience for ``pd.cut``, only works with single-column numeric
+            data.
+        dropna : boolean, default True
+            This parameter is not yet supported and must be set to True (the
+            default value). It exists to ensure compatibiliy with
+            `Series.value_counts`.
+            Don't include counts of rows containing NaN.
+        Returns
+        -------
+        counts : Series
+        See Also
+        --------
+        Series.value_counts: Equivalent method on Series.
+        Examples
+        --------
+        >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6],
+        ...                    'num_wings': [2, 0, 0, 0]},
+        ...                   index=['falcon', 'dog', 'cat', 'ant'])
+        >>> df
+                num_legs  num_wings
+        falcon         2          2
+        dog            4          0
+        cat            4          0
+        ant            6          0
+        >>> df.value_counts()
+        num_legs  num_wings
+        4         0            2
+        6         0            1
+        2         2            1
+        dtype: int64
+        >>> df.value_counts(sort=False)
+        num_legs  num_wings
+        2         2            1
+        4         0            2
+        6         0            1
+        dtype: int64
+        >>> df.value_counts(ascending=True)
+        num_legs  num_wings
+        2         2            1
+        6         0            1
+        4         0            2
+        dtype: int64
+        >>> df.value_counts(normalize=True)
+        num_legs  num_wings
+        4         0            0.50
+        6         0            0.25
+        2         2            0.25
+        dtype: float64
+        >>> single_col_df = df[['num_legs']]
+        >>> single_col_df.value_counts(bins=4)
+        num_legs
+        (3.0, 4.0]      2
+        (5.0, 6.0]      1
+        (1.995, 3.0]    1
+        (4.0, 5.0]      0
+        dtype: int64
+        """
+        if subset is None:
+            subset = self.columns.tolist()
+
+        # Some features not supported yet
+        if not dropna:
+            raise NotImplementedError(
+                "`dropna=False` not yet supported for DataFrames."
+            )
+        if (bins is not None) and (len(subset) > 1):
+            raise NotImplementedError(
+                "`bins` parameter not yet supported for more than one column."
+            )
+
+        counts = self.groupby(subset).size()
+
+        if sort:
+            counts = counts.sort_values(ascending=ascending)
+        if normalize:
+            counts /= counts.sum()
+        # Force MultiIndex for single column
+        if len(subset) == 1:
+            counts.index = MultiIndex.from_arrays([counts.index])
+
+        return counts
+
     def nlargest(self, n, columns, keep="first") -> "DataFrame":
         """
         Return the first `n` rows ordered by `columns` in descending order.

From 2e58db44335efcc38dd98f0cc04a5620104d8962 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Wed, 22 Jan 2020 22:26:03 -0600
Subject: [PATCH 04/26] Set MultiIndex name

---
 pandas/core/frame.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 267440a9c94e1..025953826499e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5175,7 +5175,9 @@ def value_counts(
             counts /= counts.sum()
         # Force MultiIndex for single column
         if len(subset) == 1:
-            counts.index = MultiIndex.from_arrays([counts.index])
+            counts.index = MultiIndex.from_arrays(
+                [counts.index], names=[counts.index.name]
+            )
 
         return counts
 

From 25d7f2f30d6eea7b22727511b5a62013ed7b4393 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Thu, 23 Jan 2020 07:59:48 -0600
Subject: [PATCH 05/26] Format

---
 pandas/core/frame.py                    | 17 +++++++++++------
 pandas/tests/frame/test_value_counts.py | 20 ++++++++++++++++++--
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 025953826499e..2545d97ff9c79 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5075,13 +5075,14 @@ def value_counts(
     ):
         """
         Return a Series containing counts of unique rows in the DataFrame.
-        .. versionadded:: 1.0.0
+        .. versionadded:: 1.1.0
         The returned Series will have a MultiIndex with one level per input
         column.
-        By default, rows that contain any NaN value are omitted from the
-        results.
-        By default, the resulting series will be in descending order so that the
+        By default, rows that contain any NA values are omitted from the
+        result.
+        By default, the resulting Series will be in descending order so that the
         first element is the most frequently-occurring row.
+
         Parameters
         ----------
         subset : list-like, default self.columns
@@ -5103,13 +5104,16 @@ def value_counts(
             This parameter is not yet supported and must be set to True (the
             default value). It exists to ensure compatibiliy with
             `Series.value_counts`.
-            Don't include counts of rows containing NaN.
+            Don't include counts of rows containing NA values.
+
         Returns
         -------
-        counts : Series
+        Series
+
         See Also
         --------
         Series.value_counts: Equivalent method on Series.
+
         Examples
         --------
         >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6],
@@ -5162,6 +5166,7 @@ def value_counts(
             raise NotImplementedError(
                 "`dropna=False` not yet supported for DataFrames."
             )
+
         if (bins is not None) and (len(subset) > 1):
             raise NotImplementedError(
                 "`bins` parameter not yet supported for more than one column."
diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
index 565ff4fcdf3b8..28089e5bd34be 100644
--- a/pandas/tests/frame/test_value_counts.py
+++ b/pandas/tests/frame/test_value_counts.py
@@ -10,6 +10,7 @@ def test_data_frame_value_counts_unsorted():
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
+
     result = df.value_counts(sort=False)
     expected = pd.Series(
         data=[1, 2, 1],
@@ -17,6 +18,7 @@ def test_data_frame_value_counts_unsorted():
             [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"]
         ),
     )
+
     tm.assert_series_equal(result, expected)
 
 
@@ -25,6 +27,7 @@ def test_data_frame_value_counts_ascending():
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
+
     result = df.value_counts(ascending=True)
     expected = pd.Series(
         data=[1, 1, 2],
@@ -32,6 +35,7 @@ def test_data_frame_value_counts_ascending():
             [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"]
         ),
     )
+
     tm.assert_series_equal(result, expected)
 
 
@@ -40,6 +44,7 @@ def test_data_frame_value_counts_default():
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
+
     result = df.value_counts()
     expected = pd.Series(
         data=[2, 1, 1],
@@ -47,6 +52,7 @@ def test_data_frame_value_counts_default():
             [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
         ),
     )
+
     tm.assert_series_equal(result, expected)
 
 
@@ -55,6 +61,7 @@ def test_data_frame_value_counts_normalize():
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
+
     result = df.value_counts(normalize=True)
     expected = pd.Series(
         data=[0.5, 0.25, 0.25],
@@ -62,6 +69,7 @@ def test_data_frame_value_counts_normalize():
             [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
         ),
     )
+
     tm.assert_series_equal(result, expected)
 
 
@@ -70,6 +78,7 @@ def test_data_frame_value_counts_dropna_not_supported_yet():
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
+
     with pytest.raises(NotImplementedError, match="not yet supported"):
         df.value_counts(dropna=False)
 
@@ -79,33 +88,40 @@ def test_data_frame_value_counts_bins_not_supported():
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
+
     with pytest.raises(NotImplementedError, match="not yet supported"):
         df.value_counts(bins=2)
- 
- 
+
+
 def test_data_frame_value_counts_single_col_default():
     df = pd.DataFrame(
         {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
         index=["falcon", "dog", "cat", "ant"],
     )
     df_single_col = df[["num_legs"]]
+
     result = df_single_col.value_counts()
     expected = pd.Series(
         data=[2, 1, 1],
         index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]),
     )
+
     tm.assert_series_equal(result, expected)
 
 
 def test_data_frame_value_counts_empty():
     df_no_cols = pd.DataFrame()
+
     result = df_no_cols.value_counts()
     expected = pd.Series([], dtype=np.int64)
+
     tm.assert_series_equal(result, expected)
 
 
 def test_data_frame_value_counts_empty_normalize():
     df_no_cols = pd.DataFrame()
+
     result = df_no_cols.value_counts(normalize=True)
     expected = pd.Series([], dtype=np.float64)
+
     tm.assert_series_equal(result, expected)

From aa96c9805becb5c6db61a18a9c87435aa0e412b2 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Thu, 23 Jan 2020 08:35:38 -0600
Subject: [PATCH 06/26] Sort imports

---
 pandas/tests/frame/test_value_counts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
index 28089e5bd34be..5dc51d0c685aa 100644
--- a/pandas/tests/frame/test_value_counts.py
+++ b/pandas/tests/frame/test_value_counts.py
@@ -1,6 +1,6 @@
+import numpy as np
 import pytest
 
-import numpy as np
 import pandas as pd
 import pandas._testing as tm
 

From aef75ae62ac68060f40347c5f69399fafabd8c03 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Thu, 23 Jan 2020 09:40:19 -0600
Subject: [PATCH 07/26] Remove typing for now

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2545d97ff9c79..f6be59754d812 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5066,7 +5066,7 @@ def sort_index(
 
     def value_counts(
         self,
-        subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
+        subset=None,
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,

From acb81cc73ae39da619fe9a9e15f2ff8d8db11550 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Thu, 23 Jan 2020 15:59:59 -0600
Subject: [PATCH 08/26] Simplify test a little

---
 pandas/tests/frame/test_value_counts.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
index 5dc51d0c685aa..ef2dc790940b0 100644
--- a/pandas/tests/frame/test_value_counts.py
+++ b/pandas/tests/frame/test_value_counts.py
@@ -94,13 +94,9 @@ def test_data_frame_value_counts_bins_not_supported():
 
 
 def test_data_frame_value_counts_single_col_default():
-    df = pd.DataFrame(
-        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
-        index=["falcon", "dog", "cat", "ant"],
-    )
-    df_single_col = df[["num_legs"]]
+    df = pd.DataFrame({"num_legs": [2, 4, 4, 6]})
 
-    result = df_single_col.value_counts()
+    result = df.value_counts()
     expected = pd.Series(
         data=[2, 1, 1],
         index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]),

From 786de344cdda81561b0e6e53d1a3bc586d6a0813 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Thu, 23 Jan 2020 16:00:45 -0600
Subject: [PATCH 09/26] Remove single col example for now

---
 pandas/core/frame.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f6be59754d812..8e27af019c30b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5149,14 +5149,6 @@ def value_counts(
         6         0            0.25
         2         2            0.25
         dtype: float64
-        >>> single_col_df = df[['num_legs']]
-        >>> single_col_df.value_counts(bins=4)
-        num_legs
-        (3.0, 4.0]      2
-        (5.0, 6.0]      1
-        (1.995, 3.0]    1
-        (4.0, 5.0]      0
-        dtype: int64
         """
         if subset is None:
             subset = self.columns.tolist()

From 7eba59ae8e72ee7a4bf9e3db9bbe9b8e6b831b4c Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Thu, 23 Jan 2020 16:52:27 -0600
Subject: [PATCH 10/26] Update error for bins

---
 pandas/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8e27af019c30b..5a7e3f7c8e1c9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5159,9 +5159,9 @@ def value_counts(
                 "`dropna=False` not yet supported for DataFrames."
             )
 
-        if (bins is not None) and (len(subset) > 1):
+        if bins is not None:
             raise NotImplementedError(
-                "`bins` parameter not yet supported for more than one column."
+                "`bins` parameter not yet supported for DataFrames."
             )
 
         counts = self.groupby(subset).size()

From 60554e91661be4b349259ceda6bdf47e71469438 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Fri, 24 Jan 2020 13:00:55 -0600
Subject: [PATCH 11/26] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5a7e3f7c8e1c9..a777da3b95032 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5066,7 +5066,7 @@ def sort_index(
 
     def value_counts(
         self,
-        subset=None,
+        subset: Sequence[Label] = None,
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,

From 4c4e858ffe0c0a59142904182e9f094e3464fd04 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Fri, 24 Jan 2020 13:01:01 -0600
Subject: [PATCH 12/26] Update pandas/core/frame.py

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a777da3b95032..3c1b99e7e364f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5070,7 +5070,7 @@ def value_counts(
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,
-        bins=None,
+        bins: Optional[int] = None,
         dropna: bool = True,
     ):
         """

From 07f0e762ceeb6b493fd05a2d0eb67d2a965d886c Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Fri, 24 Jan 2020 13:01:59 -0600
Subject: [PATCH 13/26] Import Label type

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3c1b99e7e364f..c6170abac437d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -39,7 +39,7 @@
 from pandas._config import get_option
 
 from pandas._libs import algos as libalgos, lib
-from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Level, Renamer
+from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer
 from pandas.compat import PY37
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv

From 957a8ecb78585363ebb4b62906442b43dd3995a3 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Fri, 24 Jan 2020 13:13:59 -0600
Subject: [PATCH 14/26] Make Sequence optional

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6421cc466fbf3..c6aa4048cc90f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5072,7 +5072,7 @@ def sort_index(
 
     def value_counts(
         self,
-        subset: Sequence[Label] = None,
+        subset: Optional[Sequence[Label]] = None,
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,

From 4fee5e038bdeff1837c32937b199c98dbff01372 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Fri, 24 Jan 2020 15:27:58 -0600
Subject: [PATCH 15/26] Fix docstring

---
 pandas/core/frame.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c6aa4048cc90f..c0b45801dc4bb 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5091,22 +5091,22 @@ def value_counts(
 
         Parameters
         ----------
-        subset : list-like, default self.columns
+        subset : list-like, optional
             Columns to use when counting unique combinations.
-        normalize : boolean, default False
+        normalize : bool, default False
             Return proportions rather than frequencies.
-        sort : boolean, default True
+        sort : bool, default True
             Sort by frequencies.
-        ascending : boolean, default False
+        ascending : bool, default False
             Sort in ascending order.
-        bins : integer, optional
+        bins : int, optional
             This parameter is not yet supported and must be set to None (the
             default value). It exists to ensure compatibiliy with
             `Series.value_counts`.
             Rather than count values, group them into half-open bins,
             a convenience for ``pd.cut``, only works with single-column numeric
             data.
-        dropna : boolean, default True
+        dropna : bool, default True
             This parameter is not yet supported and must be set to True (the
             default value). It exists to ensure compatibiliy with
             `Series.value_counts`.

From b8f4126453f5708ba140cc36853d640e59def874 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 26 Jan 2020 00:51:45 -0600
Subject: [PATCH 16/26] Clean docstring

---
 pandas/core/frame.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c0b45801dc4bb..f4457bba02fdc 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5081,7 +5081,9 @@ def value_counts(
     ):
         """
         Return a Series containing counts of unique rows in the DataFrame.
+
         .. versionadded:: 1.1.0
+
         The returned Series will have a MultiIndex with one level per input
         column.
         By default, rows that contain any NA values are omitted from the
@@ -5131,24 +5133,28 @@ def value_counts(
         dog            4          0
         cat            4          0
         ant            6          0
+
         >>> df.value_counts()
         num_legs  num_wings
         4         0            2
         6         0            1
         2         2            1
         dtype: int64
+
         >>> df.value_counts(sort=False)
         num_legs  num_wings
         2         2            1
         4         0            2
         6         0            1
         dtype: int64
+
         >>> df.value_counts(ascending=True)
         num_legs  num_wings
         2         2            1
         6         0            1
         4         0            2
         dtype: int64
+
         >>> df.value_counts(normalize=True)
         num_legs  num_wings
         4         0            0.50

From 310c688d077b4be38ac78ae63a79592ec44ec737 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 26 Jan 2020 00:58:12 -0600
Subject: [PATCH 17/26] Update to comments

---
 pandas/core/frame.py | 28 ++--------------------------
 1 file changed, 2 insertions(+), 26 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f4457bba02fdc..434752b1bd60d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5076,8 +5076,6 @@ def value_counts(
         normalize: bool = False,
         sort: bool = True,
         ascending: bool = False,
-        bins: Optional[int] = None,
-        dropna: bool = True,
     ):
         """
         Return a Series containing counts of unique rows in the DataFrame.
@@ -5101,18 +5099,6 @@ def value_counts(
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : int, optional
-            This parameter is not yet supported and must be set to None (the
-            default value). It exists to ensure compatibiliy with
-            `Series.value_counts`.
-            Rather than count values, group them into half-open bins,
-            a convenience for ``pd.cut``, only works with single-column numeric
-            data.
-        dropna : bool, default True
-            This parameter is not yet supported and must be set to True (the
-            default value). It exists to ensure compatibiliy with
-            `Series.value_counts`.
-            Don't include counts of rows containing NA values.
 
         Returns
         -------
@@ -5163,18 +5149,7 @@ def value_counts(
         dtype: float64
         """
         if subset is None:
-            subset = self.columns.tolist()
-
-        # Some features not supported yet
-        if not dropna:
-            raise NotImplementedError(
-                "`dropna=False` not yet supported for DataFrames."
-            )
-
-        if bins is not None:
-            raise NotImplementedError(
-                "`bins` parameter not yet supported for DataFrames."
-            )
+            subset = self.columns
 
         counts = self.groupby(subset).size()
 
@@ -5182,6 +5157,7 @@ def value_counts(
             counts = counts.sort_values(ascending=ascending)
         if normalize:
             counts /= counts.sum()
+
         # Force MultiIndex for single column
         if len(subset) == 1:
             counts.index = MultiIndex.from_arrays(

From a2660217d1765fd0f52b49e27f9abf4a8d1501cd Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 26 Jan 2020 01:18:39 -0600
Subject: [PATCH 18/26] Add to Series See Also

---
 pandas/core/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 6ad237cbc7c51..0986ff3942c10 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1202,6 +1202,7 @@ def value_counts(
         --------
         Series.count: Number of non-NA elements in a Series.
         DataFrame.count: Number of non-NA elements in a DataFrame.
+        DataFrame.value_counts: Equivalent method on DataFrames.
 
         Examples
         --------

From d738bf7d5f4cbbf221e6f45e4bb51c71cd4333b8 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 26 Jan 2020 11:19:23 -0600
Subject: [PATCH 19/26] Update tests and add back tolist

---
 pandas/core/frame.py                    |  2 +-
 pandas/tests/frame/test_value_counts.py | 20 --------------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 434752b1bd60d..e73a3088aa047 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5149,7 +5149,7 @@ def value_counts(
         dtype: float64
         """
         if subset is None:
-            subset = self.columns
+            subset = self.columns.tolist()
 
         counts = self.groupby(subset).size()
 
diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
index ef2dc790940b0..ec4e148462354 100644
--- a/pandas/tests/frame/test_value_counts.py
+++ b/pandas/tests/frame/test_value_counts.py
@@ -73,26 +73,6 @@ def test_data_frame_value_counts_normalize():
     tm.assert_series_equal(result, expected)
 
 
-def test_data_frame_value_counts_dropna_not_supported_yet():
-    df = pd.DataFrame(
-        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
-        index=["falcon", "dog", "cat", "ant"],
-    )
-
-    with pytest.raises(NotImplementedError, match="not yet supported"):
-        df.value_counts(dropna=False)
-
-
-def test_data_frame_value_counts_bins_not_supported():
-    df = pd.DataFrame(
-        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
-        index=["falcon", "dog", "cat", "ant"],
-    )
-
-    with pytest.raises(NotImplementedError, match="not yet supported"):
-        df.value_counts(bins=2)
-
-
 def test_data_frame_value_counts_single_col_default():
     df = pd.DataFrame({"num_legs": [2, 4, 4, 6]})
 

From 26182203a580806b4fa3c575abd3b057207f4340 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 26 Jan 2020 12:04:32 -0600
Subject: [PATCH 20/26] Don't import pytest

---
 pandas/tests/frame/test_value_counts.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
index ec4e148462354..c409b0bbe6fa9 100644
--- a/pandas/tests/frame/test_value_counts.py
+++ b/pandas/tests/frame/test_value_counts.py
@@ -1,5 +1,4 @@
 import numpy as np
-import pytest
 
 import pandas as pd
 import pandas._testing as tm

From 0d46697374b48a4d3581f92ebb03bb11fa72dc28 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 9 Feb 2020 13:03:47 -0600
Subject: [PATCH 21/26] Add to basics.rst

---
 doc/source/getting_started/basics.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
index 277080006cb3c..0afa451887dab 100644
--- a/doc/source/getting_started/basics.rst
+++ b/doc/source/getting_started/basics.rst
@@ -689,6 +689,15 @@ of a 1D array of values. It can also be used as a function on regular arrays:
    s.value_counts()
    pd.value_counts(data)
 
+The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
+By default all columns are used but a subset can be selected using the ``subset`` argument.
+
+.. ipython:: python
+
+    data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]}
+    df = pd.DataFrame(data)
+    df.value_counts()
+
 Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame:
 
 .. ipython:: python

From 81991a10a7681855fd2fed9bc06747150fa7bd82 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 9 Feb 2020 13:07:50 -0600
Subject: [PATCH 22/26] Move to Notes

---
 pandas/core/frame.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 50ae785ae039d..db12e9eb0e58d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5091,13 +5091,6 @@ def value_counts(
 
         .. versionadded:: 1.1.0
 
-        The returned Series will have a MultiIndex with one level per input
-        column.
-        By default, rows that contain any NA values are omitted from the
-        result.
-        By default, the resulting Series will be in descending order so that the
-        first element is the most frequently-occurring row.
-
         Parameters
         ----------
         subset : list-like, optional
@@ -5117,6 +5110,13 @@ def value_counts(
         --------
         Series.value_counts: Equivalent method on Series.
 
+        Notes
+        -----
+        The returned Series will have a MultiIndex with one level per input
+        column. By default, rows that contain any NA values are omitted from
+        the result. By default, the resulting Series will be in descending
+        order so that the first element is the most frequently occurring row.
+
         Examples
         --------
         >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6],

From 85bc2136b2dfd2163894bf800397b182621a1d5f Mon Sep 17 00:00:00 2001
From: Daniel Saxton <danielsaxton@DANIELs-MacBook-Pro.local>
Date: Sun, 9 Feb 2020 14:14:33 -0600
Subject: [PATCH 23/26] Rename to avoid doc error

---
 doc/source/getting_started/basics.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
index 0afa451887dab..3f72b3d206d25 100644
--- a/doc/source/getting_started/basics.rst
+++ b/doc/source/getting_started/basics.rst
@@ -695,8 +695,8 @@ By default all columns are used but a subset can be selected using the ``subset`
 .. ipython:: python
 
     data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]}
-    df = pd.DataFrame(data)
-    df.value_counts()
+    frame = pd.DataFrame(data)
+    frame.value_counts()
 
 Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame:
 

From 47683ad74b03f262dd9e17ce964f85548a117d99 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Sun, 23 Feb 2020 12:59:37 -0600
Subject: [PATCH 24/26] Add See Also

---
 pandas/core/frame.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e4af8e3550b66..3fc10444ee064 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4569,6 +4569,10 @@ def drop_duplicates(
         -------
         DataFrame
             DataFrame with duplicates removed or None if ``inplace=True``.
+
+        See Also
+        --------
+        DataFrame.value_counts: Count unique combinations of columns.
         """
         if self.empty:
             return self.copy()

From e60de83d7cf7dabcba59a4f8519db5b45d414032 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Sun, 23 Feb 2020 13:00:30 -0600
Subject: [PATCH 25/26] Move tests

---
 pandas/tests/frame/{ => methods}/test_value_counts.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pandas/tests/frame/{ => methods}/test_value_counts.py (100%)

diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
similarity index 100%
rename from pandas/tests/frame/test_value_counts.py
rename to pandas/tests/frame/methods/test_value_counts.py

From 3903a4db317c0e981b3af2dec3030dc92f95621e Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Sun, 23 Feb 2020 13:07:21 -0600
Subject: [PATCH 26/26] versionadded

---
 doc/source/getting_started/basics.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
index 3f72b3d206d25..c6d9a48fcf8ed 100644
--- a/doc/source/getting_started/basics.rst
+++ b/doc/source/getting_started/basics.rst
@@ -689,6 +689,8 @@ of a 1D array of values. It can also be used as a function on regular arrays:
    s.value_counts()
    pd.value_counts(data)
 
+.. versionadded:: 1.1.0
+
 The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
 By default all columns are used but a subset can be selected using the ``subset`` argument.