From 159d54e61c8bc2b9bf8e8543e373e39981356eae Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 2 Feb 2020 13:22:21 +0000 Subject: [PATCH 01/16] first draft --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/__init__.py | 1 + pandas/core/reshape/api.py | 2 +- pandas/core/reshape/reshape.py | 148 ++++++++++++++++++++-- pandas/tests/mytest.py | 10 ++ pandas/tests/reshape/test_from_dummies.py | 63 +++++++++ 6 files changed, 210 insertions(+), 16 deletions(-) create mode 100644 pandas/tests/mytest.py create mode 100644 pandas/tests/reshape/test_from_dummies.py diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 13827e8fc4c33..68059bea56cd2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -43,7 +43,7 @@ Other enhancements - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) -- +- We have added a :meth:`pandas.from_dummies`, which is an inverse transformation of :meth:`pandas.get_dummies` (:issue:`8745`) - .. --------------------------------------------------------------------------- diff --git a/pandas/__init__.py b/pandas/__init__.py index d526531b159b2..1c379ab2544d3 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -135,6 +135,7 @@ get_dummies, cut, qcut, + from_dummies, ) import pandas.api diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py index 3c76eef809c7a..7054926b9c0c4 100644 --- a/pandas/core/reshape/api.py +++ b/pandas/core/reshape/api.py @@ -4,5 +4,5 @@ from pandas.core.reshape.melt import lreshape, melt, wide_to_long from pandas.core.reshape.merge import merge, merge_asof, merge_ordered from pandas.core.reshape.pivot import crosstab, pivot, pivot_table -from pandas.core.reshape.reshape import get_dummies +from pandas.core.reshape.reshape import from_dummies, get_dummies from pandas.core.reshape.tile import cut, qcut diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 359e5b956f8a5..d6e4212bce60a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -751,6 +751,138 @@ def _convert_level_number(level_num, columns): return result +def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_first=None): + """ + The inverse transformation of ``pandas.get_dummies``. + + Parameters + ---------- + data : DataFrame + columns : list-like, default None + Column names in the DataFrame to be decoded. + If `columns` is None then all the columns will be converted. + prefix_sep : str, default '_' + Separator between original column name and dummy variable + dtype : dtype, default 'category' + Data dtype for new columns - only a single data type is allowed + fill_first : str, list, or dict, default None + Used to fill rows for which all the dummy variables are 0 + + Returns + ------- + transformed : DataFrame + + Examples + -------- + Say we have a dataframe where some variables have been dummified: + + >>> df = pd.DataFrame( + ... { + ... "animal_baboon": [0, 0, 1], + ... "animal_lemur": [0, 1, 0], + ... "animal_zebra": [1, 0, 0], + ... "other_col": ["a", "b", "c"], + ... } + ... 
) + >>> df + animal_baboon animal_lemur animal_zebra other_col + 0 0 0 1 a + 1 0 1 0 b + 2 1 0 0 c + + We can recover the original dataframe using `from_dummies`: + + >>> pd.from_dummies(df, columns=['animal']) + other_col animal + 0 a zebra + 1 b lemur + 2 c baboon + + Suppose our dataframe has one column from each dummified column + dropped: + + >>> df = df.drop('animal_zebra', axis=1) + >>> df + animal_baboon animal_lemur other_col + 0 0 0 a + 1 0 1 b + 2 1 0 c + + We can still recover the original dataframe, by using the argument + `fill_first`: + + >>> pd.from_dummies(df, columns=["animal"], fill_first=["zebra"]) + other_col animal + 0 a zebra + 1 b lemur + 2 c baboon + """ + if dtype is None: + dtype = "category" + + if columns is None: + data_to_decode = data.copy() + columns = data.columns.tolist() + columns = list( + {i.split(prefix_sep)[0] for i in data.columns if prefix_sep in i} + ) + + data_to_decode = data[ + [i for i in data.columns for c in columns if i.startswith(c + prefix_sep)] + ] + + # Check each row sums to 1 or 0 + if not all(i in [0, 1] for i in data_to_decode.sum(axis=1).unique().tolist()): + raise ValueError( + "Data cannot be decoded! Each row must contain only 0s and" + " 1s, and each row may have at most one 1" + ) + + if fill_first is None: + fill_first = [None] * len(columns) + elif isinstance(fill_first, str): + fill_first = itertools.cycle([fill_first]) + elif isinstance(fill_first, dict): + fill_first = [fill_first[col] for col in columns] + + out = data.copy() + for column, fill_first_ in zip(columns, fill_first): + cols, labels = [ + [ + i.replace(x, "") + for i in data_to_decode.columns + if column + prefix_sep in i + ] + for x in ["", column + prefix_sep] + ] + if not cols: + continue + out = out.drop(cols, axis=1) + if fill_first_: + cols = [column + prefix_sep + fill_first_] + cols + labels = [fill_first_] + labels + data[cols[0]] = (1 - data[cols[1:]]).all(axis=1) + out[column] = Series( + np.array(labels)[np.argmax(data[cols].to_numpy(), axis=1)], dtype=dtype + ) + return out + + +def _check_len(item, name, data_to_encode): + """ Validate prefixes and separator to avoid silently dropping cols. """ + len_msg = ( + "Length of '{name}' ({len_item}) did not match the " + "length of the columns being encoded ({len_enc})." + ) + + if is_list_like(item): + if not len(item) == data_to_encode.shape[1]: + len_msg = len_msg.format( + name=name, len_item=len(item), len_enc=data_to_encode.shape[1] + ) + raise ValueError(len_msg) + + def get_dummies( data, prefix=None, @@ -871,20 +1003,8 @@ def get_dummies( else: data_to_encode = data[columns] - # validate prefixes and separator to avoid silently dropping cols - def check_len(item, name): - - if is_list_like(item): - if not len(item) == data_to_encode.shape[1]: - len_msg = ( - f"Length of '{name}' ({len(item)}) did not match the " - "length of the columns being encoded " - f"({data_to_encode.shape[1]})." 
- ) - raise ValueError(len_msg) - - check_len(prefix, "prefix") - check_len(prefix_sep, "prefix_sep") + _check_len(prefix, "prefix", data_to_encode) + _check_len(prefix_sep, "prefix_sep", data_to_encode) if isinstance(prefix, str): prefix = itertools.cycle([prefix]) diff --git a/pandas/tests/mytest.py b/pandas/tests/mytest.py new file mode 100644 index 0000000000000..3e8916b75e977 --- /dev/null +++ b/pandas/tests/mytest.py @@ -0,0 +1,10 @@ +import pandas as pd + +def test_me(): + pd.eval( + """ + A = df.A - df.B + B = df.A + df.B + """, + target=pd.DataFrame(), + ) \ No newline at end of file diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py new file mode 100644 index 0000000000000..a416a9abecc3d --- /dev/null +++ b/pandas/tests/reshape/test_from_dummies.py @@ -0,0 +1,63 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "dtype, expected_dict", + [ + ("str", {"col1": ["a", "a", "b"]}), + (str, {"col1": ["a", "a", "b"]},), + (None, {"col1": ["a", "a", "b"]}), + ], +) +def test_dtype(dtype, expected_dict): + df = pd.DataFrame({"col1_a": [1, 1, 0], "col1_b": [0, 0, 1]}) + result = pd.from_dummies(df, dtype=dtype) + expected = pd.DataFrame(expected_dict) + if dtype is None: + expected = expected.astype("category") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "fill_first, expected_dict", + [ + ("a", {"col1": ["a", "a", "b"]}), + (["a"], {"col1": ["a", "a", "b"]}), + ({"col1": "a"}, {"col1": ["a", "a", "b"]}), + ], +) +def test_fill_first(fill_first, expected_dict): + df = pd.DataFrame({"col1_b": [0, 0, 1]}) + result = pd.from_dummies(df, fill_first=fill_first) + # get_dummies changes the ordering of columns, + # see https://github.com/pandas-dev/pandas/issues/17612 + expected = pd.DataFrame(expected_dict, dtype="category") + tm.assert_frame_equal(result, expected) + + +def test_malformed(): + df = pd.DataFrame({"col1_a": [1, 1, 0], "col1_b": [1, 0, 1]}) + msg = ( + "Data cannot be decoded! Each row must contain only 0s and 1s" + ", and each row may have at most one 1" + ) + with pytest.raises(ValueError, match=msg): + pd.from_dummies(df) + + +@pytest.mark.parametrize( + "prefix_sep, input_dict", + [ + ("_", {"col1_a": [1, 1, 0], "col1_b": [0, 0, 1]}), + ("*", {"col1*a": [1, 1, 0], "col1*b": [0, 0, 1]}), + (".", {"col1.a": [1, 1, 0], "col1.b": [0, 0, 1]}), + ], +) +def test_prefix_sep(prefix_sep, input_dict): + df = pd.DataFrame(input_dict) + result = pd.from_dummies(df, prefix_sep=prefix_sep) + expected = pd.DataFrame({"col1": ["a", "a", "b"]}, dtype="category") + tm.assert_frame_equal(result, expected) From 3bfafe6eab39c61745169abcdb9638911c4af132 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 9 Feb 2020 16:18:59 +0000 Subject: [PATCH 02/16] rename columns to prefix --- pandas/core/reshape/reshape.py | 36 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d6e4212bce60a..e1e5b96f8b052 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -751,16 +751,16 @@ def _convert_level_number(level_num, columns): return result -def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_first=None): +def from_dummies(data, prefix=None, prefix_sep="_", dtype="category", fill_first=None): """ The inverse transformation of ``pandas.get_dummies``. 
Parameters ---------- data : DataFrame - columns : list-like, default None - Column names in the DataFrame to be decoded. - If `columns` is None then all the columns will be converted. + prefix : list-like, default None + Prefixes of the columns in the DataFrame to be decoded. + If `prefix` is None then all the columns will be decoded. prefix_sep : str, default '_' Separator between original column name and dummy variable dtype : dtype, default 'category' @@ -792,7 +792,7 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs We can recover the original dataframe using `from_dummies`: - >>> pd.from_dummies(df, columns=['animal']) + >>> pd.from_dummies(df, prefix=['animal']) other_col animal 0 a zebra 1 b lemur @@ -811,7 +811,7 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs We can still recover the original dataframe, by using the argument `fill_first`: - >>> pd.from_dummies(df, columns=["animal"], fill_first=["zebra"]) + >>> pd.from_dummies(df, prefix=["animal"], fill_first=["zebra"]) other_col animal 0 a zebra 1 b lemur @@ -820,15 +820,13 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs if dtype is None: dtype = "category" - if columns is None: + if prefix is None: data_to_decode = data.copy() - columns = data.columns.tolist() - columns = list( - {i.split(prefix_sep)[0] for i in data.columns if prefix_sep in i} - ) + prefix = data.columns.tolist() + prefix = list({i.split(prefix_sep)[0] for i in data.columns if prefix_sep in i}) data_to_decode = data[ - [i for i in data.columns for c in columns if i.startswith(c + prefix_sep)] + [i for i in data.columns for p in prefix if i.startswith(p + prefix_sep)] ] # Check each row sums to 1 or 0 @@ -839,30 +837,30 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs ) if fill_first is None: - fill_first = [None] * len(columns) + fill_first = [None] * len(prefix) elif isinstance(fill_first, str): fill_first = itertools.cycle([fill_first]) elif isinstance(fill_first, dict): - fill_first = [fill_first[col] for col in columns] + fill_first = [fill_first[p] for p in prefix] out = data.copy() - for column, fill_first_ in zip(columns, fill_first): + for prefix_, fill_first_ in zip(prefix, fill_first): cols, labels = [ [ i.replace(x, "") for i in data_to_decode.columns - if column + prefix_sep in i + if prefix_ + prefix_sep in i ] - for x in ["", column + prefix_sep] + for x in ["", prefix_ + prefix_sep] ] if not cols: continue out = out.drop(cols, axis=1) if fill_first_: - cols = [column + prefix_sep + fill_first_] + cols + cols = [prefix_ + prefix_sep + fill_first_] + cols labels = [fill_first_] + labels data[cols[0]] = (1 - data[cols[1:]]).all(axis=1) - out[column] = Series( + out[prefix_] = Series( np.array(labels)[np.argmax(data[cols].to_numpy(), axis=1)], dtype=dtype ) return out From 15f68187ab5cb34d83368ea8c322d0a6d811dde7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 9 Feb 2020 16:27:10 +0000 Subject: [PATCH 03/16] fix docstring --- pandas/core/reshape/reshape.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index e1e5b96f8b052..0bfc227ee86ee 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -751,26 +751,28 @@ def _convert_level_number(level_num, columns): return result -def from_dummies(data, prefix=None, prefix_sep="_", dtype="category", fill_first=None): +def from_dummies(data, 
prefix=None, prefix_sep="_", dtype="category", fill_first=None) -> "DataFrame": """ The inverse transformation of ``pandas.get_dummies``. Parameters ---------- data : DataFrame + Data which contains dummy indicators. prefix : list-like, default None Prefixes of the columns in the DataFrame to be decoded. If `prefix` is None then all the columns will be decoded. prefix_sep : str, default '_' - Separator between original column name and dummy variable + Separator between original column name and dummy variable. dtype : dtype, default 'category' - Data dtype for new columns - only a single data type is allowed + Data dtype for new columns - only a single data type is allowed. fill_first : str, list, or dict, default None - Used to fill rows for which all the dummy variables are 0 + Used to fill rows for which all the dummy variables are 0. Returns ------- - transformed : DataFrame + DataFrame + Decoded data. Examples -------- From 9eed071ba5c875094b34ac8b2474e3dca2cf6a16 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 9 Feb 2020 16:35:15 +0000 Subject: [PATCH 04/16] black --- pandas/core/reshape/reshape.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0bfc227ee86ee..9255b8639490d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -751,7 +751,9 @@ def _convert_level_number(level_num, columns): return result -def from_dummies(data, prefix=None, prefix_sep="_", dtype="category", fill_first=None) -> "DataFrame": +def from_dummies( + data, prefix=None, prefix_sep="_", dtype="category", fill_first=None +) -> "DataFrame": """ The inverse transformation of ``pandas.get_dummies``. From f5d90888e1d3382f38e3217d3c158148af8c35ae Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 14 Feb 2020 15:53:20 +0000 Subject: [PATCH 05/16] infer prefixes --- pandas/core/reshape/reshape.py | 138 ++++++++++++---------- pandas/tests/reshape/test_from_dummies.py | 29 ++--- 2 files changed, 88 insertions(+), 79 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 9255b8639490d..d67912f7c7668 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -752,7 +752,7 @@ def _convert_level_number(level_num, columns): def from_dummies( - data, prefix=None, prefix_sep="_", dtype="category", fill_first=None + data, prefix=None, prefix_sep="_", dtype="category" ) -> "DataFrame": """ The inverse transformation of ``pandas.get_dummies``. @@ -762,14 +762,13 @@ def from_dummies( data : DataFrame Data which contains dummy indicators. prefix : list-like, default None - Prefixes of the columns in the DataFrame to be decoded. - If `prefix` is None then all the columns will be decoded. + How to name the decoded groups of columns. If there are columns + containing `prefix_sep`, then the part of their name preceding + `prefix_sep` will be used (see examples below). prefix_sep : str, default '_' Separator between original column name and dummy variable. dtype : dtype, default 'category' Data dtype for new columns - only a single data type is allowed. - fill_first : str, list, or dict, default None - Used to fill rows for which all the dummy variables are 0. Returns ------- @@ -782,90 +781,105 @@ def from_dummies( >>> df = pd.DataFrame( ... { - ... "animal_baboon": [0, 0, 1], - ... "animal_lemur": [0, 1, 0], - ... "animal_zebra": [1, 0, 0], - ... "other_col": ["a", "b", "c"], + ... "baboon": [0, 0, 1], + ... "lemur": [0, 1, 0], + ... 
"zebra": [1, 0, 0], ... } ... ) >>> df - animal_baboon animal_lemur animal_zebra other_col - 0 0 0 1 a - 1 0 1 0 b - 2 1 0 0 c + baboon lemur zebra + 0 0 0 1 + 1 0 1 0 + 2 1 0 0 We can recover the original dataframe using `from_dummies`: - >>> pd.from_dummies(df, prefix=['animal']) - other_col animal - 0 a zebra - 1 b lemur - 2 c baboon + >>> pd.from_dummies(df, prefix='animal') + animal + 0 zebra + 1 lemur + 2 baboon - Suppose our dataframe has one column from each dummified column - dropped: + If our dataframe already has columns with `prefix_sep` in them, + we don't need to pass in the `prefix` argument: - >>> df = df.drop('animal_zebra', axis=1) + >>> df = pd.DataFrame( + ... { + ... "animal_baboon": [0, 0, 1], + ... "animal_lemur": [0, 1, 0], + ... "animal_zebra": [1, 0, 0], + ... "other": ['a', 'b', 'c'], + ... } + ... ) >>> df - animal_baboon animal_lemur other_col - 0 0 0 a - 1 0 1 b - 2 1 0 c - - We can still recover the original dataframe, by using the argument - `fill_first`: - - >>> pd.from_dummies(df, prefix=["animal"], fill_first=["zebra"]) - other_col animal - 0 a zebra - 1 b lemur - 2 c baboon + animal_baboon animal_lemur animal_zebra other + 0 0 0 1 a + 1 0 1 0 b + 2 1 0 0 c + + >>> pd.from_dummies(df) + other animal + 0 a zebra + 1 b lemur + 2 c baboon """ if dtype is None: dtype = "category" - if prefix is None: - data_to_decode = data.copy() - prefix = data.columns.tolist() - prefix = list({i.split(prefix_sep)[0] for i in data.columns if prefix_sep in i}) + columns_to_decode = [i for i in data.columns if prefix_sep in i] + if not columns_to_decode: + if prefix is None: + raise ValueError( + "If no columns contain `prefix_sep`, you must" + " pass a value to `prefix` with which to name" + " the decoded columns." + ) + # If no column contains `prefix_sep`, we add `prefix`_`prefix_sep` to + # each column. + out = data.rename(columns = lambda x: f'{prefix}{prefix_sep}{x}').copy() + columns_to_decode = out.columns + else: + out = data.copy() - data_to_decode = data[ - [i for i in data.columns for p in prefix if i.startswith(p + prefix_sep)] - ] + data_to_decode = out[columns_to_decode] - # Check each row sums to 1 or 0 - if not all(i in [0, 1] for i in data_to_decode.sum(axis=1).unique().tolist()): - raise ValueError( - "Data cannot be decoded! Each row must contain only 0s and" - " 1s, and each row may have at most one 1" - ) + if prefix is None: + # If no prefix has been passed, extract it from columns containing + # `prefix_sep` + seen = set() + prefix = [] + for i in columns_to_decode: + i = i.split(prefix_sep)[0] + if i in seen: + continue + seen.add(i) + prefix.append(i) + elif isinstance(prefix, str): + prefix = [prefix] - if fill_first is None: - fill_first = [None] * len(prefix) - elif isinstance(fill_first, str): - fill_first = itertools.cycle([fill_first]) - elif isinstance(fill_first, dict): - fill_first = [fill_first[p] for p in prefix] + # Check each row sums to 1 or 0 + def _validate_values(data): + if not all(i in [0, 1] for i in data.sum(axis=1).unique().tolist()): + raise ValueError( + "Data cannot be decoded! Each row must contain only 0s and" + " 1s, and each row may have at most one 1." 
+ ) - out = data.copy() - for prefix_, fill_first_ in zip(prefix, fill_first): - cols, labels = [ + for prefix_ in prefix: + cols, labels = ( [ i.replace(x, "") for i in data_to_decode.columns if prefix_ + prefix_sep in i ] for x in ["", prefix_ + prefix_sep] - ] + ) if not cols: continue + _validate_values(data_to_decode[cols]) out = out.drop(cols, axis=1) - if fill_first_: - cols = [prefix_ + prefix_sep + fill_first_] + cols - labels = [fill_first_] + labels - data[cols[0]] = (1 - data[cols[1:]]).all(axis=1) out[prefix_] = Series( - np.array(labels)[np.argmax(data[cols].to_numpy(), axis=1)], dtype=dtype + np.array(labels)[np.argmax(data_to_decode[cols].to_numpy(), axis=1)], dtype=dtype ) return out diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py index a416a9abecc3d..50ca78aca9ee4 100644 --- a/pandas/tests/reshape/test_from_dummies.py +++ b/pandas/tests/reshape/test_from_dummies.py @@ -21,23 +21,6 @@ def test_dtype(dtype, expected_dict): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "fill_first, expected_dict", - [ - ("a", {"col1": ["a", "a", "b"]}), - (["a"], {"col1": ["a", "a", "b"]}), - ({"col1": "a"}, {"col1": ["a", "a", "b"]}), - ], -) -def test_fill_first(fill_first, expected_dict): - df = pd.DataFrame({"col1_b": [0, 0, 1]}) - result = pd.from_dummies(df, fill_first=fill_first) - # get_dummies changes the ordering of columns, - # see https://github.com/pandas-dev/pandas/issues/17612 - expected = pd.DataFrame(expected_dict, dtype="category") - tm.assert_frame_equal(result, expected) - - def test_malformed(): df = pd.DataFrame({"col1_a": [1, 1, 0], "col1_b": [1, 0, 1]}) msg = ( @@ -61,3 +44,15 @@ def test_prefix_sep(prefix_sep, input_dict): result = pd.from_dummies(df, prefix_sep=prefix_sep) expected = pd.DataFrame({"col1": ["a", "a", "b"]}, dtype="category") tm.assert_frame_equal(result, expected) + +def test_no_prefix(): + df = pd.DataFrame({"a": [1, 1, 0], "b": [0, 0, 1]}) + result = pd.from_dummies(df, prefix='letter') + expected = pd.DataFrame({'letter': ['a', 'a', 'b']}, dtype='category') + tm.assert_frame_equal(result, expected) + +def test_multiple_columns(): + df = pd.DataFrame({"col1_a": [1, 0], "col1_b": [0, 1], "col2_a": [0, 0], "col2_c": [1, 1]}) + result = pd.from_dummies(df) + expected = pd.DataFrame({'col1': ['a', 'b'], 'col2': ['c', 'c']}, dtype='category') + tm.assert_frame_equal(result, expected) \ No newline at end of file From 4be43af882cb30216cff6d1f9588df9d258cfb6d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 14 Feb 2020 15:56:42 +0000 Subject: [PATCH 06/16] lint --- pandas/core/reshape/reshape.py | 15 +++++++-------- pandas/tests/mytest.py | 3 ++- pandas/tests/reshape/test_from_dummies.py | 14 +++++++++----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d67912f7c7668..5066ec261aad5 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,6 +1,6 @@ from functools import partial import itertools -from typing import List, Optional, Union +from typing import List, Optional, Set, Union import numpy as np @@ -751,9 +751,7 @@ def _convert_level_number(level_num, columns): return result -def from_dummies( - data, prefix=None, prefix_sep="_", dtype="category" -) -> "DataFrame": +def from_dummies(data, prefix=None, prefix_sep="_", dtype="category") -> "DataFrame": """ The inverse transformation of ``pandas.get_dummies``. 
@@ -833,10 +831,10 @@ def from_dummies( "If no columns contain `prefix_sep`, you must" " pass a value to `prefix` with which to name" " the decoded columns." - ) + ) # If no column contains `prefix_sep`, we add `prefix`_`prefix_sep` to # each column. - out = data.rename(columns = lambda x: f'{prefix}{prefix_sep}{x}').copy() + out = data.rename(columns=lambda x: f"{prefix}{prefix_sep}{x}").copy() columns_to_decode = out.columns else: out = data.copy() @@ -846,7 +844,7 @@ def from_dummies( if prefix is None: # If no prefix has been passed, extract it from columns containing # `prefix_sep` - seen = set() + seen: Set[str] = set() prefix = [] for i in columns_to_decode: i = i.split(prefix_sep)[0] @@ -879,7 +877,8 @@ def _validate_values(data): _validate_values(data_to_decode[cols]) out = out.drop(cols, axis=1) out[prefix_] = Series( - np.array(labels)[np.argmax(data_to_decode[cols].to_numpy(), axis=1)], dtype=dtype + np.array(labels)[np.argmax(data_to_decode[cols].to_numpy(), axis=1)], + dtype=dtype, ) return out diff --git a/pandas/tests/mytest.py b/pandas/tests/mytest.py index 3e8916b75e977..d301fdc1ef135 100644 --- a/pandas/tests/mytest.py +++ b/pandas/tests/mytest.py @@ -1,5 +1,6 @@ import pandas as pd + def test_me(): pd.eval( """ @@ -7,4 +8,4 @@ def test_me(): B = df.A + df.B """, target=pd.DataFrame(), - ) \ No newline at end of file + ) diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py index 50ca78aca9ee4..477d06708a49b 100644 --- a/pandas/tests/reshape/test_from_dummies.py +++ b/pandas/tests/reshape/test_from_dummies.py @@ -45,14 +45,18 @@ def test_prefix_sep(prefix_sep, input_dict): expected = pd.DataFrame({"col1": ["a", "a", "b"]}, dtype="category") tm.assert_frame_equal(result, expected) + def test_no_prefix(): df = pd.DataFrame({"a": [1, 1, 0], "b": [0, 0, 1]}) - result = pd.from_dummies(df, prefix='letter') - expected = pd.DataFrame({'letter': ['a', 'a', 'b']}, dtype='category') + result = pd.from_dummies(df, prefix="letter") + expected = pd.DataFrame({"letter": ["a", "a", "b"]}, dtype="category") tm.assert_frame_equal(result, expected) + def test_multiple_columns(): - df = pd.DataFrame({"col1_a": [1, 0], "col1_b": [0, 1], "col2_a": [0, 0], "col2_c": [1, 1]}) + df = pd.DataFrame( + {"col1_a": [1, 0], "col1_b": [0, 1], "col2_a": [0, 0], "col2_c": [1, 1]} + ) result = pd.from_dummies(df) - expected = pd.DataFrame({'col1': ['a', 'b'], 'col2': ['c', 'c']}, dtype='category') - tm.assert_frame_equal(result, expected) \ No newline at end of file + expected = pd.DataFrame({"col1": ["a", "b"], "col2": ["c", "c"]}, dtype="category") + tm.assert_frame_equal(result, expected) From 9b111b764b8006efca8aa7b244513184719fd392 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 14 Feb 2020 16:30:40 +0000 Subject: [PATCH 07/16] update test_api --- pandas/tests/api/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 406d5f055797d..0fbf9653bae18 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -120,6 +120,7 @@ class TestPDApi(Base): "eval", "factorize", "get_dummies", + "from_dummies", "infer_freq", "isna", "isnull", From cfa74cec3090877bf66d9a1c55938f032dc163aa Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 14 Feb 2020 17:10:40 +0000 Subject: [PATCH 08/16] add see also and versionadded --- pandas/core/reshape/reshape.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/reshape/reshape.py 
b/pandas/core/reshape/reshape.py index 5066ec261aad5..9ca579c27923a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -755,6 +755,8 @@ def from_dummies(data, prefix=None, prefix_sep="_", dtype="category") -> "DataFr """ The inverse transformation of ``pandas.get_dummies``. + .. versionadded:: 1.1.0 + Parameters ---------- data : DataFrame @@ -773,6 +775,10 @@ def from_dummies(data, prefix=None, prefix_sep="_", dtype="category") -> "DataFr DataFrame Decoded data. + See Also + -------- + get_dummies : the inverse operation + Examples -------- Say we have a dataframe where some variables have been dummified: @@ -948,6 +954,7 @@ def get_dummies( See Also -------- Series.str.get_dummies : Convert Series to dummy codes. + from_dummies : the inverse operation. Examples -------- From 3827e01fb70e723a993909ec0b45ad0969bedfa4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 14 Feb 2020 17:14:06 +0000 Subject: [PATCH 09/16] remove mytest --- pandas/tests/mytest.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 pandas/tests/mytest.py diff --git a/pandas/tests/mytest.py b/pandas/tests/mytest.py deleted file mode 100644 index d301fdc1ef135..0000000000000 --- a/pandas/tests/mytest.py +++ /dev/null @@ -1,11 +0,0 @@ -import pandas as pd - - -def test_me(): - pd.eval( - """ - A = df.A - df.B - B = df.A + df.B - """, - target=pd.DataFrame(), - ) From 5cf2ab8ff1b2b40ef6f2cbcc05a5cf5d2567d332 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 14 Feb 2020 17:33:04 +0000 Subject: [PATCH 10/16] fix docstring validation --- pandas/core/reshape/reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 9ca579c27923a..06795a6654ff1 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -777,7 +777,7 @@ def from_dummies(data, prefix=None, prefix_sep="_", dtype="category") -> "DataFr See Also -------- - get_dummies : the inverse operation + get_dummies : The inverse operation. Examples -------- @@ -954,7 +954,7 @@ def get_dummies( See Also -------- Series.str.get_dummies : Convert Series to dummy codes. - from_dummies : the inverse operation. + from_dummies : The inverse operation. Examples -------- From 8ffb0baf0c9f4a387253fb76d4ee0a9dde03845d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 10 Apr 2020 12:50:48 +0100 Subject: [PATCH 11/16] type signature, simplify validation --- pandas/__init__.py | 204 +++++++++++----------- pandas/core/reshape/reshape.py | 10 +- pandas/tests/reshape/test_from_dummies.py | 6 +- 3 files changed, 108 insertions(+), 112 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 85d9a452c4cc9..fcfdefda428e9 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -18,171 +18,163 @@ ) del hard_dependencies, dependency, missing_dependencies +from pandas._config import ( + describe_option, + get_option, + option_context, + options, + reset_option, + set_option, +) + # numpy compat from pandas.compat.numpy import ( + _is_numpy_dev, _np_version_under1p14, _np_version_under1p15, _np_version_under1p16, _np_version_under1p17, _np_version_under1p18, - _is_numpy_dev, -) - -try: - from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib -except ImportError as e: # pragma: no cover - # hack but overkill to use re - module = str(e).replace("cannot import name ", "") - raise ImportError( - f"C extension: {module} not built. 
If you want to import " - "pandas from the source directory, you may need to run " - "'python setup.py build_ext --inplace --force' to build the C extensions first." - ) from e - -from pandas._config import ( - get_option, - set_option, - reset_option, - describe_option, - option_context, - options, ) +from pandas.util._print_versions import show_versions +from pandas.util._tester import test # let init-time option registration happen -import pandas.core.config_init - -from pandas.core.api import ( - # dtype +import pandas.api +import pandas.arrays +from pandas.core.api import ( # dtype; missing; indexes; tseries; conversion; misc + NA, + BooleanDtype, + Categorical, + CategoricalDtype, + CategoricalIndex, + DataFrame, + DateOffset, + DatetimeIndex, + DatetimeTZDtype, + Float64Index, + Grouper, + Index, + IndexSlice, Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + Int64Index, + Interval, + IntervalDtype, + IntervalIndex, + MultiIndex, + NamedAgg, + NaT, + Period, + PeriodDtype, + PeriodIndex, + RangeIndex, + Series, + StringDtype, + Timedelta, + TimedeltaIndex, + Timestamp, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype, - CategoricalDtype, - PeriodDtype, - IntervalDtype, - DatetimeTZDtype, - StringDtype, - BooleanDtype, - # missing - NA, + UInt64Index, + array, + bdate_range, + date_range, + factorize, + interval_range, isna, isnull, notna, notnull, - # indexes - Index, - CategoricalIndex, - Int64Index, - UInt64Index, - RangeIndex, - Float64Index, - MultiIndex, - IntervalIndex, - TimedeltaIndex, - DatetimeIndex, - PeriodIndex, - IndexSlice, - # tseries - NaT, - Period, period_range, - Timedelta, + set_eng_float_format, timedelta_range, - Timestamp, - date_range, - bdate_range, - Interval, - interval_range, - DateOffset, - # conversion - to_numeric, to_datetime, + to_numeric, to_timedelta, - # misc - Grouper, - factorize, unique, value_counts, - NamedAgg, - array, - Categorical, - set_eng_float_format, - Series, - DataFrame, ) - from pandas.core.arrays.sparse import SparseDtype - -from pandas.tseries.api import infer_freq -from pandas.tseries import offsets - from pandas.core.computation.api import eval - +import pandas.core.config_init from pandas.core.reshape.api import ( concat, + crosstab, + cut, + from_dummies, + get_dummies, lreshape, melt, - wide_to_long, merge, merge_asof, merge_ordered, - crosstab, pivot, pivot_table, - get_dummies, - cut, qcut, - from_dummies, + wide_to_long, ) +import pandas.testing -import pandas.api -from pandas.util._print_versions import show_versions - -from pandas.io.api import ( - # excel +from pandas.io.api import ( # excel; parsers; pickle; pytables; sql; misc ExcelFile, ExcelWriter, - read_excel, - # parsers - read_csv, - read_fwf, - read_table, - # pickle - read_pickle, - to_pickle, - # pytables HDFStore, - read_hdf, - # sql - read_sql, - read_sql_query, - read_sql_table, - # misc read_clipboard, - read_parquet, - read_orc, + read_csv, + read_excel, read_feather, + read_fwf, read_gbq, + read_hdf, read_html, read_json, - read_stata, + read_orc, + read_parquet, + read_pickle, read_sas, read_spss, + read_sql, + read_sql_query, + read_sql_table, + read_stata, + read_table, + to_pickle, ) - from pandas.io.json import _json_normalize as json_normalize - -from pandas.util._tester import test -import pandas.testing -import pandas.arrays +from pandas.tseries import offsets +from pandas.tseries.api import infer_freq # use the closest tagged version if possible from ._version import get_versions +try: + from pandas._libs import hashtable as _hashtable, lib 
as _lib, tslib as _tslib +except ImportError as e: # pragma: no cover + # hack but overkill to use re + module = str(e).replace("cannot import name ", "") + raise ImportError( + f"C extension: {module} not built. If you want to import " + "pandas from the source directory, you may need to run " + "'python setup.py build_ext --inplace --force' to build the C extensions first." + ) from e + + + + + + + + + + + + + v = get_versions() __version__ = v.get("closest-tag", v["version"]) __git_version__ = v.get("full-revisionid") diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 4880e886fc4a5..9cbcf7da8718a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -6,6 +6,7 @@ import pandas._libs.algos as libalgos import pandas._libs.reshape as libreshape from pandas._libs.sparse import IntIndex +from pandas._typing import Dtype from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import maybe_promote @@ -746,7 +747,12 @@ def _convert_level_number(level_num, columns): return result -def from_dummies(data, prefix=None, prefix_sep="_", dtype="category") -> "DataFrame": +def from_dummies( + data: "DataFrame", + prefix: Optional[Union[str, List[str]]] = None, + prefix_sep: str = "_", + dtype: Dtype = "category", +) -> "DataFrame": """ The inverse transformation of ``pandas.get_dummies``. @@ -858,7 +864,7 @@ def from_dummies(data, prefix=None, prefix_sep="_", dtype="category") -> "DataFr # Check each row sums to 1 or 0 def _validate_values(data): - if not all(i in [0, 1] for i in data.sum(axis=1).unique().tolist()): + if (data.sum(axis=1) != 1).any(): raise ValueError( "Data cannot be decoded! Each row must contain only 0s and" " 1s, and each row may have at most one 1." diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py index 477d06708a49b..89334e2348e0b 100644 --- a/pandas/tests/reshape/test_from_dummies.py +++ b/pandas/tests/reshape/test_from_dummies.py @@ -9,15 +9,13 @@ [ ("str", {"col1": ["a", "a", "b"]}), (str, {"col1": ["a", "a", "b"]},), - (None, {"col1": ["a", "a", "b"]}), + ("category", {"col1": ["a", "a", "b"]}), ], ) def test_dtype(dtype, expected_dict): df = pd.DataFrame({"col1_a": [1, 1, 0], "col1_b": [0, 0, 1]}) result = pd.from_dummies(df, dtype=dtype) - expected = pd.DataFrame(expected_dict) - if dtype is None: - expected = expected.astype("category") + expected = pd.DataFrame(expected_dict, dtype=dtype) tm.assert_frame_equal(result, expected) From 2e2cb572346193b805b2c5f523da2344b7198fea Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 11 Apr 2020 13:53:19 +0100 Subject: [PATCH 12/16] don't blacken pandas/__init__ --- pandas/__init__.py | 204 +++++++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 98 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index fcfdefda428e9..85d9a452c4cc9 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -18,163 +18,171 @@ ) del hard_dependencies, dependency, missing_dependencies -from pandas._config import ( - describe_option, - get_option, - option_context, - options, - reset_option, - set_option, -) - # numpy compat from pandas.compat.numpy import ( - _is_numpy_dev, _np_version_under1p14, _np_version_under1p15, _np_version_under1p16, _np_version_under1p17, _np_version_under1p18, + _is_numpy_dev, +) + +try: + from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib +except ImportError as e: # pragma: no cover + # hack but overkill to use re + 
module = str(e).replace("cannot import name ", "") + raise ImportError( + f"C extension: {module} not built. If you want to import " + "pandas from the source directory, you may need to run " + "'python setup.py build_ext --inplace --force' to build the C extensions first." + ) from e + +from pandas._config import ( + get_option, + set_option, + reset_option, + describe_option, + option_context, + options, ) -from pandas.util._print_versions import show_versions -from pandas.util._tester import test # let init-time option registration happen -import pandas.api -import pandas.arrays -from pandas.core.api import ( # dtype; missing; indexes; tseries; conversion; misc - NA, - BooleanDtype, - Categorical, - CategoricalDtype, - CategoricalIndex, - DataFrame, - DateOffset, - DatetimeIndex, - DatetimeTZDtype, - Float64Index, - Grouper, - Index, - IndexSlice, +import pandas.core.config_init + +from pandas.core.api import ( + # dtype Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, - Int64Index, - Interval, - IntervalDtype, - IntervalIndex, - MultiIndex, - NamedAgg, - NaT, - Period, - PeriodDtype, - PeriodIndex, - RangeIndex, - Series, - StringDtype, - Timedelta, - TimedeltaIndex, - Timestamp, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype, - UInt64Index, - array, - bdate_range, - date_range, - factorize, - interval_range, + CategoricalDtype, + PeriodDtype, + IntervalDtype, + DatetimeTZDtype, + StringDtype, + BooleanDtype, + # missing + NA, isna, isnull, notna, notnull, + # indexes + Index, + CategoricalIndex, + Int64Index, + UInt64Index, + RangeIndex, + Float64Index, + MultiIndex, + IntervalIndex, + TimedeltaIndex, + DatetimeIndex, + PeriodIndex, + IndexSlice, + # tseries + NaT, + Period, period_range, - set_eng_float_format, + Timedelta, timedelta_range, - to_datetime, + Timestamp, + date_range, + bdate_range, + Interval, + interval_range, + DateOffset, + # conversion to_numeric, + to_datetime, to_timedelta, + # misc + Grouper, + factorize, unique, value_counts, + NamedAgg, + array, + Categorical, + set_eng_float_format, + Series, + DataFrame, ) + from pandas.core.arrays.sparse import SparseDtype + +from pandas.tseries.api import infer_freq +from pandas.tseries import offsets + from pandas.core.computation.api import eval -import pandas.core.config_init + from pandas.core.reshape.api import ( concat, - crosstab, - cut, - from_dummies, - get_dummies, lreshape, melt, + wide_to_long, merge, merge_asof, merge_ordered, + crosstab, pivot, pivot_table, + get_dummies, + cut, qcut, - wide_to_long, + from_dummies, ) -import pandas.testing -from pandas.io.api import ( # excel; parsers; pickle; pytables; sql; misc +import pandas.api +from pandas.util._print_versions import show_versions + +from pandas.io.api import ( + # excel ExcelFile, ExcelWriter, + read_excel, + # parsers + read_csv, + read_fwf, + read_table, + # pickle + read_pickle, + to_pickle, + # pytables HDFStore, + read_hdf, + # sql + read_sql, + read_sql_query, + read_sql_table, + # misc read_clipboard, - read_csv, - read_excel, + read_parquet, + read_orc, read_feather, - read_fwf, read_gbq, - read_hdf, read_html, read_json, - read_orc, - read_parquet, - read_pickle, + read_stata, read_sas, read_spss, - read_sql, - read_sql_query, - read_sql_table, - read_stata, - read_table, - to_pickle, ) + from pandas.io.json import _json_normalize as json_normalize -from pandas.tseries import offsets -from pandas.tseries.api import infer_freq + +from pandas.util._tester import test +import pandas.testing +import pandas.arrays # use the closest tagged version 
if possible from ._version import get_versions -try: - from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib -except ImportError as e: # pragma: no cover - # hack but overkill to use re - module = str(e).replace("cannot import name ", "") - raise ImportError( - f"C extension: {module} not built. If you want to import " - "pandas from the source directory, you may need to run " - "'python setup.py build_ext --inplace --force' to build the C extensions first." - ) from e - - - - - - - - - - - - - v = get_versions() __version__ = v.get("closest-tag", v["version"]) __git_version__ = v.get("full-revisionid") From d2916d95bcef8f5ee5ba4bbed9fbb18bfbe65adf Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 18 Apr 2020 09:32:54 +0100 Subject: [PATCH 13/16] revert taking check_len out of get_dummies --- pandas/core/reshape/reshape.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index de5fe04ae33c8..e5e887a06bac4 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -871,21 +871,6 @@ def _validate_values(data): return out -def _check_len(item, name, data_to_encode): - """ Validate prefixes and separator to avoid silently dropping cols. """ - len_msg = ( - "Length of '{name}' ({len_item}) did not match the " - "length of the columns being encoded ({len_enc})." - ) - - if is_list_like(item): - if not len(item) == data_to_encode.shape[1]: - len_msg = len_msg.format( - name=name, len_item=len(item), len_enc=data_to_encode.shape[1] - ) - raise ValueError(len_msg) - - def get_dummies( data, prefix=None, @@ -1007,8 +992,20 @@ def get_dummies( else: data_to_encode = data[columns] - _check_len(prefix, "prefix", data_to_encode) - _check_len(prefix_sep, "prefix_sep", data_to_encode) + # validate prefixes and separator to avoid silently dropping cols + def check_len(item, name): + + if is_list_like(item): + if not len(item) == data_to_encode.shape[1]: + len_msg = ( + f"Length of '{name}' ({len(item)}) did not match the " + "length of the columns being encoded " + f"({data_to_encode.shape[1]})." + ) + raise ValueError(len_msg) + + check_len(prefix, "prefix") + check_len(prefix_sep, "prefix_sep") if isinstance(prefix, str): prefix = itertools.cycle([prefix]) From 6a8456900bc8feca782b07d77e9bda1418f6e2ab Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 18 Apr 2020 09:52:53 +0100 Subject: [PATCH 14/16] reword comment --- pandas/core/reshape/reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index e5e887a06bac4..91e4414a580c2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -820,8 +820,8 @@ def from_dummies( " pass a value to `prefix` with which to name" " the decoded columns." ) - # If no column contains `prefix_sep`, we add `prefix`_`prefix_sep` to - # each column. + # If no column contains `prefix_sep`, we prepend `prefix` and + # `prefix_sep` to each column. 
out = data.rename(columns=lambda x: f"{prefix}{prefix_sep}{x}").copy() columns_to_decode = out.columns else: From 5efee2969cc5ebfe5532eda72352a7de0628e47a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 18 Apr 2020 10:36:33 +0100 Subject: [PATCH 15/16] put space at end of string, rather than beginning --- pandas/core/reshape/reshape.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 91e4414a580c2..258ee6c2460d8 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -816,9 +816,9 @@ def from_dummies( if not columns_to_decode: if prefix is None: raise ValueError( - "If no columns contain `prefix_sep`, you must" - " pass a value to `prefix` with which to name" - " the decoded columns." + "If no columns contain `prefix_sep`, you must " + "pass a value to `prefix` with which to name " + "the decoded columns." ) # If no column contains `prefix_sep`, we prepend `prefix` and # `prefix_sep` to each column. From 1bf16c9916c98147905b14d99b7ee37ac04130e7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 18 Apr 2020 11:00:06 +0100 Subject: [PATCH 16/16] put space at end of string --- pandas/core/reshape/reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 258ee6c2460d8..23f14cc84ad62 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -847,8 +847,8 @@ def from_dummies( def _validate_values(data): if (data.sum(axis=1) != 1).any(): raise ValueError( - "Data cannot be decoded! Each row must contain only 0s and" - " 1s, and each row may have at most one 1." + "Data cannot be decoded! Each row must contain only 0s and " + "1s, and each row may have at most one 1." ) for prefix_ in prefix:
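
A minimal usage sketch of the ``from_dummies`` API proposed in this series, using the signature as of patch 11, ``from_dummies(data, prefix=None, prefix_sep="_", dtype="category")``. This is illustrative only: it assumes a build of this branch is installed (the function is not in a released pandas at this point), and the example data follows the docstring examples and tests above.

    import pandas as pd

    # A dummy-encoded frame, as produced by pd.get_dummies(..., prefix="animal"),
    # plus one pass-through column.
    df = pd.DataFrame(
        {
            "animal_baboon": [0, 0, 1],
            "animal_lemur": [0, 1, 0],
            "animal_zebra": [1, 0, 0],
            "other": ["a", "b", "c"],
        }
    )

    # Prefixes are inferred from columns containing prefix_sep ("_" by default),
    # so no arguments are needed; non-dummy columns are carried through unchanged
    # and the decoded column defaults to dtype "category".
    decoded = pd.from_dummies(df)
    #   other   animal
    # 0     a    zebra
    # 1     b    lemur
    # 2     c   baboon

    # When the dummy columns carry no prefix, name the decoded column explicitly
    # via `prefix`, and optionally choose a different dtype.
    letters = pd.DataFrame({"a": [1, 1, 0], "b": [0, 0, 1]})
    pd.from_dummies(letters, prefix="letter", dtype=str)
    #   letter
    # 0      a
    # 1      a
    # 2      b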