From 18de37686fb70d80d1f66486e25ffb44918862d3 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 23 Feb 2018 16:26:01 -0600
Subject: [PATCH 1/8] REF/BUG/API: factorizing categorical data

This changes / fixes how Categorical data are factorized. The return value of a
factorized categorical is now `Tuple[ndarray[int], Categorical]`.

Before

```python
In [2]: l, u = pd.factorize(pd.Categorical(['a', 'a', 'b']))

In [3]: l
Out[3]: array([0, 0, 1])

In [4]: u
Out[4]: array([0, 1])
```

After

```python
In [2]: l, u = pd.factorize(pd.Categorical(['a', 'a', 'b']))

In [3]: l
Out[3]: array([0, 0, 1])

In [4]: u
Out[4]:
[a, b]
Categories (2, object): [a, b]
```

The implementation is similar to `.unique`.

1. The algo (`pd.factorize`, `pd.unique`) handles unboxing / dtype coercion
2. The algo dispatches the actual array factorization for extension types
3. The algo boxes the output if necessary, depending on the input.

I've implemented this as a new public method on ``Categorical``, mainly since
this is what we do for unique, and I think it's a useful method to have.

This fixes a bug in factorizing categoricals with missing values. Previously, we
included -1 in the uniques.

Before

```python
In [2]: l, u = pd.factorize(pd.Categorical(['a', 'a', 'b', None]))

In [3]: u
Out[3]: array([ 0,  1, -1])
```

After

```python
In [2]: l, u = pd.factorize(pd.Categorical(['a', 'a', 'b', None]))

In [3]: u
Out[3]:
[a, b]
Categories (2, object): [a, b]
```
---
 doc/source/whatsnew/v0.23.0.txt        |  2 +
 pandas/core/algorithms.py              | 74 ++++++++++++++++++++------
 pandas/core/arrays/categorical.py      | 59 ++++++++++++++++++++
 pandas/tests/categorical/test_algos.py | 49 +++++++++++++++++
 4 files changed, 167 insertions(+), 17 deletions(-)
 create mode 100644 pandas/tests/categorical/test_algos.py

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 542e62aa374be..01f68940c1715 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -746,6 +746,8 @@ Categorical
 - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
 - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`)
 - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`)
+- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`)
+- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index d616e3f92aa4d..3a34880afbd19 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -438,6 +438,35 @@ def isin(comps, values):
     return f(comps, values)
 
 
+def _factorize_array(values, check_nulls, na_sentinel=-1, size_hint=None):
+    """Factorize an array-like to labels and uniques.
+
+    This doesn't do any coercion of types or unboxing before factorization.
+
+    Parameters
+    ----------
+    values : ndarray
+    check_nulls : bool
+        Whether to check for nulls in the hashtable's 'get_labels' method.
+    na_sentinel : int, default -1
+    size_hint : int, optional
+        Passed through to the hashtable's 'get_labels' method.
+
+    Returns
+    -------
+    labels, uniques : ndarray
+    """
+    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)
+
+    table = hash_klass(size_hint or len(values))
+    uniques = vec_klass()
+    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls)
+
+    labels = _ensure_platform_int(labels)
+    uniques = uniques.to_array()
+    return labels, uniques
+
+
 @deprecate_kwarg(old_arg_name='order', new_arg_name=None)
 def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     """
@@ -445,8 +474,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
 
     Parameters
     ----------
-    values : ndarray (1-d)
-        Sequence
+    values : Sequence
+        ndarrays must be 1-D. Sequences that aren't pandas objects are
+        coerced to ndarrays before factorization.
     sort : boolean, default False
         Sort by values
     na_sentinel : int, default -1
@@ -461,26 +491,36 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
         Series
 
     note: an array of Periods will ignore sort as it returns an always sorted
-    PeriodIndex
+    PeriodIndex.
     """
+    # Implementation notes: This method is responsible for 3 things
+    # 1.) coercing data to array-like (ndarray, Index, extension array)
+    # 2.) factorizing labels and uniques
+    # 3.) Maybe boxing the output in an Index
+    #
+    # Step 2 is dispatched to extension types (like Categorical). They are
+    # responsible only for factorization and sorting if necessary. All
+    # data coercion and boxing should happen here.
 
     values = _ensure_arraylike(values)
     original = values
-    values, dtype, _ = _ensure_data(values)
-    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)
-
-    table = hash_klass(size_hint or len(values))
-    uniques = vec_klass()
-    check_nulls = not is_integer_dtype(original)
-    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls)
-
-    labels = _ensure_platform_int(labels)
-    uniques = uniques.to_array()
 
-    if sort and len(uniques) > 0:
-        from pandas.core.sorting import safe_sort
-        uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
-                                    assume_unique=True)
+    if is_categorical_dtype(values):
+        values = getattr(values, '_values', values)
+        labels, uniques = values.factorize(sort=sort)
+        dtype = original.dtype
+    else:
+        values, dtype, _ = _ensure_data(values)
+        check_nulls = not is_integer_dtype(original)
+        labels, uniques = _factorize_array(values, check_nulls,
+                                           na_sentinel=na_sentinel,
+                                           size_hint=size_hint)
+
+        if sort and len(uniques) > 0:
+            from pandas.core.sorting import safe_sort
+            uniques, labels = safe_sort(uniques, labels,
+                                        na_sentinel=na_sentinel,
+                                        assume_unique=True)
 
     uniques = _reconstruct_data(uniques, dtype, original)
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index c6eeabf0148d0..5f9661c17b0d3 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -7,6 +7,7 @@
 from pandas import compat
 from pandas.compat import u, lzip
 from pandas._libs import lib, algos as libalgos
+from pandas._libs.tslib import iNaT
 
 from pandas.core.dtypes.generic import (
     ABCSeries, ABCIndexClass, ABCCategoricalIndex)
@@ -2068,6 +2069,64 @@ def unique(self):
             take_codes = sorted(take_codes)
         return cat.set_categories(cat.categories.take(take_codes))
 
+    def factorize(self, sort=False, na_sentinel=-1):
+        """Encode the Categorical as an enumerated type.
+
+        Parameters
+        ----------
+        sort : boolean, default False
+            Sort by values
+        na_sentinel: int, default -1
+            Value to mark "not found"
+
+        Returns
+        -------
+        labels : ndarray
+            An integer NumPy array that's an indexer into the original
+            Categorical
+        uniques : Categorical
+            A Categorical whose values are the unique values and
+            whose dtype matches the original CategoricalDtype. Note that
+            any unobserved categories in ``self`` will not be present
+            in ``uniques.values``. They will be present in
+            ``uniques.categories``.
+
+        Examples
+        --------
+        >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
+        >>> labels, uniques = cat.factorize()
+        >>> labels
+        array([0, 0, 1])
+        >>> uniques
+        [a, c]
+        Categories (3, object): [a, b, c]
+
+        Missing values are handled
+
+        >>> labels, uniques = pd.factorize(pd.Categorical(['a', 'b', None]))
+        >>> labels
+        array([ 0,  1, -1])
+        >>> uniques
+        [a, b]
+        Categories (2, object): [a, b]
+        """
+        from pandas.core.algorithms import _factorize_array, take_1d
+
+        codes = self.codes.astype('int64')
+        # We set missing codes, normally -1, to iNaT so that the
+        # Int64HashTable treats them as missing values.
+        codes[codes == -1] = iNaT
+        labels, uniques = _factorize_array(codes, check_nulls=True,
+                                           na_sentinel=na_sentinel)
+        uniques = self._constructor(self.categories.take(uniques),
+                                    categories=self.categories,
+                                    ordered=self.ordered)
+        if sort:
+            order = uniques.argsort()
+            labels = take_1d(order, labels, fill_value=na_sentinel)
+            uniques = uniques.take(order)
+        return labels, uniques
+
     def equals(self, other):
         """
         Returns True if categorical arrays are equal.
diff --git a/pandas/tests/categorical/test_algos.py b/pandas/tests/categorical/test_algos.py
new file mode 100644
index 0000000000000..6d21f548c65b2
--- /dev/null
+++ b/pandas/tests/categorical/test_algos.py
@@ -0,0 +1,49 @@
+import pytest
+import numpy as np
+
+import pandas as pd
+import pandas.util.testing as tm
+
+
+@pytest.mark.parametrize('ordered', [True, False])
+@pytest.mark.parametrize('categories', [
+    ['b', 'a', 'c'],
+    ['a', 'b', 'c', 'd'],
+])
+def test_factorize(categories, ordered):
+    cat = pd.Categorical(['b', 'b', 'a', 'c', None],
+                         categories=categories,
+                         ordered=ordered)
+    labels, uniques = pd.factorize(cat)
+    expected_labels = np.array([0, 0, 1, 2, -1])
+    expected_uniques = pd.Categorical(['b', 'a', 'c'],
+                                      categories=categories,
+                                      ordered=ordered)
+
+    tm.assert_numpy_array_equal(labels, expected_labels)
+    tm.assert_categorical_equal(uniques, expected_uniques)
+
+
+def test_factorized_sort():
+    cat = pd.Categorical(['b', 'b', None, 'a'])
+    labels, uniques = pd.factorize(cat, sort=True)
+    expected_labels = np.array([1, 1, -1, 0])
+    expected_uniques = pd.Categorical(['a', 'b'])
+
+    tm.assert_numpy_array_equal(labels, expected_labels)
+    tm.assert_categorical_equal(uniques, expected_uniques)
+
+
+def test_factorized_sort_ordered():
+    cat = pd.Categorical(['b', 'b', None, 'a'],
+                         categories=['c', 'b', 'a'],
+                         ordered=True)
+
+    labels, uniques = pd.factorize(cat, sort=True)
+    expected_labels = np.array([0, 0, -1, 1])
+    expected_uniques = pd.Categorical(['b', 'a'],
+                                      categories=['c', 'b', 'a'],
+                                      ordered=True)
+
+    tm.assert_numpy_array_equal(labels, expected_labels)
+    tm.assert_categorical_equal(uniques, expected_uniques)

From 9ef5be218ca60aea53e24b2c2ae322413e9b401e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 28 Feb 2018 13:57:36 -0600
Subject: [PATCH 2/8] Explicit dtype for expected

---
 pandas/tests/categorical/test_algos.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/categorical/test_algos.py b/pandas/tests/categorical/test_algos.py
index 6d21f548c65b2..61764ec0ff632 100644
--- a/pandas/tests/categorical/test_algos.py
+++ b/pandas/tests/categorical/test_algos.py
@@ -15,7 +15,7 @@ def test_factorize(categories, ordered):
                          categories=categories,
                          ordered=ordered)
     labels, uniques = pd.factorize(cat)
-    expected_labels = np.array([0, 0, 1, 2, -1])
+    expected_labels = np.array([0, 0, 1, 2, -1], dtype='int64')
     expected_uniques = pd.Categorical(['b', 'a', 'c'],
                                       categories=categories,
                                       ordered=ordered)
@@ -27,7 +27,7 @@ def test_factorize(categories, ordered):
 def test_factorized_sort():
     cat = pd.Categorical(['b', 'b', None, 'a'])
     labels, uniques = pd.factorize(cat, sort=True)
-    expected_labels = np.array([1, 1, -1, 0])
+    expected_labels = np.array([1, 1, -1, 0], dtype='int64')
     expected_uniques = pd.Categorical(['a', 'b'])
 
     tm.assert_numpy_array_equal(labels, expected_labels)
@@ -40,7 +40,7 @@ def test_factorized_sort_ordered():
                          ordered=True)
 
     labels, uniques = pd.factorize(cat, sort=True)
-    expected_labels = np.array([0, 0, -1, 1])
+    expected_labels = np.array([0, 0, -1, 1], dtype='int64')
     expected_uniques = pd.Categorical(['b', 'a'],
                                       categories=['c', 'b', 'a'],
                                       ordered=True)

From 5e52b6f60101bef3eb92bd1197d103fc71b4ba21 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 5 Mar 2018 16:04:03 -0800
Subject: [PATCH 3/8] Clean : imports / remove sort

---
 pandas/core/arrays/categorical.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 5f9661c17b0d3..b0d5344f06a3f 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -30,7 +30,8 @@
     is_scalar,
     is_dict_like)
 
-from pandas.core.algorithms import factorize, take_1d, unique1d
+from pandas.core.algorithms import (
+    factorize, take_1d, unique1d, _factorize_array)
 from pandas.core.accessor import PandasDelegate
 from pandas.core.base import (PandasObject,
                               NoNewAttributesMixin, _shared_docs)
@@ -2069,13 +2070,11 @@ def unique(self):
             take_codes = sorted(take_codes)
         return cat.set_categories(cat.categories.take(take_codes))
 
-    def factorize(self, sort=False, na_sentinel=-1):
+    def factorize(self, na_sentinel=-1):
         """Encode the Categorical as an enumerated type.
 
         Parameters
         ----------
-        sort : boolean, default False
-            Sort by values
         na_sentinel: int, default -1
             Value to mark "not found"
 
@@ -2110,21 +2109,16 @@ def factorize(self, sort=False, na_sentinel=-1):
         [a, b]
         Categories (2, object): [a, b]
         """
-        from pandas.core.algorithms import _factorize_array, take_1d
 
         codes = self.codes.astype('int64')
+        codes[codes == -1] = iNaT
         # We set missing codes, normally -1, to iNaT so that the
         # Int64HashTable treats them as missing values.
-        codes[codes == -1] = iNaT
         labels, uniques = _factorize_array(codes, check_nulls=True,
                                            na_sentinel=na_sentinel)
         uniques = self._constructor(self.categories.take(uniques),
                                     categories=self.categories,
                                     ordered=self.ordered)
-        if sort:
-            order = uniques.argsort()
-            labels = take_1d(order, labels, fill_value=na_sentinel)
-            uniques = uniques.take(order)
         return labels, uniques
 
     def equals(self, other):

From 121b682ab4c7b912df2a814305560b23467154f4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 5 Mar 2018 16:12:07 -0800
Subject: [PATCH 4/8] Restore sort

---
 pandas/core/arrays/categorical.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d1ae84184fb89..800a12d2f5ca1 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -30,8 +30,7 @@
     is_scalar,
     is_dict_like)
 
-from pandas.core.algorithms import (
-    factorize, take_1d, unique1d, _factorize_array)
+from pandas.core.algorithms import factorize, take_1d, unique1d
 from pandas.core.accessor import PandasDelegate
 from pandas.core.base import (PandasObject,
                               NoNewAttributesMixin, _shared_docs)
@@ -366,10 +365,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
         self._dtype = self._dtype.update_dtype(dtype)
         self._codes = coerce_indexer_dtype(codes, dtype.categories)
 
-    @classmethod
-    def _constructor_from_sequence(cls, scalars):
-        return cls(scalars)
-
     @property
     def categories(self):
         """The categories of this categorical.
@@ -2074,11 +2069,13 @@ def unique(self):
             take_codes = sorted(take_codes)
         return cat.set_categories(cat.categories.take(take_codes))
 
-    def factorize(self, na_sentinel=-1):
+    def factorize(self, sort=False, na_sentinel=-1):
         """Encode the Categorical as an enumerated type.
 
         Parameters
         ----------
+        sort : boolean, default False
+            Sort by values
         na_sentinel: int, default -1
             Value to mark "not found"
 
@@ -2113,6 +2110,7 @@ def factorize(self, na_sentinel=-1):
         [a, b]
         Categories (2, object): [a, b]
         """
+        from pandas.core.algorithms import _factorize_array, take_1d
 
         codes = self.codes.astype('int64')
         codes[codes == -1] = iNaT
@@ -2123,6 +2121,10 @@ def factorize(self, na_sentinel=-1):
         uniques = self._constructor(self.categories.take(uniques),
                                     categories=self.categories,
                                     ordered=self.ordered)
+        if sort:
+            order = uniques.argsort()
+            labels = take_1d(order, labels, fill_value=na_sentinel)
+            uniques = uniques.take(order)
         return labels, uniques
 
     def equals(self, other):

From a6bc40594ac706198fcc44057bd99e7869a76f38 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 9 Mar 2018 14:53:48 -0600
Subject: [PATCH 5/8] REF: remove sort from Categorical.factorize

---
 pandas/core/algorithms.py         | 12 +++++++++---
 pandas/core/arrays/categorical.py |  8 ++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 3a34880afbd19..9b41b0feeef4a 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -507,7 +507,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
 
     if is_categorical_dtype(values):
         values = getattr(values, '_values', values)
-        labels, uniques = values.factorize(sort=sort)
+        labels, uniques = values.factorize()
         dtype = original.dtype
     else:
         values, dtype, _ = _ensure_data(values)
@@ -516,8 +516,14 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
                                            na_sentinel=na_sentinel,
                                            size_hint=size_hint)
 
-        if sort and len(uniques) > 0:
-            from pandas.core.sorting import safe_sort
+    if sort and len(uniques) > 0:
+        from pandas.core.sorting import safe_sort
+        try:
+            order = uniques.argsort()
+            labels = take_1d(order, labels, fill_value=na_sentinel)
+            uniques = uniques.take(order)
+        except TypeError:
+            # Mixed types, where uniques.argsort fails.
             uniques, labels = safe_sort(uniques, labels,
                                         na_sentinel=na_sentinel,
                                         assume_unique=True)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 800a12d2f5ca1..c4da558467e41 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2069,7 +2069,7 @@ def unique(self):
             take_codes = sorted(take_codes)
         return cat.set_categories(cat.categories.take(take_codes))
 
-    def factorize(self, sort=False, na_sentinel=-1):
+    def factorize(self, na_sentinel=-1):
         """Encode the Categorical as an enumerated type.
 
         Parameters
@@ -2110,7 +2110,7 @@ def factorize(self, sort=False, na_sentinel=-1):
         [a, b]
         Categories (2, object): [a, b]
         """
-        from pandas.core.algorithms import _factorize_array, take_1d
+        from pandas.core.algorithms import _factorize_array
 
         codes = self.codes.astype('int64')
         codes[codes == -1] = iNaT
@@ -2121,10 +2121,6 @@ def factorize(self, sort=False, na_sentinel=-1):
         uniques = self._constructor(self.categories.take(uniques),
                                     categories=self.categories,
                                     ordered=self.ordered)
-        if sort:
-            order = uniques.argsort()
-            labels = take_1d(order, labels, fill_value=na_sentinel)
-            uniques = uniques.take(order)
         return labels, uniques
 
     def equals(self, other):

From 0bfbc478a4fe2ccb36f3819bfccec8389d8c05cf Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 9 Mar 2018 14:57:15 -0600
Subject: [PATCH 6/8] Updated comment

---
 pandas/core/algorithms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 9b41b0feeef4a..7b6ecd2df93d8 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -499,8 +499,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     # 3.) Maybe boxing the output in an Index
     #
     # Step 2 is dispatched to extension types (like Categorical). They are
-    # responsible only for factorization and sorting if necessary. All
-    # data coercion and boxing should happen here.
+    # responsible only for factorization. All data coercion, sorting and boxing
+    # should happen here.
 
     values = _ensure_arraylike(values)
     original = values

From 2688c4f39d4ffdb40ddc1ecb6471fb11627cac7e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 12 Mar 2018 07:56:08 -0500
Subject: [PATCH 7/8] Fixed new sort algo

---
 pandas/core/algorithms.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 7b6ecd2df93d8..884c564763a10 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -520,7 +520,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
         from pandas.core.sorting import safe_sort
         try:
             order = uniques.argsort()
-            labels = take_1d(order, labels, fill_value=na_sentinel)
+            order2 = order.argsort()
+            labels = take_1d(order2, labels, fill_value=na_sentinel)
             uniques = uniques.take(order)
         except TypeError:
             # Mixed types, where uniques.argsort fails.

From ab4f01c0eec37baf9c2bb47014ee192484bd7200 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 12 Mar 2018 20:37:52 -0500
Subject: [PATCH 8/8] Implement interface

---
 pandas/core/arrays/categorical.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index c4da558467e41..b37f88d8bfdce 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -422,6 +422,10 @@ def _ndarray_values(self):
     def _constructor(self):
         return Categorical
 
+    @classmethod
+    def _constructor_from_sequence(cls, scalars):
+        return Categorical(scalars)
+
     def copy(self):
         """ Copy constructor. """
         return self._constructor(values=self._codes.copy(),