From 85ea4faf301016c16b7ce336588d0950d803ba86 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sun, 19 Apr 2020 13:56:23 -0700
Subject: [PATCH 1/3] REF: Implement NDArrayBackedExtensionArray

---
 pandas/core/arrays/_mixins.py                 | 62 ++++++++++++
 pandas/core/arrays/categorical.py             | 96 ++++---------------
 pandas/core/arrays/datetimelike.py            | 31 +++---
 pandas/tests/arrays/categorical/test_algos.py |  2 +-
 pandas/tests/frame/test_reshape.py            |  4 +-
 5 files changed, 100 insertions(+), 95 deletions(-)
 create mode 100644 pandas/core/arrays/_mixins.py

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
new file mode 100644
index 0000000000000..0ed9de804c55e
--- /dev/null
+++ b/pandas/core/arrays/_mixins.py
@@ -0,0 +1,62 @@
+from typing import Any, Sequence, TypeVar
+
+import numpy as np
+
+from pandas.errors import AbstractMethodError
+
+from pandas.core.algorithms import take
+from pandas.core.arrays.base import ExtensionArray
+
+_T = TypeVar("_T", bound="NDArrayBackedExtensionArray")
+
+
+class NDArrayBackedExtensionArray(ExtensionArray):
+    """
+    ExtensionArray that is backed by a single NumPy ndarray.
+    """
+
+    _ndarray: np.ndarray
+
+    def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
+        """
+        Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
+
+        This should round-trip:
+            self == self._from_backing_data(self._ndarray)
+        """
+        raise AbstractMethodError(self)
+
+    # ------------------------------------------------------------------------
+
+    def take(
+        self: _T,
+        indices: Sequence[int],
+        allow_fill: bool = False,
+        fill_value: Any = None,
+    ) -> _T:
+        if allow_fill:
+            fill_value = self._validate_fill_value(fill_value)
+
+        new_data = take(
+            self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value,
+        )
+        return self._from_backing_data(new_data)
+
+    def _validate_fill_value(self, fill_value):
+        """
+        If a fill_value is passed to `take` convert it to a representation
+        suitable for self._ndarray, raising ValueError if this is not possible.
+
+        Parameters
+        ----------
+        fill_value : object
+
+        Returns
+        -------
+        fill_value : native representation
+
+        Raises
+        ------
+        ValueError
+        """
+        raise AbstractMethodError(self)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index af07dee3b6838..2b8b71d096737 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -50,8 +50,9 @@
 from pandas.core import ops
 from pandas.core.accessor import PandasDelegate, delegate_names
 import pandas.core.algorithms as algorithms
-from pandas.core.algorithms import _get_data_algo, factorize, take, take_1d, unique1d
-from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
+from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
+from pandas.core.arrays.base import _extension_array_shared_docs
 from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array, sanitize_array
@@ -210,7 +211,7 @@ def contains(cat, key, container):
 """
 
 
-class Categorical(ExtensionArray, PandasObject):
+class Categorical(NDArrayBackedExtensionArray, PandasObject):
     """
     Represent a categorical variable in classic R / S-plus fashion.
 
@@ -1762,84 +1763,20 @@ def fillna(self, value=None, method=None, limit=None):
 
         return self._constructor(codes, dtype=self.dtype, fastpath=True)
 
-    def take(self, indexer, allow_fill: bool = False, fill_value=None):
-        """
-        Take elements from the Categorical.
-
-        Parameters
-        ----------
-        indexer : sequence of int
-            The indices in `self` to take. The meaning of negative values in
-            `indexer` depends on the value of `allow_fill`.
-        allow_fill : bool, default False
-            How to handle negative values in `indexer`.
-
-            * False: negative values in `indices` indicate positional indices
-              from the right. This is similar to
-              :func:`numpy.take`.
-
-            * True: negative values in `indices` indicate missing values
-              (the default). These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
-
-            .. versionchanged:: 1.0.0
-
-               Default value changed from ``True`` to ``False``.
-
-        fill_value : object
-            The value to use for `indices` that are missing (-1), when
-            ``allow_fill=True``. This should be the category, i.e. a value
-            in ``self.categories``, not a code.
-
-        Returns
-        -------
-        Categorical
-            This Categorical will have the same categories and ordered as
-            `self`.
-
-        See Also
-        --------
-        Series.take : Similar method for Series.
-        numpy.ndarray.take : Similar method for NumPy arrays.
-
-        Examples
-        --------
-        >>> cat = pd.Categorical(['a', 'a', 'b'])
-        >>> cat
-        [a, a, b]
-        Categories (2, object): [a, b]
-
-        Specify ``allow_fill==False`` to have negative indices mean indexing
-        from the right.
-
-        >>> cat.take([0, -1, -2], allow_fill=False)
-        [a, b, a]
-        Categories (2, object): [a, b]
-
-        With ``allow_fill=True``, indices equal to ``-1`` mean "missing"
-        values that should be filled with the `fill_value`, which is
-        ``np.nan`` by default.
-
-        >>> cat.take([0, -1, -1], allow_fill=True)
-        [a, NaN, NaN]
-        Categories (2, object): [a, b]
+    # ------------------------------------------------------------------
+    # NDArrayBackedExtensionArray compat
 
-        The fill value can be specified.
-
-        >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a')
-        [a, a, a]
-        Categories (2, object): [a, b]
-
-        Specifying a fill value that's not in ``self.categories``
-        will raise a ``TypeError``.
-        """
-        indexer = np.asarray(indexer, dtype=np.intp)
+    @property
+    def _ndarray(self) -> np.ndarray:
+        return self._codes
 
-        dtype = self.dtype
+    def _from_backing_data(self, arr: np.ndarray):
+        return self._constructor(arr, dtype=self.dtype, fastpath=True)
 
+    def _validate_fill_value(self, fill_value):
         if isna(fill_value):
             fill_value = -1
-        elif allow_fill:
+        else:
             # convert user-provided `fill_value` to codes
             if fill_value in self.categories:
                 fill_value = self.categories.get_loc(fill_value)
@@ -1848,11 +1785,10 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None):
                     f"'fill_value' ('{fill_value}') is not in this "
                     "Categorical's categories."
                 )
-                raise TypeError(msg)
+                raise ValueError(msg)
+        return fill_value
 
-        codes = take(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value)
-        result = type(self).from_codes(codes, dtype=dtype)
-        return result
+    # ------------------------------------------------------------------
 
     def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
         # GH#27745 deprecate alias that other EAs dont have
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index e3cdc898a88bf..925417639a734 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -39,8 +39,9 @@
 from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
 
 from pandas.core import missing, nanops, ops
-from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
+from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
 from pandas.core.array_algos.transforms import shift
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array
@@ -425,7 +426,9 @@ def _with_freq(self, freq):
         return self
 
 
-class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray):
+class DatetimeLikeArrayMixin(
+    ExtensionOpsMixin, AttributesMixin, NDArrayBackedExtensionArray
+):
     """
     Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
 
@@ -437,6 +440,20 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray)
         _generate_range
     """
 
+    # ------------------------------------------------------------------
+    # NDArrayBackedExtensionArray compat
+
+    @property
+    def _ndarray(self) -> np.ndarray:
+        # NB: A bunch of Interval tests fail if we use ._data
+        return self.asi8
+
+    def _from_backing_data(self, arr: np.ndarray):
+        # Note: we do not retain `freq`
+        return type(self)(arr, dtype=self.dtype)  # type: ignore
+
+    # ------------------------------------------------------------------
+
     @property
     def ndim(self) -> int:
         return self._data.ndim
@@ -711,16 +728,6 @@ def _validate_fill_value(self, fill_value):
             )
         return fill_value
 
-    def take(self, indices, allow_fill=False, fill_value=None):
-        if allow_fill:
-            fill_value = self._validate_fill_value(fill_value)
-
-        new_values = take(
-            self.asi8, indices, allow_fill=allow_fill, fill_value=fill_value
-        )
-
-        return type(self)(new_values, dtype=self.dtype)
-
     @classmethod
     def _concat_same_type(cls, to_concat, axis: int = 0):
 
diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py
index 325fa476d70e6..02143a833c2ed 100644
--- a/pandas/tests/arrays/categorical/test_algos.py
+++ b/pandas/tests/arrays/categorical/test_algos.py
@@ -185,7 +185,7 @@ def test_take_fill_value_new_raises(self):
         # https://github.com/pandas-dev/pandas/issues/23296
         cat = pd.Categorical(["a", "b", "c"])
         xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."
-        with pytest.raises(TypeError, match=xpr):
+        with pytest.raises(ValueError, match=xpr):
             cat.take([0, 1, -1], fill_value="d", allow_fill=True)
 
     def test_take_nd_deprecated(self):
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 9d3c40ce926d7..c504037c740f2 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -320,9 +320,9 @@ def test_unstack_fill_frame_categorical(self):
         )
         tm.assert_frame_equal(result, expected)
 
-        # Fill with non-category results in a TypeError
+        # Fill with non-category results in a ValueError
         msg = r"'fill_value' \('d'\) is not in"
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(ValueError, match=msg):
             data.unstack(fill_value="d")
 
         # Fill with category value replaces missing values as expected

From aad99709e8ce658743639de58fe2bb61f588103c Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 20 Apr 2020 08:25:42 -0700
Subject: [PATCH 2/3] restore docstring, whatsnew

---
 doc/source/whatsnew/v1.1.0.rst    |  1 +
 pandas/core/arrays/categorical.py | 75 +++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 9d40f9b6ffa2c..e0ddda40c68f8 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -190,6 +190,7 @@ Backwards incompatible API changes
   Previously a ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`)
 - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`)
 - Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`)
+- Passing an invalid ``fill_value`` to :meth:`Categorical.take` raises a ``ValueError`` instead of ``TypeError`` (:issue:`33660`)
 
 ``MultiIndex.get_indexer`` interprets `method` argument differently
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index c044cf490ad58..34f717f2335f4 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1781,6 +1781,81 @@ def fillna(self, value=None, method=None, limit=None):
 
         return self._constructor(codes, dtype=self.dtype, fastpath=True)
 
+    def take(self, indexer, allow_fill: bool = False, fill_value=None):
+        """
+        Take elements from the Categorical.
+
+        Parameters
+        ----------
+        indexer : sequence of int
+            The indices in `self` to take. The meaning of negative values in
+            `indexer` depends on the value of `allow_fill`.
+        allow_fill : bool, default False
+            How to handle negative values in `indexer`.
+
+            * False: negative values in `indices` indicate positional indices
+              from the right. This is similar to
+              :func:`numpy.take`.
+
+            * True: negative values in `indices` indicate missing values
+              (the default). These values are set to `fill_value`. Any other
+              other negative values raise a ``ValueError``.
+
+            .. versionchanged:: 1.0.0
+
+               Default value changed from ``True`` to ``False``.
+
+        fill_value : object
+            The value to use for `indices` that are missing (-1), when
+            ``allow_fill=True``. This should be the category, i.e. a value
+            in ``self.categories``, not a code.
+
+        Returns
+        -------
+        Categorical
+            This Categorical will have the same categories and ordered as
+            `self`.
+
+        See Also
+        --------
+        Series.take : Similar method for Series.
+        numpy.ndarray.take : Similar method for NumPy arrays.
+
+        Examples
+        --------
+        >>> cat = pd.Categorical(['a', 'a', 'b'])
+        >>> cat
+        [a, a, b]
+        Categories (2, object): [a, b]
+
+        Specify ``allow_fill==False`` to have negative indices mean indexing
+        from the right.
+
+        >>> cat.take([0, -1, -2], allow_fill=False)
+        [a, b, a]
+        Categories (2, object): [a, b]
+
+        With ``allow_fill=True``, indices equal to ``-1`` mean "missing"
+        values that should be filled with the `fill_value`, which is
+        ``np.nan`` by default.
+
+        >>> cat.take([0, -1, -1], allow_fill=True)
+        [a, NaN, NaN]
+        Categories (2, object): [a, b]
+
+        The fill value can be specified.
+
+        >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a')
+        [a, a, a]
+        Categories (2, object): [a, b]
+
+        Specifying a fill value that's not in ``self.categories``
+        will raise a ``ValueError``.
+        """
+        return NDArrayBackedExtensionArray.take(
+            self, indexer, allow_fill=allow_fill, fill_value=fill_value
+        )
+
     # ------------------------------------------------------------------
     # NDArrayBackedExtensionArray compat
 

From fc304a09df1ab9c2cf593b2e8fd8aadc37e2b37c Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 21 Apr 2020 09:16:34 -0700
Subject: [PATCH 3/3] annotate

---
 pandas/core/arrays/categorical.py  | 6 +++---
 pandas/core/arrays/datetimelike.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 266acf476da12..0bb6ca8315a3a 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -51,7 +51,7 @@
 import pandas.core.algorithms as algorithms
 from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d
 from pandas.core.array_algos.transforms import shift
-from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
+from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
 from pandas.core.arrays.base import _extension_array_shared_docs
 from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
 import pandas.core.common as com
@@ -1769,7 +1769,7 @@ def fillna(self, value=None, method=None, limit=None):
 
         return self._constructor(codes, dtype=self.dtype, fastpath=True)
 
-    def take(self, indexer, allow_fill: bool = False, fill_value=None):
+    def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T:
         """
         Take elements from the Categorical.
 
@@ -1851,7 +1851,7 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None):
     def _ndarray(self) -> np.ndarray:
         return self._codes
 
-    def _from_backing_data(self, arr: np.ndarray):
+    def _from_backing_data(self, arr: np.ndarray) -> "Categorical":
         return self._constructor(arr, dtype=self.dtype, fastpath=True)
 
     # ------------------------------------------------------------------
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index d59278354ff8b..f41964cc7b0c8 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -41,7 +41,7 @@
 from pandas.core import missing, nanops, ops
 from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
 from pandas.core.array_algos.transforms import shift
-from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
+from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
 from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array
@@ -448,7 +448,7 @@ def _ndarray(self) -> np.ndarray:
         # NB: A bunch of Interval tests fail if we use ._data
         return self.asi8
 
-    def _from_backing_data(self, arr: np.ndarray):
+    def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
         # Note: we do not retain `freq`
         return type(self)(arr, dtype=self.dtype)  # type: ignore