ENH: PandasArray.value_counts (#39387)

jbrockmendel · web-flow · commit 799143d2c212 · 2021-01-25T11:54:52.000-05:00
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
@@ -17,7 +17,7 @@
 from pandas.core.dtypes.missing import array_equivalent
 
 from pandas.core import missing
-from pandas.core.algorithms import take, unique
+from pandas.core.algorithms import take, unique, value_counts
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.construction import extract_array
@@ -367,3 +367,37 @@ def where(
     def delete(self: NDArrayBackedExtensionArrayT, loc) -> NDArrayBackedExtensionArrayT:
         res_values = np.delete(self._ndarray, loc)
         return self._from_backing_data(res_values)
+
+    # ------------------------------------------------------------------------
+    # Additional array methods
+    #  These are not part of the EA API, but we implement them because
+    #  pandas assumes they're there.
+
+    def value_counts(self, dropna: bool = False):
+        """
+        Return a Series containing counts of unique values.
+
+        Parameters
+        ----------
+        dropna : bool, default False
+            Don't include counts of NA values.
+
+        Returns
+        -------
+        Series
+        """
+        if self.ndim != 1:
+            raise NotImplementedError
+
+        from pandas import Index, Series
+
+        if dropna:
+            values = self[~self.isna()]._ndarray
+        else:
+            values = self._ndarray
+
+        result = value_counts(values, sort=False, dropna=dropna)
+
+        index_arr = self._from_backing_data(np.asarray(result.index._data))
+        index = Index(index_arr, name=result.index.name)
+        return Series(result._values, index=index, name=result.name)
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -63,7 +63,7 @@
 from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
 
 from pandas.core import nanops, ops
-from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts
+from pandas.core.algorithms import checked_add_with_arr, isin, unique1d
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray, ravel_compat
 import pandas.core.common as com
@@ -691,37 +691,6 @@ def _unbox(
     #  These are not part of the EA API, but we implement them because
     #  pandas assumes they're there.
 
-    def value_counts(self, dropna: bool = False):
-        """
-        Return a Series containing counts of unique values.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of NaT values.
-
-        Returns
-        -------
-        Series
-        """
-        if self.ndim != 1:
-            raise NotImplementedError
-
-        from pandas import Index, Series
-
-        if dropna:
-            values = self[~self.isna()]._ndarray
-        else:
-            values = self._ndarray
-
-        cls = type(self)
-
-        result = value_counts(values, sort=False, dropna=dropna)
-        index = Index(
-            cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name
-        )
-        return Series(result._values, index=index, name=result.name)
-
     @ravel_compat
     def map(self, mapper):
         # TODO(GH-23179): Add ExtensionArray.map
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -76,12 +76,12 @@
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
+    PandasDtype,
     PeriodDtype,
 )
 from pandas.core.dtypes.generic import (
     ABCDatetimeIndex,
     ABCMultiIndex,
-    ABCPandasArray,
     ABCPeriodIndex,
     ABCSeries,
     ABCTimedeltaIndex,
@@ -281,6 +281,7 @@ def __new__(
                 stacklevel=2,
             )
 
+        from pandas.core.arrays import PandasArray
         from pandas.core.indexes.range import RangeIndex
 
         name = maybe_extract_name(name, data, cls)
@@ -292,9 +293,11 @@ def __new__(
             validate_tz_from_dtype(dtype, tz)
             dtype = tz_to_dtype(tz)
 
-        if isinstance(data, ABCPandasArray):
+        if isinstance(data, PandasArray):
             # ensure users don't accidentally put a PandasArray in an index.
             data = data.to_numpy()
+        if isinstance(dtype, PandasDtype):
+            dtype = dtype.numpy_dtype
 
         data_dtype = getattr(data, "dtype", None)
 
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -225,14 +225,6 @@ def test_array_interface(self, data):
 
 
 class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
-    @pytest.mark.skip(reason="TODO: remove?")
-    def test_value_counts(self, all_data, dropna):
-        pass
-
-    @pytest.mark.xfail(reason="not working. will be covered by #32028")
-    def test_value_counts_with_normalize(self, data):
-        return super().test_value_counts_with_normalize(data)
-
     @skip_nested
     def test_shift_fill_value(self, data):
         # np.array shape inference. Shift implementation fails.