Skip to content

Commit 799143d

Browse files
authored
ENH: PandasArray.value_counts (#39387)
1 parent 62c7dd6 commit 799143d

File tree

4 files changed

+41
-43
lines changed

4 files changed

+41
-43
lines changed

pandas/core/arrays/_mixins.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from pandas.core.dtypes.missing import array_equivalent
1818

1919
from pandas.core import missing
20-
from pandas.core.algorithms import take, unique
20+
from pandas.core.algorithms import take, unique, value_counts
2121
from pandas.core.array_algos.transforms import shift
2222
from pandas.core.arrays.base import ExtensionArray
2323
from pandas.core.construction import extract_array
@@ -367,3 +367,37 @@ def where(
367367
def delete(self: NDArrayBackedExtensionArrayT, loc) -> NDArrayBackedExtensionArrayT:
368368
res_values = np.delete(self._ndarray, loc)
369369
return self._from_backing_data(res_values)
370+
371+
# ------------------------------------------------------------------------
372+
# Additional array methods
373+
# These are not part of the EA API, but we implement them because
374+
# pandas assumes they're there.
375+
376+
def value_counts(self, dropna: bool = False):
377+
"""
378+
Return a Series containing counts of unique values.
379+
380+
Parameters
381+
----------
382+
dropna : bool, default False
383+
Don't include counts of NA values.
384+
385+
Returns
386+
-------
387+
Series
388+
"""
389+
if self.ndim != 1:
390+
raise NotImplementedError
391+
392+
from pandas import Index, Series
393+
394+
if dropna:
395+
values = self[~self.isna()]._ndarray
396+
else:
397+
values = self._ndarray
398+
399+
result = value_counts(values, sort=False, dropna=dropna)
400+
401+
index_arr = self._from_backing_data(np.asarray(result.index._data))
402+
index = Index(index_arr, name=result.index.name)
403+
return Series(result._values, index=index, name=result.name)

pandas/core/arrays/datetimelike.py

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
6464

6565
from pandas.core import nanops, ops
66-
from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts
66+
from pandas.core.algorithms import checked_add_with_arr, isin, unique1d
6767
from pandas.core.arraylike import OpsMixin
6868
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray, ravel_compat
6969
import pandas.core.common as com
@@ -691,37 +691,6 @@ def _unbox(
691691
# These are not part of the EA API, but we implement them because
692692
# pandas assumes they're there.
693693

694-
def value_counts(self, dropna: bool = False):
695-
"""
696-
Return a Series containing counts of unique values.
697-
698-
Parameters
699-
----------
700-
dropna : bool, default True
701-
Don't include counts of NaT values.
702-
703-
Returns
704-
-------
705-
Series
706-
"""
707-
if self.ndim != 1:
708-
raise NotImplementedError
709-
710-
from pandas import Index, Series
711-
712-
if dropna:
713-
values = self[~self.isna()]._ndarray
714-
else:
715-
values = self._ndarray
716-
717-
cls = type(self)
718-
719-
result = value_counts(values, sort=False, dropna=dropna)
720-
index = Index(
721-
cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name
722-
)
723-
return Series(result._values, index=index, name=result.name)
724-
725694
@ravel_compat
726695
def map(self, mapper):
727696
# TODO(GH-23179): Add ExtensionArray.map

pandas/core/indexes/base.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@
7676
DatetimeTZDtype,
7777
ExtensionDtype,
7878
IntervalDtype,
79+
PandasDtype,
7980
PeriodDtype,
8081
)
8182
from pandas.core.dtypes.generic import (
8283
ABCDatetimeIndex,
8384
ABCMultiIndex,
84-
ABCPandasArray,
8585
ABCPeriodIndex,
8686
ABCSeries,
8787
ABCTimedeltaIndex,
@@ -281,6 +281,7 @@ def __new__(
281281
stacklevel=2,
282282
)
283283

284+
from pandas.core.arrays import PandasArray
284285
from pandas.core.indexes.range import RangeIndex
285286

286287
name = maybe_extract_name(name, data, cls)
@@ -292,9 +293,11 @@ def __new__(
292293
validate_tz_from_dtype(dtype, tz)
293294
dtype = tz_to_dtype(tz)
294295

295-
if isinstance(data, ABCPandasArray):
296+
if isinstance(data, PandasArray):
296297
# ensure users don't accidentally put a PandasArray in an index.
297298
data = data.to_numpy()
299+
if isinstance(dtype, PandasDtype):
300+
dtype = dtype.numpy_dtype
298301

299302
data_dtype = getattr(data, "dtype", None)
300303

pandas/tests/extension/test_numpy.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -225,14 +225,6 @@ def test_array_interface(self, data):
225225

226226

227227
class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
228-
@pytest.mark.skip(reason="TODO: remove?")
229-
def test_value_counts(self, all_data, dropna):
230-
pass
231-
232-
@pytest.mark.xfail(reason="not working. will be covered by #32028")
233-
def test_value_counts_with_normalize(self, data):
234-
return super().test_value_counts_with_normalize(data)
235-
236228
@skip_nested
237229
def test_shift_fill_value(self, data):
238230
# np.array shape inference. Shift implementation fails.

0 commit comments

Comments
 (0)