diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 65167e6467fd5..760da36a30075 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -563,6 +563,14 @@ def time_frame_nunique(self):
         self.df.nunique()
 
 
+class SeriesNuniqueWithNan:
+    def setup(self):
+        self.ser = Series(100000 * (100 * [np.nan] + list(range(100)))).astype(float)
+
+    def time_series_nunique_nan(self):
+        self.ser.nunique()
+
+
 class Duplicated:
     def setup(self):
         n = 1 << 20
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 74710ca48308c..be2dbd83dd23d 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -635,7 +635,7 @@ Performance improvements
 - Performance improvement in the conversion of pyarrow boolean array to a pandas nullable boolean array (:issue:`41051`)
 - Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
 - Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
--
+- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 42f52618eb07b..36a90468eff7f 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1040,8 +1040,10 @@ def nunique(self, dropna: bool = True) -> int:
         >>> s.nunique()
         4
         """
-        obj = remove_na_arraylike(self) if dropna else self
-        return len(obj.unique())
+        uniqs = self.unique()
+        if dropna:
+            uniqs = remove_na_arraylike(uniqs)
+        return len(uniqs)
 
     @property
     def is_unique(self) -> bool: