diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 55e76512b2440..6cbc19cca99e1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -13,6 +13,7 @@ New features Other Enhancements ^^^^^^^^^^^^^^^^^^ - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) +- :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index c97639481f12c..521e564447c59 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -288,7 +288,8 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values, bint hasnans=0): {{py: # dtype, ctype, table_type, npy_dtype -dtypes = [('int64', 'int64_t', 'int64', 'int64'), +dtypes = [('float64', 'float64_t', 'float64', 'float64'), + ('int64', 'int64_t', 'int64', 'int64'), ('uint64', 'uint64_t', 'uint64', 'uint64'), ('object', 'object', 'pymap', 'object_')] }} @@ -302,11 +303,11 @@ dtypes = [('int64', 'int64_t', 'int64', 'int64'), {{if dtype == 'object'}} -def mode_{{dtype}}(ndarray[{{ctype}}] values): +def mode_{{dtype}}(ndarray[{{ctype}}] values, bint dropna): {{else}} -def mode_{{dtype}}({{ctype}}[:] values): +def mode_{{dtype}}({{ctype}}[:] values, bint dropna): {{endif}} cdef: int count, max_count = 1 @@ -317,9 +318,9 @@ def mode_{{dtype}}({{ctype}}[:] values): table = kh_init_{{table_type}}() {{if dtype == 'object'}} - build_count_table_{{dtype}}(values, table, 1) + build_count_table_{{dtype}}(values, table, dropna) {{else}} - build_count_table_{{dtype}}(values, table, 0) + build_count_table_{{dtype}}(values, table, dropna) {{endif}} modes = 
np.empty(table.n_buckets, dtype=np.{{npy_dtype}}) @@ -329,7 +330,6 @@ def mode_{{dtype}}({{ctype}}[:] values): for k in range(table.n_buckets): if kh_exist_{{table_type}}(table, k): count = table.vals[k] - if count == max_count: j += 1 elif count > max_count: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 88bc497f9f22d..0b0fa69588784 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -25,8 +25,8 @@ is_bool_dtype, needs_i8_conversion, is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, is_interval_dtype, - is_scalar, is_list_like, + is_timedelta64_dtype, is_datetimelike, + is_interval_dtype, is_scalar, is_list_like, _ensure_platform_int, _ensure_object, _ensure_float64, _ensure_uint64, _ensure_int64) @@ -798,7 +798,7 @@ def duplicated(values, keep='first'): return f(values, keep=keep) -def mode(values): +def mode(values, dropna=True): """ Returns the mode(s) of an array. @@ -806,6 +806,10 @@ def mode(values): ---------- values : array-like Array over which to check for duplicate values. + dropna : boolean, default True + Don't consider counts of NaN/NaT. + + .. 
versionadded:: 0.24.0 Returns ------- @@ -818,20 +822,18 @@ def mode(values): # categorical is a fast-path if is_categorical_dtype(values): - if isinstance(values, Series): - return Series(values.values.mode(), name=values.name) - return values.mode() + return Series(values.values.mode(dropna=dropna), name=values.name) + return values.mode(dropna=dropna) - values, dtype, ndtype = _ensure_data(values) + if dropna and is_datetimelike(values): + mask = values.isnull() + values = values[~mask] - # TODO: this should support float64 - if ndtype not in ['int64', 'uint64', 'object']: - ndtype = 'object' - values = _ensure_object(values) + values, dtype, ndtype = _ensure_data(values) f = getattr(htable, "mode_{dtype}".format(dtype=ndtype)) - result = f(values) + result = f(values, dropna=dropna) try: result = np.sort(result) except TypeError as e: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a1a8f098b582e..30f9c56d24f02 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2118,20 +2118,30 @@ def max(self, numeric_only=None, **kwargs): else: return self.categories[pointer] - def mode(self): + def mode(self, dropna=True): """ Returns the mode(s) of the Categorical. Always returns `Categorical` even if only one value. + Parameters + ---------- + dropna : boolean, default True + Don't consider counts of NaN/NaT. + + .. 
versionadded:: 0.24.0 + Returns ------- modes : `Categorical` (sorted) """ import pandas._libs.hashtable as htable - good = self._codes != -1 - values = sorted(htable.mode_int64(_ensure_int64(self._codes[good]))) + values = self._codes + if dropna: + good = self._codes != -1 + values = self._codes[good] + values = sorted(htable.mode_int64(_ensure_int64(values), dropna)) result = self._constructor(values=values, categories=self.categories, ordered=self.ordered, fastpath=True) return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1c7339a91c2fd..e75d76cf612e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7038,7 +7038,7 @@ def _get_agg_axis(self, axis_num): else: raise ValueError('Axis must be 0 or 1 (got %r)' % axis_num) - def mode(self, axis=0, numeric_only=False): + def mode(self, axis=0, numeric_only=False, dropna=True): """ Gets the mode(s) of each element along the axis selected. Adds a row for each mode per label, fills in gaps with nan. @@ -7056,6 +7056,10 @@ def mode(self, axis=0, numeric_only=False): * 1 or 'columns' : get mode of each row numeric_only : boolean, default False if True, only apply to numeric columns + dropna : boolean, default True + Don't consider counts of NaN/NaT. + + .. versionadded:: 0.24.0 Returns ------- @@ -7072,7 +7076,7 @@ def mode(self, axis=0, numeric_only=False): data = self if not numeric_only else self._get_numeric_data() def f(s): - return s.mode() + return s.mode(dropna=dropna) return data.apply(f, axis=axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index d59401414181f..4cf29319f703e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1431,17 +1431,24 @@ def count(self, level=None): return self._constructor(out, index=lev, dtype='int64').__finalize__(self) - def mode(self): + def mode(self, dropna=True): """Return the mode(s) of the dataset. Always returns Series even if only one value is returned. 
+ Parameters + ---------- + dropna : boolean, default True + Don't consider counts of NaN/NaT. + + .. versionadded:: 0.24.0 + Returns ------- modes : Series (sorted) """ # TODO: Add option for bins like value_counts() - return algorithms.mode(self) + return algorithms.mode(self, dropna=dropna) def unique(self): """ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index d1a4a5f615b86..b8f1acc2aa679 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -15,11 +15,11 @@ from pandas.compat import lrange, product, PY35 from pandas import (compat, isna, notna, DataFrame, Series, MultiIndex, date_range, Timestamp, Categorical, - _np_version_under1p12, _np_version_under1p15) + _np_version_under1p12, _np_version_under1p15, + to_datetime, to_timedelta) import pandas as pd import pandas.core.nanops as nanops import pandas.core.algorithms as algorithms -import pandas.io.formats.printing as printing import pandas.util.testing as tm import pandas.util._test_decorators as td @@ -840,54 +840,74 @@ def wrapper(x): expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) tm.assert_series_equal(r1, expected) - def test_mode(self): - df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], - "B": [10, 10, 10, np.nan, 3, 4], - "C": [8, 8, 8, 9, 9, 9], - "D": np.arange(6, dtype='int64'), - "E": [8, 8, 1, 1, 3, 3]}) - tm.assert_frame_equal(df[["A"]].mode(), - pd.DataFrame({"A": [12]})) - expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ - to_frame() - tm.assert_frame_equal(df[["D"]].mode(), expected) - expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() - tm.assert_frame_equal(df[["E"]].mode(), expected) - tm.assert_frame_equal(df[["A", "B"]].mode(), - pd.DataFrame({"A": [12], "B": [10.]})) - tm.assert_frame_equal(df.mode(), - pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, - np.nan, np.nan], - "B": [10, np.nan, np.nan, np.nan, - np.nan, np.nan], - "C": [8, 9, np.nan, np.nan, 
np.nan, - np.nan], - "D": [0, 1, 2, 3, 4, 5], - "E": [1, 3, 8, np.nan, np.nan, - np.nan]})) - - # outputs in sorted order - df["C"] = list(reversed(df["C"])) - printing.pprint_thing(df["C"]) - printing.pprint_thing(df["C"].mode()) - a, b = (df[["A", "B", "C"]].mode(), - pd.DataFrame({"A": [12, np.nan], - "B": [10, np.nan], - "C": [8, 9]})) - printing.pprint_thing(a) - printing.pprint_thing(b) - tm.assert_frame_equal(a, b) - # should work with heterogeneous types - df = pd.DataFrame({"A": np.arange(6, dtype='int64'), - "B": pd.date_range('2011', periods=6), - "C": list('abcdef')}) - exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), - dtype=df["A"].dtype), - "B": pd.Series(pd.date_range('2011', periods=6), - dtype=df["B"].dtype), - "C": pd.Series(list('abcdef'), - dtype=df["C"].dtype)}) - tm.assert_frame_equal(df.mode(), exp) + @pytest.mark.parametrize("dropna, expected", [ + (True, {'A': [12], + 'B': [10.0], + 'C': [1.0], + 'D': ['a'], + 'E': Categorical(['a'], categories=['a']), + 'F': to_datetime(['2000-1-2']), + 'G': to_timedelta(['1 days'])}), + (False, {'A': [12], + 'B': [10.0], + 'C': [np.nan], + 'D': np.array([np.nan], dtype=object), + 'E': Categorical([np.nan], categories=['a']), + 'F': [pd.NaT], + 'G': to_timedelta([pd.NaT])}), + (True, {'H': [8, 9, np.nan, np.nan], + 'I': [8, 9, np.nan, np.nan], + 'J': [1, np.nan, np.nan, np.nan], + 'K': Categorical(['a', np.nan, np.nan, np.nan], + categories=['a']), + 'L': to_datetime(['2000-1-2', 'NaT', 'NaT', 'NaT']), + 'M': to_timedelta(['1 days', 'nan', 'nan', 'nan']), + 'N': [0, 1, 2, 3]}), + (False, {'H': [8, 9, np.nan, np.nan], + 'I': [8, 9, np.nan, np.nan], + 'J': [1, np.nan, np.nan, np.nan], + 'K': Categorical([np.nan, 'a', np.nan, np.nan], + categories=['a']), + 'L': to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), + 'M': to_timedelta(['nan', '1 days', 'nan', 'nan']), + 'N': [0, 1, 2, 3]}) + ]) + def test_mode_dropna(self, dropna, expected): + + df = DataFrame({"A": [12, 12, 19, 11], + "B": [10, 10, 
np.nan, 3], + "C": [1, np.nan, np.nan, np.nan], + "D": [np.nan, np.nan, 'a', np.nan], + "E": Categorical([np.nan, np.nan, 'a', np.nan]), + "F": to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), + "G": to_timedelta(['1 days', 'nan', 'nan', 'nan']), + "H": [8, 8, 9, 9], + "I": [9, 9, 8, 8], + "J": [1, 1, np.nan, np.nan], + "K": Categorical(['a', np.nan, 'a', np.nan]), + "L": to_datetime(['2000-1-2', '2000-1-2', + 'NaT', 'NaT']), + "M": to_timedelta(['1 days', 'nan', + '1 days', 'nan']), + "N": np.arange(4, dtype='int64')}) + + result = df[sorted(list(expected.keys()))].mode(dropna=dropna) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif(not compat.PY3, reason="only PY3") + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + df = DataFrame({"A": [np.nan, np.nan, 'a', 'a']}) + expected = DataFrame({'A': ['a', np.nan]}) + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + result = df.mode(dropna=False) + result = result.sort_values(by='A').reset_index(drop=True) + + tm.assert_frame_equal(result, expected) def test_operators_timedelta64(self): from datetime import timedelta diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 6ea40329f4bc3..14ae1ef42865a 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -225,102 +225,6 @@ def test_median(self): int_ts = Series(np.ones(10, dtype=int), index=lrange(10)) tm.assert_almost_equal(np.median(int_ts), int_ts.median()) - def test_mode(self): - # No mode should be found. - exp = Series([], dtype=np.float64) - tm.assert_series_equal(Series([]).mode(), exp) - - exp = Series([1], dtype=np.int64) - tm.assert_series_equal(Series([1]).mode(), exp) - - exp = Series(['a', 'b', 'c'], dtype=np.object) - tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp) - - # Test numerical data types. 
- exp_single = [1] - data_single = [1] * 5 + [2] * 3 - - exp_multi = [1, 3] - data_multi = [1] * 5 + [2] * 3 + [3] * 5 - - for dt in np.typecodes['AllInteger'] + np.typecodes['Float']: - s = Series(data_single, dtype=dt) - exp = Series(exp_single, dtype=dt) - tm.assert_series_equal(s.mode(), exp) - - s = Series(data_multi, dtype=dt) - exp = Series(exp_multi, dtype=dt) - tm.assert_series_equal(s.mode(), exp) - - # Test string and object types. - exp = ['b'] - data = ['a'] * 2 + ['b'] * 3 - - s = Series(data, dtype='c') - exp = Series(exp, dtype='c') - tm.assert_series_equal(s.mode(), exp) - - exp = ['bar'] - data = ['foo'] * 2 + ['bar'] * 3 - - for dt in [str, object]: - s = Series(data, dtype=dt) - exp = Series(exp, dtype=dt) - tm.assert_series_equal(s.mode(), exp) - - # Test datetime types. - exp = Series(['1900-05-03', '2011-01-03', - '2013-01-02'], dtype='M8[ns]') - s = Series(['2011-01-03', '2013-01-02', - '1900-05-03'], dtype='M8[ns]') - tm.assert_series_equal(s.mode(), exp) - - exp = Series(['2011-01-03', '2013-01-02'], dtype='M8[ns]') - s = Series(['2011-01-03', '2013-01-02', '1900-05-03', - '2011-01-03', '2013-01-02'], dtype='M8[ns]') - tm.assert_series_equal(s.mode(), exp) - - # gh-5986: Test timedelta types. - exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]') - s = Series(['1 days', '-1 days', '0 days'], - dtype='timedelta64[ns]') - tm.assert_series_equal(s.mode(), exp) - - exp = Series(['2 min', '1 day'], dtype='timedelta64[ns]') - s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min', - '2 min', '2 min'], dtype='timedelta64[ns]') - tm.assert_series_equal(s.mode(), exp) - - # Test mixed dtype. - exp = Series(['foo']) - s = Series([1, 'foo', 'foo']) - tm.assert_series_equal(s.mode(), exp) - - # Test for uint64 overflow. 
- exp = Series([2**63], dtype=np.uint64) - s = Series([1, 2**63, 2**63], dtype=np.uint64) - tm.assert_series_equal(s.mode(), exp) - - exp = Series([1, 2**63], dtype=np.uint64) - s = Series([1, 2**63], dtype=np.uint64) - tm.assert_series_equal(s.mode(), exp) - - # Test category dtype. - c = Categorical([1, 2]) - exp = Categorical([1, 2], categories=[1, 2]) - exp = Series(exp, dtype='category') - tm.assert_series_equal(Series(c).mode(), exp) - - c = Categorical([1, 'a', 'a']) - exp = Categorical(['a'], categories=[1, 'a']) - exp = Series(exp, dtype='category') - tm.assert_series_equal(Series(c).mode(), exp) - - c = Categorical([1, 1, 2, 3, 3]) - exp = Categorical([1, 3], categories=[1, 2, 3]) - exp = Series(exp, dtype='category') - tm.assert_series_equal(Series(c).mode(), exp) - def test_prod(self): self._check_stat_op('prod', np.prod) @@ -1866,6 +1770,180 @@ def s_main_dtypes(): return df +class TestMode(object): + + @pytest.mark.parametrize('dropna, expected', [ + (True, Series([], dtype=np.float64)), + (False, Series([], dtype=np.float64)) + ]) + def test_mode_empty(self, dropna, expected): + s = Series([], dtype=np.float64) + result = s.mode(dropna) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dropna, data, expected', [ + (True, [1, 1, 1, 2], [1]), + (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]), + (False, [1, 1, 1, 2], [1]), + (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]), + ]) + @pytest.mark.parametrize( + 'dt', + list(np.typecodes['AllInteger'] + np.typecodes['Float']) + ) + def test_mode_numerical(self, dropna, data, expected, dt): + s = Series(data, dtype=dt) + result = s.mode(dropna) + expected = Series(expected, dtype=dt) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dropna, expected', [ + (True, [1.0]), + (False, [1, np.nan]), + ]) + def test_mode_numerical_nan(self, dropna, expected): + s = Series([1, 1, 2, np.nan, np.nan]) + result = s.mode(dropna) + expected = Series(expected) + tm.assert_series_equal(result, 
expected) + + @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [ + (True, ['b'], ['bar'], ['nan']), + (False, ['b'], [np.nan], ['nan']) + ]) + def test_mode_str_obj(self, dropna, expected1, expected2, expected3): + # Test string and object types. + data = ['a'] * 2 + ['b'] * 3 + + s = Series(data, dtype='c') + result = s.mode(dropna) + expected1 = Series(expected1, dtype='c') + tm.assert_series_equal(result, expected1) + + data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan] + + s = Series(data, dtype=object) + result = s.mode(dropna) + expected2 = Series(expected2, dtype=object) + tm.assert_series_equal(result, expected2) + + data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan] + + s = Series(data, dtype=str) + result = s.mode(dropna) + expected3 = Series(expected3, dtype=str) + tm.assert_series_equal(result, expected3) + + @pytest.mark.parametrize('dropna, expected1, expected2', [ + (True, ['foo'], ['foo']), + (False, ['foo'], [np.nan]) + ]) + def test_mode_mixeddtype(self, dropna, expected1, expected2): + s = Series([1, 'foo', 'foo']) + result = s.mode(dropna) + expected = Series(expected1) + tm.assert_series_equal(result, expected) + + s = Series([1, 'foo', 'foo', np.nan, np.nan, np.nan]) + result = s.mode(dropna) + expected = Series(expected2, dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dropna, expected1, expected2', [ + (True, ['1900-05-03', '2011-01-03', '2013-01-02'], + ['2011-01-03', '2013-01-02']), + (False, [np.nan], [np.nan, '2011-01-03', '2013-01-02']), + ]) + def test_mode_datetime(self, dropna, expected1, expected2): + s = Series(['2011-01-03', '2013-01-02', + '1900-05-03', 'nan', 'nan'], dtype='M8[ns]') + result = s.mode(dropna) + expected1 = Series(expected1, dtype='M8[ns]') + tm.assert_series_equal(result, expected1) + + s = Series(['2011-01-03', '2013-01-02', '1900-05-03', + '2011-01-03', '2013-01-02', 'nan', 'nan'], + dtype='M8[ns]') + result = s.mode(dropna) + expected2 = 
Series(expected2, dtype='M8[ns]') + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize('dropna, expected1, expected2', [ + (True, ['-1 days', '0 days', '1 days'], ['2 min', '1 day']), + (False, [np.nan], [np.nan, '2 min', '1 day']), + ]) + def test_mode_timedelta(self, dropna, expected1, expected2): + # gh-5986: Test timedelta types. + + s = Series(['1 days', '-1 days', '0 days', 'nan', 'nan'], + dtype='timedelta64[ns]') + result = s.mode(dropna) + expected1 = Series(expected1, dtype='timedelta64[ns]') + tm.assert_series_equal(result, expected1) + + s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min', + '2 min', '2 min', 'nan', 'nan'], + dtype='timedelta64[ns]') + result = s.mode(dropna) + expected2 = Series(expected2, dtype='timedelta64[ns]') + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [ + (True, Categorical([1, 2], categories=[1, 2]), + Categorical(['a'], categories=[1, 'a']), + Categorical([3, 1], categories=[3, 2, 1], ordered=True)), + (False, Categorical([np.nan], categories=[1, 2]), + Categorical([np.nan, 'a'], categories=[1, 'a']), + Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True)), + ]) + def test_mode_category(self, dropna, expected1, expected2, expected3): + s = Series(Categorical([1, 2, np.nan, np.nan])) + result = s.mode(dropna) + expected1 = Series(expected1, dtype='category') + tm.assert_series_equal(result, expected1) + + s = Series(Categorical([1, 'a', 'a', np.nan, np.nan])) + result = s.mode(dropna) + expected2 = Series(expected2, dtype='category') + tm.assert_series_equal(result, expected2) + + s = Series(Categorical([1, 1, 2, 3, 3, np.nan, np.nan], + categories=[3, 2, 1], ordered=True)) + result = s.mode(dropna) + expected3 = Series(expected3, dtype='category') + tm.assert_series_equal(result, expected3) + + @pytest.mark.parametrize('dropna, expected1, expected2', [ + (True, [2**63], [1, 2**63]), + (False, [2**63], [1, 2**63]) + ]) + def 
test_mode_intoverflow(self, dropna, expected1, expected2): + # Test for uint64 overflow. + s = Series([1, 2**63, 2**63], dtype=np.uint64) + result = s.mode(dropna) + expected1 = Series(expected1, dtype=np.uint64) + tm.assert_series_equal(result, expected1) + + s = Series([1, 2**63], dtype=np.uint64) + result = s.mode(dropna) + expected2 = Series(expected2, dtype=np.uint64) + tm.assert_series_equal(result, expected2) + + @pytest.mark.skipif(not compat.PY3, reason="only PY3") + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + expected = Series(['foo', np.nan]) + s = Series([1, 'foo', 'foo', np.nan, np.nan]) + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + result = s.mode(dropna=False) + result = result.sort_values().reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + class TestNLargestNSmallest(object): @pytest.mark.parametrize( @@ -1994,26 +2072,6 @@ def test_min_max(self): assert np.isnan(_min) assert _max == 1 - def test_mode(self): - s = Series(Categorical([1, 1, 2, 4, 5, 5, 5], - categories=[5, 4, 3, 2, 1], ordered=True)) - res = s.mode() - exp = Series(Categorical([5], categories=[ - 5, 4, 3, 2, 1], ordered=True)) - tm.assert_series_equal(res, exp) - s = Series(Categorical([1, 1, 1, 4, 5, 5, 5], - categories=[5, 4, 3, 2, 1], ordered=True)) - res = s.mode() - exp = Series(Categorical([5, 1], categories=[ - 5, 4, 3, 2, 1], ordered=True)) - tm.assert_series_equal(res, exp) - s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], - ordered=True)) - res = s.mode() - exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], - ordered=True)) - tm.assert_series_equal(res, exp) - def test_value_counts(self): # GH 12835 cats = Categorical(list('abcccb'), categories=list('cabd'))