diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 595845e107cf8..216a9846ac241 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -570,7 +570,7 @@ Missing - Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`) - Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with :class:`IntervalDtype` and incompatible value raising instead of casting to a common (usually object) dtype (:issue:`45796`) - Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`) -- +- Bug in :meth:`DataFrame.dropna` allowing the incompatible arguments ``how`` and ``thresh`` to be set at the same time (:issue:`46575`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a79e23058ef98..7270d73e29741 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -42,7 +42,10 @@ properties, ) from pandas._libs.hashtable import duplicated -from pandas._libs.lib import no_default +from pandas._libs.lib import ( + NoDefault, + no_default, +) from pandas._typing import ( AggFuncType, AnyArrayLike, @@ -6110,8 +6113,8 @@ def notnull(self) -> DataFrame: def dropna( self, axis: Axis = 0, - how: str = "any", - thresh=None, + how: str | NoDefault = no_default, + thresh: int | NoDefault = no_default, subset: IndexLabel = None, inplace: bool = False, ): @@ -6143,7 +6146,7 @@ def dropna( * 'all' : If all values are NA, drop that row or column. thresh : int, optional - Require that many non-NA values. + Require that many non-NA values. Cannot be combined with how. subset : column label or sequence of labels, optional Labels along other axis to consider, e.g. if you are dropping rows these would be a list of columns to include. 
@@ -6218,6 +6221,14 @@ def dropna( name toy born 1 Batman Batmobile 1940-04-25 """ + if (how is not no_default) and (thresh is not no_default): + raise TypeError( + "You cannot set both the how and thresh arguments at the same time." + ) + + if how is no_default: + how = "any" + inplace = validate_bool_kwarg(inplace, "inplace") if isinstance(axis, (tuple, list)): # GH20987 @@ -6238,7 +6249,7 @@ def dropna( raise KeyError(np.array(subset)[check].tolist()) agg_obj = self.take(indices, axis=agg_axis) - if thresh is not None: + if thresh is not no_default: count = agg_obj.count(axis=agg_axis) mask = count >= thresh elif how == "any": @@ -6248,10 +6259,8 @@ def dropna( # faster equivalent to 'agg_obj.count(agg_axis) > 0' mask = notna(agg_obj).any(axis=agg_axis, bool_only=False) else: - if how is not None: + if how is not no_default: raise ValueError(f"invalid how option: {how}") - else: - raise TypeError("must specify how or thresh") if np.all(mask): result = self.copy() diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index d0b9eebb31b93..43cecc6a1aed5 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -158,9 +158,6 @@ def test_dropna_corner(self, float_frame): msg = "invalid how option: foo" with pytest.raises(ValueError, match=msg): float_frame.dropna(how="foo") - msg = "must specify how or thresh" - with pytest.raises(TypeError, match=msg): - float_frame.dropna(how=None) # non-existent column - 8303 with pytest.raises(KeyError, match=r"^\['X'\]$"): float_frame.dropna(subset=["A", "X"]) @@ -274,3 +271,16 @@ def test_no_nans_in_frame(self, axis): expected = df.copy() result = df.dropna(axis=axis) tm.assert_frame_equal(result, expected, check_index_type=True) + + def test_how_thresh_param_incompatible(self): + # GH46575 + df = DataFrame([1, 2, pd.NA]) + msg = "You cannot set both the how and thresh arguments at the same time" + with pytest.raises(TypeError, 
match=msg): + df.dropna(how="all", thresh=2) + + with pytest.raises(TypeError, match=msg): + df.dropna(how="any", thresh=2) + + with pytest.raises(TypeError, match=msg): + df.dropna(how=None, thresh=None)