diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 16ee728a4425a..d6ad5eb2003ce 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -177,6 +177,7 @@ Other enhancements - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`) - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`) +- :meth:`DataFrame.dropna` now accepts a single label as ``subset``, in addition to an array-like of labels (:issue:`41021`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c29a67c4942db..2b2c11bc6eeb5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5931,7 +5931,7 @@ def dropna( axis: Axis = 0, how: str = "any", thresh=None, - subset=None, + subset: IndexLabel = None, inplace: bool = False, ): """ @@ -5963,7 +5963,7 @@ def dropna( thresh : int, optional Require that many non-NA values. - subset : array-like, optional + subset : column label or sequence of labels, optional Labels along other axis to consider, e.g. if you are dropping rows these would be a list of columns to include. 
inplace : bool, default False @@ -6047,11 +6047,14 @@ def dropna( agg_obj = self if subset is not None: + # subset needs to be list + if not is_list_like(subset): + subset = [subset] ax = self._get_axis(agg_axis) indices = ax.get_indexer_for(subset) check = indices == -1 if check.any(): - raise KeyError(list(np.compress(check, subset))) + raise KeyError(np.array(subset)[check].tolist()) agg_obj = self.take(indices, axis=agg_axis) count = agg_obj.count(axis=agg_axis) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index bc2b48d3312d7..1207c2763db07 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -243,3 +243,27 @@ def test_dropna_pos_args_deprecation(self): result = df.dropna(1) expected = DataFrame({"a": [1, 2, 3]}) tm.assert_frame_equal(result, expected) + + def test_set_single_column_subset(self): + # GH 41021 + df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.nan, 5]}) + expected = DataFrame( + {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2] + ) + result = df.dropna(subset="C") + tm.assert_frame_equal(result, expected) + + def test_single_column_not_present_in_axis(self): + # GH 41021 + df = DataFrame({"A": [1, 2, 3]}) + + # Column not present; brackets are escaped so the pattern matches the literal list repr + with pytest.raises(KeyError, match=r"\['D'\]"): + df.dropna(subset="D", axis=0) + + def test_subset_is_nparray(self): + # GH 41021 + df = DataFrame({"A": [1, 2, np.nan], "B": list("abc"), "C": [4, np.nan, 5]}) + expected = DataFrame({"A": [1.0], "B": ["a"], "C": [4.0]}) + result = df.dropna(subset=np.array(["A", "C"])) + tm.assert_frame_equal(result, expected)