diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 0301bf0a23dd5..430637ac6d384 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -314,6 +314,7 @@ Indexing - :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) I/O diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 12a4a7fdaedad..faf9f2673b0ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2532,10 +2532,10 @@ def __setitem__(self, key, value): if indexer is not None: return self._setitem_slice(indexer, value) - if isinstance(key, (Series, np.ndarray, list, Index)): - self._setitem_array(key, value) - elif isinstance(key, DataFrame): + if isinstance(key, DataFrame) or getattr(key, 'ndim', None) == 2: self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) else: # set column self._set_item(key, value) @@ -2568,8 +2568,17 @@ def _setitem_array(self, key, value): def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError( + 'Array conditional must be same shape as self' + ) + key = self._constructor(key, **self._construct_axes_dict()) + if key.values.size and not is_bool_dtype(key.values): - raise TypeError('Must pass DataFrame with boolean values only') + raise TypeError( + 'Must pass DataFrame or 2-d ndarray with boolean values only' + ) self._check_inplace_setting(value) self._check_setitem_copy() diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 62bc0eada9d89..882fa634d167d 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -524,9 +524,8 @@ def test_setitem_boolean(self): values[values == 2] = 3 assert_almost_equal(df.values, values) - with tm.assert_raises_regex(TypeError, 'Must pass ' - 'DataFrame with ' - 'boolean values only'): + msg = "Must pass DataFrame or 2-d ndarray with boolean values only" + with tm.assert_raises_regex(TypeError, msg): df[df * 0] = 2 # index with DataFrame @@ -542,6 +541,25 @@ def test_setitem_boolean(self): np.putmask(expected.values, mask.values, df.values * 2) assert_frame_equal(df, expected) + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, + lambda df: (df > np.abs(df) / 2).values], + ids=['dataframe', 'array']) + def test_setitem_boolean_mask(self, mask_type): + + # Test for issue #18582 + df = self.frame.copy() + mask = mask_type(df) + + # index with boolean mask + result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = np.nan + assert_frame_equal(result, expected) + def test_setitem_cast(self): self.frame['D'] = self.frame['D'].astype('i8') assert self.frame['D'].dtype == np.int64