Skip to content

BUG: Fixed handling of boolean indexing with 2-d ndarrays #18645

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 30, 2017
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ Indexing
- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`)
- Bug where indexing a non-scalar value from a ``Series`` with a non-unique ``Index`` returned the value flattened (:issue:`17610`)
- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`)


I/O
Expand Down
17 changes: 13 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2532,10 +2532,10 @@ def __setitem__(self, key, value):
if indexer is not None:
return self._setitem_slice(indexer, value)

if isinstance(key, (Series, np.ndarray, list, Index)):
self._setitem_array(key, value)
elif isinstance(key, DataFrame):
if isinstance(key, DataFrame) or getattr(key, 'ndim', None) == 2:
self._setitem_frame(key, value)
elif isinstance(key, (Series, np.ndarray, list, Index)):
self._setitem_array(key, value)
else:
# set column
self._set_item(key, value)
Expand Down Expand Up @@ -2568,8 +2568,17 @@ def _setitem_array(self, key, value):
def _setitem_frame(self, key, value):
# support boolean setting with DataFrame input, e.g.
# df[df > df2] = 0
if isinstance(key, np.ndarray):
if key.shape != self.shape:
raise ValueError(
'Array conditional must be same shape as self'
)
key = self._constructor(key, **self._construct_axes_dict())

if key.values.size and not is_bool_dtype(key.values):
raise TypeError('Must pass DataFrame with boolean values only')
raise TypeError(
'Must pass DataFrame or 2-d ndarray with boolean values only'
)

self._check_inplace_setting(value)
self._check_setitem_copy()
Expand Down
24 changes: 21 additions & 3 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,9 +524,8 @@ def test_setitem_boolean(self):
values[values == 2] = 3
assert_almost_equal(df.values, values)

with tm.assert_raises_regex(TypeError, 'Must pass '
'DataFrame with '
'boolean values only'):
msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
with tm.assert_raises_regex(TypeError, msg):
df[df * 0] = 2

# index with DataFrame
Expand All @@ -542,6 +541,25 @@ def test_setitem_boolean(self):
np.putmask(expected.values, mask.values, df.values * 2)
assert_frame_equal(df, expected)

@pytest.mark.parametrize(
"mask_type",
[lambda df: df > np.abs(df) / 2,
lambda df: (df > np.abs(df) / 2).values],
ids=['dataframe', 'array'])
def test_setitem_boolean_mask(self, mask_type):

# Test for issue #18582
df = self.frame.copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add the issue number as a comment here? Also, please check the spelling of "ndarray".

mask = mask_type(df)

# index with boolean mask
result = df.copy()
result[mask] = np.nan

expected = df.copy()
expected.values[mask] = np.nan
assert_frame_equal(result, expected)

def test_setitem_cast(self):
self.frame['D'] = self.frame['D'].astype('i8')
assert self.frame['D'].dtype == np.int64
Expand Down