COMPAT: raise SettingWithCopy in even more situations when a view is at hand #7950
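In short: assigning through chained indexing on a mixed-dtype DataFrame goes through a cached single-dtype view, and that path previously slipped past the SettingWithCopy machinery. A minimal sketch of the pattern this PR now flags, adapted from the documentation example changed below (the warning still lets the assignment reach the frame; under the 'raise' option it becomes a SettingWithCopyError):

    import numpy as np
    import pandas as pd

    dfb = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
                        'c': np.arange(7)})

    # chained assignment: dfb['c'] is a cached single-dtype view whose parent
    # frame is mixed-dtype; with this change it emits SettingWithCopyWarning,
    # although the values are still written into dfb
    dfb['c'][dfb.a.str.startswith('o')] = 42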

Merged 2 commits on Aug 11, 2014
Changes from all commits
3 changes: 2 additions & 1 deletion doc/source/indexing.rst
@@ -1481,7 +1481,8 @@ which can take the values ``['raise','warn',None]``, where showing a warning is
                              'three', 'two', 'one', 'six'],
                       'c' : np.arange(7)})

-   # passed via reference (will stay)
+   # This will show the SettingWithCopyWarning
+   # but the frame values will be set
    dfb['c'][dfb.a.str.startswith('o')] = 42

 This however is operating on a copy and will not work.
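As an aside, the warning-free way to write the dfb example above is a single .loc call that does the row mask and the column selection together:

    # same effect as the doc example above, without the intermediate view
    dfb.loc[dfb.a.str.startswith('o'), 'c'] = 42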
2 changes: 1 addition & 1 deletion doc/source/v0.15.0.txt
@@ -114,7 +114,7 @@ API changes
      df
      df.dtypes

-- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`)
+- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`)

  .. code-block:: python

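As context for the whatsnew entry above: the behaviour is governed by the ``mode.chained_assignment`` option mentioned in indexing.rst, which accepts 'warn' (the default), 'raise', or None. A quick sketch of toggling it:

    import pandas as pd

    pd.set_option('mode.chained_assignment', 'raise')  # turn warnings into SettingWithCopyError
    pd.set_option('mode.chained_assignment', None)     # silence the check entirely
    pd.set_option('mode.chained_assignment', 'warn')   # back to the default

    # or limit the stricter setting to a block
    with pd.option_context('mode.chained_assignment', 'raise'):
        pass  # chained assignment in here raises instead of warning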
39 changes: 36 additions & 3 deletions pandas/core/generic.py
@@ -1088,8 +1088,14 @@ def _maybe_cache_changed(self, item, value):
     @property
     def _is_cached(self):
         """ boolean : return if I am cached """
+        return getattr(self, '_cacher', None) is not None
+
+    def _get_cacher(self):
+        """ return my cacher or None """
         cacher = getattr(self, '_cacher', None)
-        return cacher is not None
+        if cacher is not None:
+            cacher = cacher[1]()
+        return cacher
 
     @property
     def _is_view(self):
@@ -1154,8 +1160,35 @@ def _set_is_copy(self, ref=None, copy=True):
         else:
             self.is_copy = None
 
-    def _check_setitem_copy(self, stacklevel=4, t='setting'):
+    def _check_is_chained_assignment_possible(self):
+        """
+        check if we are a view, have a cacher, and are of mixed type
+        if so, then force a setitem_copy check
+
+        should be called just near setting a value
+
+        will return a boolean if it we are a view and are cached, but a single-dtype
+        meaning that the cacher should be updated following setting
+        """
+        if self._is_view and self._is_cached:
+            ref = self._get_cacher()
+            if ref is not None and ref._is_mixed_type:
+                self._check_setitem_copy(stacklevel=4, t='referant', force=True)
+            return True
+        elif self.is_copy:
+            self._check_setitem_copy(stacklevel=4, t='referant')
+        return False
+
+    def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
         """
+
+        Parameters
+        ----------
+        stacklevel : integer, default 4
+           the level to show of the stack when the error is output
+        t : string, the type of setting error
+        force : boolean, default False
+           if True, then force showing an error
 
         validate if we are doing a settitem on a chained copy.
 
@@ -1177,7 +1210,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting'):
 
         """
 
-        if self.is_copy:
+        if force or self.is_copy:
 
             value = config.get_option('mode.chained_assignment')
             if value is None:
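The force path added above exists because a column pulled out of a mixed-dtype frame is itself a clean single-dtype view, so the plain is_copy check never fires; the parent has to be consulted through _get_cacher(). A small reproduction of the case it targets (assuming the default 'warn' mode):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': list('xyz'), 'c': np.arange(3)})  # mixed dtype
    col = df['c']    # single-dtype view, cached against df

    # col is not itself a copy, but its cacher (df) is mixed-dtype, so
    # _check_is_chained_assignment_possible forces the SettingWithCopy check here
    col[0] = 99      # warns; df['c'][0] still ends up as 99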
3 changes: 3 additions & 0 deletions pandas/core/indexing.py
@@ -472,6 +472,9 @@ def can_do_equal_len():
         if isinstance(value, ABCPanel):
             value = self._align_panel(indexer, value)
 
+        # check for chained assignment
+        self.obj._check_is_chained_assignment_possible()
+
         # actually do the set
         self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
         self.obj._maybe_update_cacher(clear=True)
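Because _setitem_with_indexer now runs the same check, label- and position-based setters on an object that is itself a chained copy are covered too. An illustrative case (default 'warn' mode assumed):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': list('xyz')})

    sub = df[df.a > 1]       # boolean indexing returns a copy that remembers df
    sub.loc[:, 'b'] = 'q'    # routed through _setitem_with_indexer: warns, df is untouched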
93 changes: 50 additions & 43 deletions pandas/core/series.py
@@ -587,61 +587,68 @@ def _get_values(self, indexer):
         return self.values[indexer]
 
     def __setitem__(self, key, value):
-        try:
-            self._set_with_engine(key, value)
-            return
-        except (SettingWithCopyError):
-            raise
-        except (KeyError, ValueError):
-            values = self.values
-            if (com.is_integer(key)
-                    and not self.index.inferred_type == 'integer'):
-
-                values[key] = value
-                return
-            elif key is Ellipsis:
-                self[:] = value
-                return
-            elif _is_bool_indexer(key):
-                pass
-            elif com.is_timedelta64_dtype(self.dtype):
-                # reassign a null value to iNaT
-                if isnull(value):
-                    value = tslib.iNaT
-
-                    try:
-                        self.index._engine.set_value(self.values, key, value)
-                        return
-                    except (TypeError):
-                        pass
-
-            self.loc[key] = value
-            return
-
-        except TypeError as e:
-            if isinstance(key, tuple) and not isinstance(self.index,
-                                                         MultiIndex):
-                raise ValueError("Can only tuple-index with a MultiIndex")
-
-            # python 3 type errors should be raised
-            if 'unorderable' in str(e):  # pragma: no cover
-                raise IndexError(key)
-
-        if _is_bool_indexer(key):
-            key = _check_bool_indexer(self.index, key)
-            try:
-                self.where(~key, value, inplace=True)
-                return
-            except (InvalidIndexError):
-                pass
-
-        self._set_with(key, value)
+
+        def setitem(key, value):
+            try:
+                self._set_with_engine(key, value)
+                return
+            except (SettingWithCopyError):
+                raise
+            except (KeyError, ValueError):
+                values = self.values
+                if (com.is_integer(key)
+                        and not self.index.inferred_type == 'integer'):
+
+                    values[key] = value
+                    return
+                elif key is Ellipsis:
+                    self[:] = value
+                    return
+                elif _is_bool_indexer(key):
+                    pass
+                elif com.is_timedelta64_dtype(self.dtype):
+                    # reassign a null value to iNaT
+                    if isnull(value):
+                        value = tslib.iNaT
+
+                        try:
+                            self.index._engine.set_value(self.values, key, value)
+                            return
+                        except (TypeError):
+                            pass
+
+                self.loc[key] = value
+                return
+
+            except TypeError as e:
+                if isinstance(key, tuple) and not isinstance(self.index,
+                                                             MultiIndex):
+                    raise ValueError("Can only tuple-index with a MultiIndex")
+
+                # python 3 type errors should be raised
+                if 'unorderable' in str(e):  # pragma: no cover
+                    raise IndexError(key)
+
+            if _is_bool_indexer(key):
+                key = _check_bool_indexer(self.index, key)
+                try:
+                    self.where(~key, value, inplace=True)
+                    return
+                except (InvalidIndexError):
+                    pass
+
+            self._set_with(key, value)
+
+        # do the setitem
+        cacher_needs_updating = self._check_is_chained_assignment_possible()
+        setitem(key, value)
+        if cacher_needs_updating:
+            self._maybe_update_cacher()
 
     def _set_with_engine(self, key, value):
         values = self.values
         try:
             self.index._engine.set_value(values, key, value)
             self._check_setitem_copy()
             return
         except KeyError:
             values[self.index.get_loc(key)] = value
4 changes: 2 additions & 2 deletions pandas/io/tests/test_json/test_pandas.py
@@ -305,13 +305,13 @@ def test_frame_from_json_nones(self):
         # infinities get mapped to nulls which get mapped to NaNs during
         # deserialisation
         df = DataFrame([[1, 2], [4, 5, 6]])
-        df[2][0] = np.inf
+        df.loc[0,2] = np.inf
         unser = read_json(df.to_json())
         self.assertTrue(np.isnan(unser[2][0]))
         unser = read_json(df.to_json(), dtype=False)
         self.assertTrue(np.isnan(unser[2][0]))
 
-        df[2][0] = np.NINF
+        df.loc[0,2] = np.NINF
         unser = read_json(df.to_json())
         self.assertTrue(np.isnan(unser[2][0]))
         unser = read_json(df.to_json(),dtype=False)
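The test tweaks in this file and the ones below all follow the same recipe: chained item assignment is replaced with a single label-based setter, presumably because the suite runs with the 'raise' option and the old spelling would now abort with SettingWithCopyError. Both spellings target the same cell:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame([[1, 2], [4, 5, 6]])   # ragged rows -> NaN pad, mixed dtypes

    # old spelling, now flagged as chained assignment:
    #   df[2][0] = np.inf
    # new spelling, one direct .loc call on row label 0, column label 2:
    df.loc[0, 2] = np.inf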
16 changes: 8 additions & 8 deletions pandas/io/tests/test_pytables.py
@@ -1278,8 +1278,8 @@ def test_append_with_data_columns(self):
         # data column selection with a string data_column
         df_new = df.copy()
         df_new['string'] = 'foo'
-        df_new['string'][1:4] = np.nan
-        df_new['string'][5:6] = 'bar'
+        df_new.loc[1:4,'string'] = np.nan
+        df_new.loc[5:6,'string'] = 'bar'
         _maybe_remove(store, 'df')
         store.append('df', df_new, data_columns=['string'])
         result = store.select('df', [Term('string=foo')])
@@ -1317,14 +1317,14 @@ def check_col(key,name,size):
         with ensure_clean_store(self.path) as store:
             # multiple data columns
             df_new = df.copy()
-            df_new.loc[:,'A'].iloc[0] = 1.
-            df_new.loc[:,'B'].iloc[0] = -1.
+            df_new.ix[0,'A'] = 1.
+            df_new.ix[0,'B'] = -1.
             df_new['string'] = 'foo'
-            df_new['string'][1:4] = np.nan
-            df_new['string'][5:6] = 'bar'
+            df_new.loc[1:4,'string'] = np.nan
+            df_new.loc[5:6,'string'] = 'bar'
             df_new['string2'] = 'foo'
-            df_new['string2'][2:5] = np.nan
-            df_new['string2'][7:8] = 'bar'
+            df_new.loc[2:5,'string2'] = np.nan
+            df_new.loc[7:8,'string2'] = 'bar'
             _maybe_remove(store, 'df')
             store.append(
                 'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
12 changes: 6 additions & 6 deletions pandas/tests/test_format.py
@@ -1348,8 +1348,8 @@ def test_to_string(self):
                             'B': tm.makeStringIndex(200)},
                            index=lrange(200))
 
-        biggie['A'][:20] = nan
-        biggie['B'][:20] = nan
+        biggie.loc[:20,'A'] = nan
+        biggie.loc[:20,'B'] = nan
         s = biggie.to_string()
 
         buf = StringIO()
@@ -1597,8 +1597,8 @@ def test_to_html(self):
                             'B': tm.makeStringIndex(200)},
                            index=lrange(200))
 
-        biggie['A'][:20] = nan
-        biggie['B'][:20] = nan
+        biggie.loc[:20,'A'] = nan
+        biggie.loc[:20,'B'] = nan
         s = biggie.to_html()
 
         buf = StringIO()
@@ -1624,8 +1624,8 @@ def test_to_html_filename(self):
                             'B': tm.makeStringIndex(200)},
                            index=lrange(200))
 
-        biggie['A'][:20] = nan
-        biggie['B'][:20] = nan
+        biggie.loc[:20,'A'] = nan
+        biggie.loc[:20,'B'] = nan
         with tm.ensure_clean('test.html') as path:
             biggie.to_html(path)
             with open(path, 'r') as f: