Skip to content

Commit 1264ef5

Browse files
committed
COMPAT: raise SettingWithCopy in even more situations when a view is at hand
1 parent db8533f commit 1264ef5

File tree

9 files changed

+127
-64
lines changed

9 files changed

+127
-64
lines changed

doc/source/v0.15.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ API changes
114114
df
115115
df.dtypes
116116

117-
- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`)
117+
- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`)
118118

119119
.. code-block:: python
120120

pandas/core/generic.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,13 @@ def _is_cached(self):
10911091
cacher = getattr(self, '_cacher', None)
10921092
return cacher is not None
10931093

1094+
def _get_cacher(self):
1095+
""" return my cacher or None """
1096+
cacher = getattr(self, '_cacher', None)
1097+
if cacher is not None:
1098+
cacher = cacher[1]()
1099+
return cacher
1100+
10941101
@property
10951102
def _is_view(self):
10961103
""" boolean : return if I am a view of another array """
@@ -1154,8 +1161,30 @@ def _set_is_copy(self, ref=None, copy=True):
11541161
else:
11551162
self.is_copy = None
11561163

1157-
def _check_setitem_copy(self, stacklevel=4, t='setting'):
1164+
def _check_is_chained_assignment_possible(self):
1165+
"""
1166+
check if we are a view, have a cacher, and are of mixed type
1167+
if so, then force a setitem_copy check
1168+
1169+
should be called just near setting a value
11581170
"""
1171+
if self._is_view and self._is_cached:
1172+
ref = self._get_cacher()
1173+
if ref is not None and ref._is_mixed_type:
1174+
self._check_setitem_copy(stacklevel=5, t='referant', force=True)
1175+
elif self.is_copy:
1176+
self._check_setitem_copy(stacklevel=5, t='referant')
1177+
1178+
def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
1179+
"""
1180+
1181+
Parameters
1182+
----------
1183+
stacklevel : integer, default 4
1184+
the level to show of the stack when the error is output
1185+
t : string, the type of setting error
1186+
force : boolean, default False
1187+
if True, then force showing an error
11591188
11601189
validate if we are doing a setitem on a chained copy.
11611190
@@ -1177,7 +1206,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting'):
11771206
11781207
"""
11791208

1180-
if self.is_copy:
1209+
if force or self.is_copy:
11811210

11821211
value = config.get_option('mode.chained_assignment')
11831212
if value is None:

pandas/core/indexing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,9 @@ def can_do_equal_len():
472472
if isinstance(value, ABCPanel):
473473
value = self._align_panel(indexer, value)
474474

475+
# check for chained assignment
476+
self.obj._check_is_chained_assignment_possible()
477+
475478
# actually do the set
476479
self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
477480
self.obj._maybe_update_cacher(clear=True)

pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ def _set_with_engine(self, key, value):
641641
values = self.values
642642
try:
643643
self.index._engine.set_value(values, key, value)
644-
self._check_setitem_copy()
644+
self._check_is_chained_assignment_possible()
645645
return
646646
except KeyError:
647647
values[self.index.get_loc(key)] = value

pandas/io/tests/test_json/test_pandas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,13 +305,13 @@ def test_frame_from_json_nones(self):
305305
# infinities get mapped to nulls which get mapped to NaNs during
306306
# deserialisation
307307
df = DataFrame([[1, 2], [4, 5, 6]])
308-
df[2][0] = np.inf
308+
df.loc[0,2] = np.inf
309309
unser = read_json(df.to_json())
310310
self.assertTrue(np.isnan(unser[2][0]))
311311
unser = read_json(df.to_json(), dtype=False)
312312
self.assertTrue(np.isnan(unser[2][0]))
313313

314-
df[2][0] = np.NINF
314+
df.loc[0,2] = np.NINF
315315
unser = read_json(df.to_json())
316316
self.assertTrue(np.isnan(unser[2][0]))
317317
unser = read_json(df.to_json(),dtype=False)

pandas/tests/test_frame.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
import pandas.core.format as fmt
3333
import pandas.core.datetools as datetools
3434
from pandas import (DataFrame, Index, Series, notnull, isnull,
35-
MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv)
35+
MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv,
36+
option_context)
3637
import pandas as pd
3738
from pandas.parser import CParserError
3839
from pandas.util.misc import is_little_endian
@@ -4469,13 +4470,13 @@ def test_repr(self):
44694470

44704471
def test_repr_dimensions(self):
44714472
df = DataFrame([[1, 2,], [3, 4]])
4472-
with pd.option_context('display.show_dimensions', True):
4473+
with option_context('display.show_dimensions', True):
44734474
self.assertTrue("2 rows x 2 columns" in repr(df))
44744475

4475-
with pd.option_context('display.show_dimensions', False):
4476+
with option_context('display.show_dimensions', False):
44764477
self.assertFalse("2 rows x 2 columns" in repr(df))
44774478

4478-
with pd.option_context('display.show_dimensions', 'truncate'):
4479+
with option_context('display.show_dimensions', 'truncate'):
44794480
self.assertFalse("2 rows x 2 columns" in repr(df))
44804481

44814482
@slow
@@ -6475,7 +6476,7 @@ def test_info_max_cols(self):
64756476
df = DataFrame(np.random.randn(10, 5))
64766477
for len_, verbose in [(4, None), (4, False), (9, True)]:
64776478
# For verbose always ^ setting ^ summarize ^ full output
6478-
with pd.option_context('max_info_columns', 4):
6479+
with option_context('max_info_columns', 4):
64796480
buf = StringIO()
64806481
df.info(buf=buf, verbose=verbose)
64816482
res = buf.getvalue()
@@ -6484,22 +6485,22 @@ def test_info_max_cols(self):
64846485
for len_, verbose in [(9, None), (4, False), (9, True)]:
64856486

64866487
# max_cols not exceeded
6487-
with pd.option_context('max_info_columns', 5):
6488+
with option_context('max_info_columns', 5):
64886489
buf = StringIO()
64896490
df.info(buf=buf, verbose=verbose)
64906491
res = buf.getvalue()
64916492
self.assertEqual(len(res.split('\n')), len_)
64926493

64936494
for len_, max_cols in [(9, 5), (4, 4)]:
64946495
# setting truncates
6495-
with pd.option_context('max_info_columns', 4):
6496+
with option_context('max_info_columns', 4):
64966497
buf = StringIO()
64976498
df.info(buf=buf, max_cols=max_cols)
64986499
res = buf.getvalue()
64996500
self.assertEqual(len(res.split('\n')), len_)
65006501

65016502
# setting wouldn't truncate
6502-
with pd.option_context('max_info_columns', 5):
6503+
with option_context('max_info_columns', 5):
65036504
buf = StringIO()
65046505
df.info(buf=buf, max_cols=max_cols)
65056506
res = buf.getvalue()
@@ -9872,7 +9873,7 @@ def test_apply_modify_traceback(self):
98729873
'E': np.random.randn(11),
98739874
'F': np.random.randn(11)})
98749875

9875-
data['C'][4] = np.nan
9876+
data.loc[4,'C'] = np.nan
98769877

98779878
def transform(row):
98789879
if row['C'].startswith('shin') and row['A'] == 'foo':
@@ -12551,15 +12552,18 @@ def test_idxmax(self):
1255112552
self.assertRaises(ValueError, frame.idxmax, axis=2)
1255212553

1255312554
def test_stale_cached_series_bug_473(self):
12554-
Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'),
12555-
columns=('e', 'f', 'g', 'h'))
12556-
repr(Y)
12557-
Y['e'] = Y['e'].astype('object')
12558-
Y['g']['c'] = np.NaN
12559-
repr(Y)
12560-
result = Y.sum()
12561-
exp = Y['g'].sum()
12562-
self.assertTrue(isnull(Y['g']['c']))
12555+
12556+
# this is chained, but ok
12557+
with option_context('chained_assignment',None):
12558+
Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'),
12559+
columns=('e', 'f', 'g', 'h'))
12560+
repr(Y)
12561+
Y['e'] = Y['e'].astype('object')
12562+
Y['g']['c'] = np.NaN
12563+
repr(Y)
12564+
result = Y.sum()
12565+
exp = Y['g'].sum()
12566+
self.assertTrue(isnull(Y['g']['c']))
1256312567

1256412568
def test_index_namedtuple(self):
1256512569
from collections import namedtuple

pandas/tests/test_generic.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,8 @@ def test_interp_basic(self):
734734

735735
result = df.set_index('C').interpolate()
736736
expected = df.set_index('C')
737-
expected.A.loc[3] = 3
738-
expected.B.loc[5] = 9
737+
expected.loc[3,'A'] = 3
738+
expected.loc[5,'B'] = 9
739739
assert_frame_equal(result, expected)
740740

741741
def test_interp_bad_method(self):
@@ -810,24 +810,22 @@ def test_interp_alt_scipy(self):
810810
'C': [1, 2, 3, 5, 8, 13, 21]})
811811
result = df.interpolate(method='barycentric')
812812
expected = df.copy()
813-
expected['A'].iloc[2] = 3
814-
expected['A'].iloc[5] = 6
813+
expected.ix[2,'A'] = 3
814+
expected.ix[5,'A'] = 6
815815
assert_frame_equal(result, expected)
816816

817817
result = df.interpolate(method='barycentric', downcast='infer')
818818
assert_frame_equal(result, expected.astype(np.int64))
819819

820820
result = df.interpolate(method='krogh')
821821
expectedk = df.copy()
822-
# expectedk['A'].iloc[2] = 3
823-
# expectedk['A'].iloc[5] = 6
824822
expectedk['A'] = expected['A']
825823
assert_frame_equal(result, expectedk)
826824

827825
_skip_if_no_pchip()
828826
result = df.interpolate(method='pchip')
829-
expected['A'].iloc[2] = 3
830-
expected['A'].iloc[5] = 6.125
827+
expected.ix[2,'A'] = 3
828+
expected.ix[5,'A'] = 6.125
831829
assert_frame_equal(result, expected)
832830

833831
def test_interp_rowwise(self):
@@ -838,9 +836,9 @@ def test_interp_rowwise(self):
838836
4: [1, 2, 3, 4]})
839837
result = df.interpolate(axis=1)
840838
expected = df.copy()
841-
expected[1].loc[3] = 5
842-
expected[2].loc[0] = 3
843-
expected[3].loc[1] = 3
839+
expected.loc[3,1] = 5
840+
expected.loc[0,2] = 3
841+
expected.loc[1,3] = 3
844842
expected[4] = expected[4].astype(np.float64)
845843
assert_frame_equal(result, expected)
846844

pandas/tests/test_indexing.py

Lines changed: 55 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import pandas as pd
1212
import pandas.core.common as com
13+
from pandas import option_context
1314
from pandas.core.api import (DataFrame, Index, Series, Panel, isnull,
1415
MultiIndex, Float64Index, Timestamp)
1516
from pandas.util.testing import (assert_almost_equal, assert_series_equal,
@@ -2320,10 +2321,12 @@ def test_ix_assign_column_mixed(self):
23202321
assert_frame_equal(df,expected)
23212322

23222323
# ok, but chained assignments are dangerous
2323-
df = pd.DataFrame({'a': lrange(4) })
2324-
df['b'] = np.nan
2325-
df['b'].ix[[1,3]] = [100,-100]
2326-
assert_frame_equal(df,expected)
2324+
# if we turn off chained assignment it will work
2325+
with option_context('chained_assignment',None):
2326+
df = pd.DataFrame({'a': lrange(4) })
2327+
df['b'] = np.nan
2328+
df['b'].ix[[1,3]] = [100,-100]
2329+
assert_frame_equal(df,expected)
23272330

23282331
def test_ix_get_set_consistency(self):
23292332

@@ -3036,22 +3039,26 @@ def test_cache_updating(self):
30363039
self.assertEqual(result, 2)
30373040

30383041
def test_slice_consolidate_invalidate_item_cache(self):
3039-
# #3970
3040-
df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5})
30413042

3042-
# Creates a second float block
3043-
df["cc"] = 0.0
3043+
# this is chained assignment, but will 'work'
3044+
with option_context('chained_assignment',None):
3045+
3046+
# #3970
3047+
df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5})
30443048

3045-
# caches a reference to the 'bb' series
3046-
df["bb"]
3049+
# Creates a second float block
3050+
df["cc"] = 0.0
30473051

3048-
# repr machinery triggers consolidation
3049-
repr(df)
3052+
# caches a reference to the 'bb' series
3053+
df["bb"]
30503054

3051-
# Assignment to wrong series
3052-
df['bb'].iloc[0] = 0.17
3053-
df._clear_item_cache()
3054-
self.assertAlmostEqual(df['bb'][0], 0.17)
3055+
# repr machinery triggers consolidation
3056+
repr(df)
3057+
3058+
# Assignment to wrong series
3059+
df['bb'].iloc[0] = 0.17
3060+
df._clear_item_cache()
3061+
self.assertAlmostEqual(df['bb'][0], 0.17)
30553062

30563063
def test_setitem_cache_updating(self):
30573064
# GH 5424
@@ -3135,17 +3142,19 @@ def test_detect_chained_assignment(self):
31353142
expected = DataFrame([[-5,1],[-6,3]],columns=list('AB'))
31363143
df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64')
31373144
self.assertIsNone(df.is_copy)
3138-
31393145
df['A'][0] = -5
31403146
df['A'][1] = -6
31413147
assert_frame_equal(df, expected)
31423148

3143-
expected = DataFrame([[-5,2],[np.nan,3.]],columns=list('AB'))
3149+
# test with the chaining
31443150
df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)})
31453151
self.assertIsNone(df.is_copy)
3146-
df['A'][0] = -5
3147-
df['A'][1] = np.nan
3148-
assert_frame_equal(df, expected)
3152+
def f():
3153+
df['A'][0] = -5
3154+
self.assertRaises(com.SettingWithCopyError, f)
3155+
def f():
3156+
df['A'][1] = np.nan
3157+
self.assertRaises(com.SettingWithCopyError, f)
31493158
self.assertIsNone(df['A'].is_copy)
31503159

31513160
# using a copy (the chain), fails
@@ -3172,18 +3181,14 @@ def f():
31723181

31733182
expected = DataFrame({'A':[111,'bbb','ccc'],'B':[1,2,3]})
31743183
df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]})
3175-
df['A'][0] = 111
3184+
def f():
3185+
df['A'][0] = 111
3186+
self.assertRaises(com.SettingWithCopyError, f)
31763187
def f():
31773188
df.loc[0]['A'] = 111
31783189
self.assertRaises(com.SettingWithCopyError, f)
31793190
assert_frame_equal(df,expected)
31803191

3181-
# warnings
3182-
pd.set_option('chained_assignment','warn')
3183-
df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]})
3184-
with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning):
3185-
df.loc[0]['A'] = 111
3186-
31873192
# make sure that is_copy is picked up reconstruction
31883193
# GH5475
31893194
df = DataFrame({"A": [1,2]})
@@ -3196,7 +3201,6 @@ def f():
31963201

31973202
# a spurious raise as we are setting the entire column here
31983203
# GH5597
3199-
pd.set_option('chained_assignment','raise')
32003204
from string import ascii_letters as letters
32013205

32023206
def random_text(nobs=100):
@@ -3295,6 +3299,28 @@ def f():
32953299
df.iloc[0:5]['group'] = 'a'
32963300
self.assertRaises(com.SettingWithCopyError, f)
32973301

3302+
# mixed type setting
3303+
# same dtype & changing dtype
3304+
df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde')))
3305+
3306+
def f():
3307+
df.ix[2]['D'] = 'foo'
3308+
self.assertRaises(com.SettingWithCopyError, f)
3309+
def f():
3310+
df.ix[2]['C'] = 'foo'
3311+
self.assertRaises(com.SettingWithCopyError, f)
3312+
def f():
3313+
df['C'][2] = 'foo'
3314+
self.assertRaises(com.SettingWithCopyError, f)
3315+
3316+
def test_detect_chained_assignment_warnings(self):
3317+
3318+
# warnings
3319+
with option_context('chained_assignment','warn'):
3320+
df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]})
3321+
with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning):
3322+
df.loc[0]['A'] = 111
3323+
32983324
def test_float64index_slicing_bug(self):
32993325
# GH 5557, related to slicing a float index
33003326
ser = {256: 2321.0, 1: 78.0, 2: 2716.0, 3: 0.0, 4: 369.0, 5: 0.0, 6: 269.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 3536.0, 11: 0.0, 12: 24.0, 13: 0.0, 14: 931.0, 15: 0.0, 16: 101.0, 17: 78.0, 18: 9643.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 63761.0, 23: 0.0, 24: 446.0, 25: 0.0, 26: 34773.0, 27: 0.0, 28: 729.0, 29: 78.0, 30: 0.0, 31: 0.0, 32: 3374.0, 33: 0.0, 34: 1391.0, 35: 0.0, 36: 361.0, 37: 0.0, 38: 61808.0, 39: 0.0, 40: 0.0, 41: 0.0, 42: 6677.0, 43: 0.0, 44: 802.0, 45: 0.0, 46: 2691.0, 47: 0.0, 48: 3582.0, 49: 0.0, 50: 734.0, 51: 0.0, 52: 627.0, 53: 70.0, 54: 2584.0, 55: 0.0, 56: 324.0, 57: 0.0, 58: 605.0, 59: 0.0, 60: 0.0, 61: 0.0, 62: 3989.0, 63: 10.0, 64: 42.0, 65: 0.0, 66: 904.0, 67: 0.0, 68: 88.0, 69: 70.0, 70: 8172.0, 71: 0.0, 72: 0.0, 73: 0.0, 74: 64902.0, 75: 0.0, 76: 347.0, 77: 0.0, 78: 36605.0, 79: 0.0, 80: 379.0, 81: 70.0, 82: 0.0, 83: 0.0, 84: 3001.0, 85: 0.0, 86: 1630.0, 87: 7.0, 88: 364.0, 89: 0.0, 90: 67404.0, 91: 9.0, 92: 0.0, 93: 0.0, 94: 7685.0, 95: 0.0, 96: 1017.0, 97: 0.0, 98: 2831.0, 99: 0.0, 100: 2963.0, 101: 0.0, 102: 854.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0, 108: 0.0, 109: 0.0, 110: 0.0, 111: 0.0, 112: 0.0, 113: 0.0, 114: 0.0, 115: 0.0, 116: 0.0, 117: 0.0, 118: 0.0, 119: 0.0, 120: 0.0, 121: 0.0, 122: 0.0, 123: 0.0, 124: 0.0, 125: 0.0, 126: 67744.0, 127: 22.0, 128: 264.0, 129: 0.0, 260: 197.0, 268: 0.0, 265: 0.0, 269: 0.0, 261: 0.0, 266: 1198.0, 267: 0.0, 262: 2629.0, 258: 775.0, 257: 0.0, 263: 0.0, 259: 0.0, 264: 163.0, 250: 10326.0, 251: 0.0, 252: 1228.0, 253: 0.0, 254: 2769.0, 255: 0.0}

0 commit comments

Comments
 (0)