diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 8ec61496c538a..25233d970b3a6 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1481,7 +1481,8 @@ which can take the values ``['raise','warn',None]``, where showing a warning is 'three', 'two', 'one', 'six'], 'c' : np.arange(7)}) - # passed via reference (will stay) + # This will show the SettingWithCopyWarning + # but the frame values will be set dfb['c'][dfb.a.str.startswith('o')] = 42 This however is operating on a copy and will not work. diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 148cf85d0b5ab..6a9daa162cbf9 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -114,7 +114,7 @@ API changes df df.dtypes -- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`) +- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`) .. code-block:: python diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7b8b609fe0f2a..83110d143e8bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1088,8 +1088,14 @@ def _maybe_cache_changed(self, item, value): @property def _is_cached(self): """ boolean : return if I am cached """ + return getattr(self, '_cacher', None) is not None + + def _get_cacher(self): + """ return my cacher or None """ cacher = getattr(self, '_cacher', None) - return cacher is not None + if cacher is not None: + cacher = cacher[1]() + return cacher @property def _is_view(self): @@ -1154,8 +1160,35 @@ def _set_is_copy(self, ref=None, copy=True): else: self.is_copy = None - def _check_setitem_copy(self, stacklevel=4, t='setting'): + def _check_is_chained_assignment_possible(self): + """ + check if we are a view, have a cacher, and are of mixed type + if so, then force a setitem_copy check + + should be called just near setting a value + + will return a boolean if it we are a view and are cached, but a single-dtype + meaning that the cacher should be updated following setting """ + if self._is_view and self._is_cached: + ref = self._get_cacher() + if ref is not None and ref._is_mixed_type: + self._check_setitem_copy(stacklevel=4, t='referant', force=True) + return True + elif self.is_copy: + self._check_setitem_copy(stacklevel=4, t='referant') + return False + + def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): + """ + + Parameters + ---------- + stacklevel : integer, default 4 + the level to show of the stack when the error is output + t : string, the type of setting error + force : boolean, default False + if True, then force showing an error validate if we are doing a settitem on a chained copy. @@ -1177,7 +1210,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting'): """ - if self.is_copy: + if force or self.is_copy: value = config.get_option('mode.chained_assignment') if value is None: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 91008f9b22aed..6ee03eab4bab8 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -472,6 +472,9 @@ def can_do_equal_len(): if isinstance(value, ABCPanel): value = self._align_panel(indexer, value) + # check for chained assignment + self.obj._check_is_chained_assignment_possible() + # actually do the set self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3901e19968841..5a490992c478c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -587,61 +587,68 @@ def _get_values(self, indexer): return self.values[indexer] def __setitem__(self, key, value): - try: - self._set_with_engine(key, value) - return - except (SettingWithCopyError): - raise - except (KeyError, ValueError): - values = self.values - if (com.is_integer(key) - and not self.index.inferred_type == 'integer'): - values[key] = value + def setitem(key, value): + try: + self._set_with_engine(key, value) return - elif key is Ellipsis: - self[:] = value + except (SettingWithCopyError): + raise + except (KeyError, ValueError): + values = self.values + if (com.is_integer(key) + and not self.index.inferred_type == 'integer'): + + values[key] = value + return + elif key is Ellipsis: + self[:] = value + return + elif _is_bool_indexer(key): + pass + elif com.is_timedelta64_dtype(self.dtype): + # reassign a null value to iNaT + if isnull(value): + value = tslib.iNaT + + try: + self.index._engine.set_value(self.values, key, value) + return + except (TypeError): + pass + + self.loc[key] = value return - elif _is_bool_indexer(key): - pass - elif com.is_timedelta64_dtype(self.dtype): - # reassign a null value to iNaT - if isnull(value): - value = tslib.iNaT - - try: - self.index._engine.set_value(self.values, key, value) - return - except (TypeError): - pass - - self.loc[key] = value - return - except TypeError as e: - if isinstance(key, tuple) and not isinstance(self.index, - MultiIndex): - raise ValueError("Can only tuple-index with a MultiIndex") + except TypeError as e: + if isinstance(key, tuple) and not isinstance(self.index, + MultiIndex): + raise ValueError("Can only tuple-index with a MultiIndex") - # python 3 type errors should be raised - if 'unorderable' in str(e): # pragma: no cover - raise IndexError(key) + # python 3 type errors should be raised + if 'unorderable' in str(e): # pragma: no cover + raise IndexError(key) - if _is_bool_indexer(key): - key = _check_bool_indexer(self.index, key) - try: - self.where(~key, value, inplace=True) - return - except (InvalidIndexError): - pass + if _is_bool_indexer(key): + key = _check_bool_indexer(self.index, key) + try: + self.where(~key, value, inplace=True) + return + except (InvalidIndexError): + pass + + self._set_with(key, value) - self._set_with(key, value) + # do the setitem + cacher_needs_updating = self._check_is_chained_assignment_possible() + setitem(key, value) + if cacher_needs_updating: + self._maybe_update_cacher() def _set_with_engine(self, key, value): values = self.values try: self.index._engine.set_value(values, key, value) - self._check_setitem_copy() return except KeyError: values[self.index.get_loc(key)] = value diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index f6f705201bf18..62d729ccdaa88 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -305,13 +305,13 @@ def test_frame_from_json_nones(self): # infinities get mapped to nulls which get mapped to NaNs during # deserialisation df = DataFrame([[1, 2], [4, 5, 6]]) - df[2][0] = np.inf + df.loc[0,2] = np.inf unser = read_json(df.to_json()) self.assertTrue(np.isnan(unser[2][0])) unser = read_json(df.to_json(), dtype=False) self.assertTrue(np.isnan(unser[2][0])) - df[2][0] = np.NINF + df.loc[0,2] = np.NINF unser = read_json(df.to_json()) self.assertTrue(np.isnan(unser[2][0])) unser = read_json(df.to_json(),dtype=False) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 4f76f72b8eb66..89809b47d76eb 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1278,8 +1278,8 @@ def test_append_with_data_columns(self): # data column selection with a string data_column df_new = df.copy() df_new['string'] = 'foo' - df_new['string'][1:4] = np.nan - df_new['string'][5:6] = 'bar' + df_new.loc[1:4,'string'] = np.nan + df_new.loc[5:6,'string'] = 'bar' _maybe_remove(store, 'df') store.append('df', df_new, data_columns=['string']) result = store.select('df', [Term('string=foo')]) @@ -1317,14 +1317,14 @@ def check_col(key,name,size): with ensure_clean_store(self.path) as store: # multiple data columns df_new = df.copy() - df_new.loc[:,'A'].iloc[0] = 1. - df_new.loc[:,'B'].iloc[0] = -1. + df_new.ix[0,'A'] = 1. + df_new.ix[0,'B'] = -1. df_new['string'] = 'foo' - df_new['string'][1:4] = np.nan - df_new['string'][5:6] = 'bar' + df_new.loc[1:4,'string'] = np.nan + df_new.loc[5:6,'string'] = 'bar' df_new['string2'] = 'foo' - df_new['string2'][2:5] = np.nan - df_new['string2'][7:8] = 'bar' + df_new.loc[2:5,'string2'] = np.nan + df_new.loc[7:8,'string2'] = 'bar' _maybe_remove(store, 'df') store.append( 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 5d785df355aa3..27f5ab3c63d81 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1348,8 +1348,8 @@ def test_to_string(self): 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie['A'][:20] = nan - biggie['B'][:20] = nan + biggie.loc[:20,'A'] = nan + biggie.loc[:20,'B'] = nan s = biggie.to_string() buf = StringIO() @@ -1597,8 +1597,8 @@ def test_to_html(self): 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie['A'][:20] = nan - biggie['B'][:20] = nan + biggie.loc[:20,'A'] = nan + biggie.loc[:20,'B'] = nan s = biggie.to_html() buf = StringIO() @@ -1624,8 +1624,8 @@ def test_to_html_filename(self): 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie['A'][:20] = nan - biggie['B'][:20] = nan + biggie.loc[:20,'A'] = nan + biggie.loc[:20,'B'] = nan with tm.ensure_clean('test.html') as path: biggie.to_html(path) with open(path, 'r') as f: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 6a31f573951cd..7912debd0d409 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -32,7 +32,8 @@ import pandas.core.format as fmt import pandas.core.datetools as datetools from pandas import (DataFrame, Index, Series, notnull, isnull, - MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv) + MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv, + option_context) import pandas as pd from pandas.parser import CParserError from pandas.util.misc import is_little_endian @@ -4437,8 +4438,8 @@ def test_repr_mixed_big(self): biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie['A'][:20] = nan - biggie['B'][:20] = nan + biggie.loc[:20,'A'] = nan + biggie.loc[:20,'B'] = nan foo = repr(biggie) @@ -4469,13 +4470,13 @@ def test_repr(self): def test_repr_dimensions(self): df = DataFrame([[1, 2,], [3, 4]]) - with pd.option_context('display.show_dimensions', True): + with option_context('display.show_dimensions', True): self.assertTrue("2 rows x 2 columns" in repr(df)) - with pd.option_context('display.show_dimensions', False): + with option_context('display.show_dimensions', False): self.assertFalse("2 rows x 2 columns" in repr(df)) - with pd.option_context('display.show_dimensions', 'truncate'): + with option_context('display.show_dimensions', 'truncate'): self.assertFalse("2 rows x 2 columns" in repr(df)) @slow @@ -6475,7 +6476,7 @@ def test_info_max_cols(self): df = DataFrame(np.random.randn(10, 5)) for len_, verbose in [(4, None), (4, False), (9, True)]: # For verbose always ^ setting ^ summarize ^ full output - with pd.option_context('max_info_columns', 4): + with option_context('max_info_columns', 4): buf = StringIO() df.info(buf=buf, verbose=verbose) res = buf.getvalue() @@ -6484,7 +6485,7 @@ def test_info_max_cols(self): for len_, verbose in [(9, None), (4, False), (9, True)]: # max_cols no exceeded - with pd.option_context('max_info_columns', 5): + with option_context('max_info_columns', 5): buf = StringIO() df.info(buf=buf, verbose=verbose) res = buf.getvalue() @@ -6492,14 +6493,14 @@ def test_info_max_cols(self): for len_, max_cols in [(9, 5), (4, 4)]: # setting truncates - with pd.option_context('max_info_columns', 4): + with option_context('max_info_columns', 4): buf = StringIO() df.info(buf=buf, max_cols=max_cols) res = buf.getvalue() self.assertEqual(len(res.split('\n')), len_) # setting wouldn't truncate - with pd.option_context('max_info_columns', 5): + with option_context('max_info_columns', 5): buf = StringIO() df.info(buf=buf, max_cols=max_cols) res = buf.getvalue() @@ -7411,19 +7412,19 @@ def test_drop(self): assert_frame_equal(df,expected) def test_fillna(self): - self.tsframe['A'][:5] = nan - self.tsframe['A'][-5:] = nan + self.tsframe.ix[:5,'A'] = nan + self.tsframe.ix[-5:,'A'] = nan zero_filled = self.tsframe.fillna(0) - self.assertTrue((zero_filled['A'][:5] == 0).all()) + self.assertTrue((zero_filled.ix[:5,'A'] == 0).all()) padded = self.tsframe.fillna(method='pad') - self.assertTrue(np.isnan(padded['A'][:5]).all()) - self.assertTrue((padded['A'][-5:] == padded['A'][-5]).all()) + self.assertTrue(np.isnan(padded.ix[:5,'A']).all()) + self.assertTrue((padded.ix[-5:,'A'] == padded.ix[-5,'A']).all()) # mixed type - self.mixed_frame['foo'][5:20] = nan - self.mixed_frame['A'][-10:] = nan + self.mixed_frame.ix[5:20,'foo'] = nan + self.mixed_frame.ix[-10:,'A'] = nan result = self.mixed_frame.fillna(value=0) result = self.mixed_frame.fillna(method='pad') @@ -7432,7 +7433,7 @@ def test_fillna(self): # mixed numeric (but no float16) mf = self.mixed_float.reindex(columns=['A','B','D']) - mf['A'][-10:] = nan + mf.ix[-10:,'A'] = nan result = mf.fillna(value=0) _check_mixed_float(result, dtype = dict(C = None)) @@ -7604,8 +7605,8 @@ def test_replace_inplace(self): self.assertRaises(TypeError, self.tsframe.replace, nan) # mixed type - self.mixed_frame['foo'][5:20] = nan - self.mixed_frame['A'][-10:] = nan + self.mixed_frame.ix[5:20,'foo'] = nan + self.mixed_frame.ix[-10:,'A'] = nan result = self.mixed_frame.replace(np.nan, 0) expected = self.mixed_frame.fillna(value=0) @@ -8193,8 +8194,8 @@ def test_replace_convert(self): assert_series_equal(expec, res) def test_replace_mixed(self): - self.mixed_frame['foo'][5:20] = nan - self.mixed_frame['A'][-10:] = nan + self.mixed_frame.ix[5:20,'foo'] = nan + self.mixed_frame.ix[-10:,'A'] = nan result = self.mixed_frame.replace(np.nan, -18) expected = self.mixed_frame.fillna(value=-18) @@ -9872,7 +9873,7 @@ def test_apply_modify_traceback(self): 'E': np.random.randn(11), 'F': np.random.randn(11)}) - data['C'][4] = np.nan + data.loc[4,'C'] = np.nan def transform(row): if row['C'].startswith('shin') and row['A'] == 'foo': @@ -11716,11 +11717,11 @@ def test_rename_objects(self): self.assertNotIn('foo', renamed) def test_fill_corner(self): - self.mixed_frame['foo'][5:20] = nan - self.mixed_frame['A'][-10:] = nan + self.mixed_frame.ix[5:20,'foo'] = nan + self.mixed_frame.ix[-10:,'A'] = nan filled = self.mixed_frame.fillna(value=0) - self.assertTrue((filled['foo'][5:20] == 0).all()) + self.assertTrue((filled.ix[5:20,'foo'] == 0).all()) del self.mixed_frame['foo'] empty_float = self.frame.reindex(columns=[]) @@ -12551,15 +12552,18 @@ def test_idxmax(self): self.assertRaises(ValueError, frame.idxmax, axis=2) def test_stale_cached_series_bug_473(self): - Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), - columns=('e', 'f', 'g', 'h')) - repr(Y) - Y['e'] = Y['e'].astype('object') - Y['g']['c'] = np.NaN - repr(Y) - result = Y.sum() - exp = Y['g'].sum() - self.assertTrue(isnull(Y['g']['c'])) + + # this is chained, but ok + with option_context('chained_assignment',None): + Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), + columns=('e', 'f', 'g', 'h')) + repr(Y) + Y['e'] = Y['e'].astype('object') + Y['g']['c'] = np.NaN + repr(Y) + result = Y.sum() + exp = Y['g'].sum() + self.assertTrue(isnull(Y['g']['c'])) def test_index_namedtuple(self): from collections import namedtuple @@ -12712,6 +12716,7 @@ def __nonzero__(self): self.assertTrue(r1.all()) def test_strange_column_corruption_issue(self): + df = DataFrame(index=[0, 1]) df[0] = nan wasCol = {} diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index c607ccc3572b2..8d80962eb9902 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -734,8 +734,8 @@ def test_interp_basic(self): result = df.set_index('C').interpolate() expected = df.set_index('C') - expected.A.loc[3] = 3 - expected.B.loc[5] = 9 + expected.loc[3,'A'] = 3 + expected.loc[5,'B'] = 9 assert_frame_equal(result, expected) def test_interp_bad_method(self): @@ -810,8 +810,8 @@ def test_interp_alt_scipy(self): 'C': [1, 2, 3, 5, 8, 13, 21]}) result = df.interpolate(method='barycentric') expected = df.copy() - expected['A'].iloc[2] = 3 - expected['A'].iloc[5] = 6 + expected.ix[2,'A'] = 3 + expected.ix[5,'A'] = 6 assert_frame_equal(result, expected) result = df.interpolate(method='barycentric', downcast='infer') @@ -819,15 +819,13 @@ def test_interp_alt_scipy(self): result = df.interpolate(method='krogh') expectedk = df.copy() - # expectedk['A'].iloc[2] = 3 - # expectedk['A'].iloc[5] = 6 expectedk['A'] = expected['A'] assert_frame_equal(result, expectedk) _skip_if_no_pchip() result = df.interpolate(method='pchip') - expected['A'].iloc[2] = 3 - expected['A'].iloc[5] = 6.125 + expected.ix[2,'A'] = 3 + expected.ix[5,'A'] = 6.125 assert_frame_equal(result, expected) def test_interp_rowwise(self): @@ -838,9 +836,9 @@ def test_interp_rowwise(self): 4: [1, 2, 3, 4]}) result = df.interpolate(axis=1) expected = df.copy() - expected[1].loc[3] = 5 - expected[2].loc[0] = 3 - expected[3].loc[1] = 3 + expected.loc[3,1] = 5 + expected.loc[0,2] = 3 + expected.loc[1,3] = 3 expected[4] = expected[4].astype(np.float64) assert_frame_equal(result, expected) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 3def1b74af0c7..6f39750de9d9b 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1664,7 +1664,7 @@ def test_cythonized_aggers(self): 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) - df['C'][2:10:2] = nan + df.loc[2:10:2,'C'] = nan def _testit(op): # single column diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index b8f51d0ca9950..3552c75900745 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -10,6 +10,7 @@ import pandas as pd import pandas.core.common as com +from pandas import option_context from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, MultiIndex, Float64Index, Timestamp) from pandas.util.testing import (assert_almost_equal, assert_series_equal, @@ -2320,10 +2321,12 @@ def test_ix_assign_column_mixed(self): assert_frame_equal(df,expected) # ok, but chained assignments are dangerous - df = pd.DataFrame({'a': lrange(4) }) - df['b'] = np.nan - df['b'].ix[[1,3]] = [100,-100] - assert_frame_equal(df,expected) + # if we turn off chained assignement it will work + with option_context('chained_assignment',None): + df = pd.DataFrame({'a': lrange(4) }) + df['b'] = np.nan + df['b'].ix[[1,3]] = [100,-100] + assert_frame_equal(df,expected) def test_ix_get_set_consistency(self): @@ -3036,22 +3039,26 @@ def test_cache_updating(self): self.assertEqual(result, 2) def test_slice_consolidate_invalidate_item_cache(self): - # #3970 - df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) - # Creates a second float block - df["cc"] = 0.0 + # this is chained assignment, but will 'work' + with option_context('chained_assignment',None): + + # #3970 + df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) + + # Creates a second float block + df["cc"] = 0.0 - # caches a reference to the 'bb' series - df["bb"] + # caches a reference to the 'bb' series + df["bb"] - # repr machinery triggers consolidation - repr(df) + # repr machinery triggers consolidation + repr(df) - # Assignment to wrong series - df['bb'].iloc[0] = 0.17 - df._clear_item_cache() - self.assertAlmostEqual(df['bb'][0], 0.17) + # Assignment to wrong series + df['bb'].iloc[0] = 0.17 + df._clear_item_cache() + self.assertAlmostEqual(df['bb'][0], 0.17) def test_setitem_cache_updating(self): # GH 5424 @@ -3072,6 +3079,7 @@ def test_setitem_cache_updating(self): # GH 7084 # not updating cache on series setting with slices + expected = DataFrame({'A': [600, 600, 600]}, index=date_range('5/7/2014', '5/9/2014')) out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) @@ -3079,9 +3087,18 @@ def test_setitem_cache_updating(self): six = Timestamp('5/7/2014') eix = Timestamp('5/9/2014') for ix, row in df.iterrows(): - out[row['C']][six:eix] = out[row['C']][six:eix] + row['D'] + out.loc[six:eix,row['C']] = out.loc[six:eix,row['C']] + row['D'] + + assert_frame_equal(out, expected) + assert_series_equal(out['A'], expected['A']) + + # try via a chain indexing + # this actually works + out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) + for ix, row in df.iterrows(): + v = out[row['C']][six:eix] + row['D'] + out[row['C']][six:eix] = v - expected = DataFrame({'A': [600, 600, 600]}, index=date_range('5/7/2014', '5/9/2014')) assert_frame_equal(out, expected) assert_series_equal(out['A'], expected['A']) @@ -3135,17 +3152,19 @@ def test_detect_chained_assignment(self): expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') self.assertIsNone(df.is_copy) - df['A'][0] = -5 df['A'][1] = -6 assert_frame_equal(df, expected) - expected = DataFrame([[-5,2],[np.nan,3.]],columns=list('AB')) + # test with the chaining df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) self.assertIsNone(df.is_copy) - df['A'][0] = -5 - df['A'][1] = np.nan - assert_frame_equal(df, expected) + def f(): + df['A'][0] = -5 + self.assertRaises(com.SettingWithCopyError, f) + def f(): + df['A'][1] = np.nan + self.assertRaises(com.SettingWithCopyError, f) self.assertIsNone(df['A'].is_copy) # using a copy (the chain), fails @@ -3167,22 +3186,18 @@ def f(): indexer = df.a.str.startswith('o') df[indexer]['c'] = 42 self.assertRaises(com.SettingWithCopyError, f) - df['c'][df.a.str.startswith('o')] = 42 - assert_frame_equal(df,expected) expected = DataFrame({'A':[111,'bbb','ccc'],'B':[1,2,3]}) df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - df['A'][0] = 111 + def f(): + df['A'][0] = 111 + self.assertRaises(com.SettingWithCopyError, f) def f(): df.loc[0]['A'] = 111 self.assertRaises(com.SettingWithCopyError, f) - assert_frame_equal(df,expected) - # warnings - pd.set_option('chained_assignment','warn') - df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning): - df.loc[0]['A'] = 111 + df.loc[0,'A'] = 111 + assert_frame_equal(df,expected) # make sure that is_copy is picked up reconstruction # GH5475 @@ -3196,7 +3211,6 @@ def f(): # a suprious raise as we are setting the entire column here # GH5597 - pd.set_option('chained_assignment','raise') from string import ascii_letters as letters def random_text(nobs=100): @@ -3295,6 +3309,28 @@ def f(): df.iloc[0:5]['group'] = 'a' self.assertRaises(com.SettingWithCopyError, f) + # mixed type setting + # same dtype & changing dtype + df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) + + def f(): + df.ix[2]['D'] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + def f(): + df.ix[2]['C'] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + def f(): + df['C'][2] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + + def test_detect_chained_assignment_warnings(self): + + # warnings + with option_context('chained_assignment','warn'): + df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) + with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning): + df.loc[0]['A'] = 111 + def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index ser = {256: 2321.0, 1: 78.0, 2: 2716.0, 3: 0.0, 4: 369.0, 5: 0.0, 6: 269.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 3536.0, 11: 0.0, 12: 24.0, 13: 0.0, 14: 931.0, 15: 0.0, 16: 101.0, 17: 78.0, 18: 9643.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 63761.0, 23: 0.0, 24: 446.0, 25: 0.0, 26: 34773.0, 27: 0.0, 28: 729.0, 29: 78.0, 30: 0.0, 31: 0.0, 32: 3374.0, 33: 0.0, 34: 1391.0, 35: 0.0, 36: 361.0, 37: 0.0, 38: 61808.0, 39: 0.0, 40: 0.0, 41: 0.0, 42: 6677.0, 43: 0.0, 44: 802.0, 45: 0.0, 46: 2691.0, 47: 0.0, 48: 3582.0, 49: 0.0, 50: 734.0, 51: 0.0, 52: 627.0, 53: 70.0, 54: 2584.0, 55: 0.0, 56: 324.0, 57: 0.0, 58: 605.0, 59: 0.0, 60: 0.0, 61: 0.0, 62: 3989.0, 63: 10.0, 64: 42.0, 65: 0.0, 66: 904.0, 67: 0.0, 68: 88.0, 69: 70.0, 70: 8172.0, 71: 0.0, 72: 0.0, 73: 0.0, 74: 64902.0, 75: 0.0, 76: 347.0, 77: 0.0, 78: 36605.0, 79: 0.0, 80: 379.0, 81: 70.0, 82: 0.0, 83: 0.0, 84: 3001.0, 85: 0.0, 86: 1630.0, 87: 7.0, 88: 364.0, 89: 0.0, 90: 67404.0, 91: 9.0, 92: 0.0, 93: 0.0, 94: 7685.0, 95: 0.0, 96: 1017.0, 97: 0.0, 98: 2831.0, 99: 0.0, 100: 2963.0, 101: 0.0, 102: 854.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0, 108: 0.0, 109: 0.0, 110: 0.0, 111: 0.0, 112: 0.0, 113: 0.0, 114: 0.0, 115: 0.0, 116: 0.0, 117: 0.0, 118: 0.0, 119: 0.0, 120: 0.0, 121: 0.0, 122: 0.0, 123: 0.0, 124: 0.0, 125: 0.0, 126: 67744.0, 127: 22.0, 128: 264.0, 129: 0.0, 260: 197.0, 268: 0.0, 265: 0.0, 269: 0.0, 261: 0.0, 266: 1198.0, 267: 0.0, 262: 2629.0, 258: 775.0, 257: 0.0, 263: 0.0, 259: 0.0, 264: 163.0, 250: 10326.0, 251: 0.0, 252: 1228.0, 253: 0.0, 254: 2769.0, 255: 0.0} diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index aa718a11d97cf..4ecb9a1430eba 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -3668,6 +3668,8 @@ def test_underlying_data_conversion(self): tm.assert_frame_equal(df,expected) # GH 3970 + # these are chained assignments as well + pd.set_option('chained_assignment',None) df = DataFrame({ "aa":range(5), "bb":[2.2]*5}) df["cc"] = 0.0 ck = [True]*len(df) @@ -3675,6 +3677,7 @@ def test_underlying_data_conversion(self): df_tmp = df.iloc[ck] df["bb"].iloc[0] = .15 self.assertEqual(df['bb'].iloc[0], 0.15) + pd.set_option('chained_assignment','raise') # GH 3217 df = DataFrame(dict(a = [1,3], b = [np.nan, 2])) diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index df2f270346e20..919f30ef2a72f 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -930,8 +930,8 @@ def test_left_join_index_preserve_order(self): expected = left.copy() expected['v2'] = np.nan - expected['v2'][(expected.k1 == 2) & (expected.k2 == 'bar')] = 5 - expected['v2'][(expected.k1 == 1) & (expected.k2 == 'foo')] = 7 + expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'),'v2'] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'),'v2'] = 7 tm.assert_frame_equal(result, expected) @@ -948,8 +948,8 @@ def test_left_join_index_preserve_order(self): expected = left.copy() expected['v2'] = np.nan - expected['v2'][(expected.k1 == 2) & (expected.k2 == 'bar')] = 5 - expected['v2'][(expected.k1 == 1) & (expected.k2 == 'foo')] = 7 + expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'),'v2'] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'),'v2'] = 7 tm.assert_frame_equal(result, expected) @@ -976,8 +976,8 @@ def _test(dtype1,dtype2): if dtype2.kind == 'i': dtype2 = np.dtype('float64') expected['v2'] = np.array(np.nan,dtype=dtype2) - expected['v2'][(expected.k1 == 2) & (expected.k2 == 'bar')] = 5 - expected['v2'][(expected.k1 == 1) & (expected.k2 == 'foo')] = 7 + expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'),'v2'] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'),'v2'] = 7 tm.assert_frame_equal(result, expected) @@ -1683,7 +1683,7 @@ def test_handle_empty_objects(self): expected = df.ix[:, ['a', 'b', 'c', 'd', 'foo']] expected['foo'] = expected['foo'].astype('O') - expected['foo'][:5] = 'bar' + expected.loc[0:4,'foo'] = 'bar' tm.assert_frame_equal(concatted, expected)