From 9e0122a31807847dad8ce06bef42899b5f6454dc Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 8 Oct 2014 19:05:15 -0400 Subject: [PATCH] BUG: Bug in inplace operations with column sub-selections on the lhs (GH8511) --- doc/source/v0.15.0.txt | 41 ++++++++++++++- pandas/core/base.py | 2 +- pandas/core/generic.py | 37 ++++++++++--- pandas/core/index.py | 2 +- pandas/core/ops.py | 29 +++++++++-- pandas/core/series.py | 5 +- pandas/tests/test_frame.py | 104 ++++++++++++++++++++++++++++++++++++- 7 files changed, 201 insertions(+), 19 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index e76a0e57c5e33..eec424f619bde 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -272,6 +272,46 @@ API changes df df.dtypes +- In prior versions, updating a pandas object inplace would not reflect in other python references to this object. (:issue:`8511`,:issue:`5104`) + + .. ipython:: python + + s = Series([1, 2, 3]) + s2 = s + s += 1.5 + + Behavior prior to v0.15.0 + + .. code-block:: python + + + # the original object + In [5]: s + Out[5]: + 0 2.5 + 1 3.5 + 2 4.5 + dtype: float64 + + + # a reference to the original object + In [7]: s2 + Out[7]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + This is now the correct behavior + + .. ipython:: python + + # the original object + s + + # a reference to the original object + s2 + - ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`). - ``read_hdf`` now raises ``IOError`` when a file that doesn't exist is passed in. Previously, a new, empty file was created, and a ``KeyError`` raised (:issue:`7715`). @@ -954,7 +994,6 @@ Bug Fixes - Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`) - - Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`) - Bug in ``StataReader`` which did not read variable labels in 117 files due to difference between Stata documentation and implementation (:issue:`7816`) - Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`) diff --git a/pandas/core/base.py b/pandas/core/base.py index 36cf3d9c7407c..794c05db082c7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -538,6 +538,6 @@ def duplicated(self, take_last=False): #---------------------------------------------------------------------- # abstracts - def _update_inplace(self, result): + def _update_inplace(self, result, **kwargs): raise NotImplementedError diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7d3716a8a2fa6..53abfe10fe8ea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1108,9 +1108,21 @@ def _is_view(self): """ boolean : return if I am a view of another array """ return self._data.is_view - def _maybe_update_cacher(self, clear=False): - """ see if we need to update our parent cacher - if clear, then clear our cache """ + def _maybe_update_cacher(self, clear=False, verify_is_copy=True): + """ + + see if we need to update our parent cacher + if clear, then clear our cache + + Parameters + ---------- + clear : boolean, default False + clear the item cache + verify_is_copy : boolean, default True + provide is_copy checks + + """ + cacher = getattr(self, '_cacher', None) if cacher is not None: ref = cacher[1]() @@ -1125,8 +1137,8 @@ def _maybe_update_cacher(self, clear=False): except: pass - # check if we are a copy - self._check_setitem_copy(stacklevel=5, t='referant') + if verify_is_copy: + self._check_setitem_copy(stacklevel=5, t='referant') if clear: self._clear_item_cache() @@ -1564,14 +1576,23 @@ def drop(self, labels, axis=0, level=None, inplace=False, **kwargs): else: return result - def _update_inplace(self, result): - "replace self internals with result." + def _update_inplace(self, result, verify_is_copy=True): + """ + replace self internals with result. + + Parameters + ---------- + verify_is_copy : boolean, default True + provide is_copy checks + + """ # NOTE: This does *not* call __finalize__ and that's an explicit # decision that we may revisit in the future. + self._reset_cache() self._clear_item_cache() self._data = getattr(result,'_data',result) - self._maybe_update_cacher() + self._maybe_update_cacher(verify_is_copy=verify_is_copy) def add_prefix(self, prefix): """ diff --git a/pandas/core/index.py b/pandas/core/index.py index e10f4b2009817..99f1682b133c3 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -220,7 +220,7 @@ def _simple_new(cls, values, name=None, **kwargs): result._reset_identity() return result - def _update_inplace(self, result): + def _update_inplace(self, result, **kwargs): # guard when called from IndexOpsMixin raise TypeError("Index can't be updated inplace") diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2c7d92afe3b79..068cdff7fcf2d 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -161,20 +161,39 @@ def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None, if passed, will not set functions with names in exclude """ radd_func = radd_func or operator.add + # in frame, special methods have default_axis = None, comp methods use # 'columns' + new_methods = _create_methods(arith_method, radd_func, comp_method, bool_method, use_numexpr, default_axis=None, special=True) # inplace operators (I feel like these should get passed an `inplace=True` # or just be removed + + def _wrap_inplace_method(method): + """ + return an inplace wrapper for this method + """ + + def f(self, other): + result = method(self, other) + + # this makes sure that we are aligned like the input + # we are updating inplace so we want to ignore is_copy + self._update_inplace(result.reindex_like(self,copy=False)._data, + verify_is_copy=False) + + return self + return f + new_methods.update(dict( - __iadd__=new_methods["__add__"], - __isub__=new_methods["__sub__"], - __imul__=new_methods["__mul__"], - __itruediv__=new_methods["__truediv__"], - __ipow__=new_methods["__pow__"] + __iadd__=_wrap_inplace_method(new_methods["__add__"]), + __isub__=_wrap_inplace_method(new_methods["__sub__"]), + __imul__=_wrap_inplace_method(new_methods["__mul__"]), + __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), + __ipow__=_wrap_inplace_method(new_methods["__pow__"]), )) if not compat.PY3: new_methods["__idiv__"] = new_methods["__div__"] diff --git a/pandas/core/series.py b/pandas/core/series.py index 95a7b22b90338..0408d62ce302c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -267,8 +267,9 @@ def _set_subtyp(self, is_all_dates): else: object.__setattr__(self, '_subtyp', 'series') - def _update_inplace(self, result): - return generic.NDFrame._update_inplace(self, result) + def _update_inplace(self, result, **kwargs): + # we want to call the generic version and not the IndexOpsMixin + return generic.NDFrame._update_inplace(self, result, **kwargs) # ndarray compatibility @property diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ba81d98510f5c..3efd399450d11 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -249,6 +249,109 @@ def test_setitem_mulit_index(self): df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j] assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')]) + def test_inplace_ops_alignment(self): + + # inplace ops / ops alignment + # GH 8511 + + columns = list('abcdefg') + X_orig = DataFrame(np.arange(10*len(columns)).reshape(-1,len(columns)), columns=columns, index=range(10)) + Z = 100*X_orig.iloc[:,1:-1].copy() + block1 = list('bedcf') + subs = list('bcdef') + + # add + X = X_orig.copy() + result1 = (X[block1] + Z).reindex(columns=subs) + + X[block1] += Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] + Z[block1]).reindex(columns=subs) + + X[block1] += Z[block1] + result4 = X.reindex(columns=subs) + + assert_frame_equal(result1, result2) + assert_frame_equal(result1, result3) + assert_frame_equal(result1, result4) + + # sub + X = X_orig.copy() + result1 = (X[block1] - Z).reindex(columns=subs) + + X[block1] -= Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] - Z[block1]).reindex(columns=subs) + + X[block1] -= Z[block1] + result4 = X.reindex(columns=subs) + + assert_frame_equal(result1, result2) + assert_frame_equal(result1, result3) + assert_frame_equal(result1, result4) + + def test_inplace_ops_identity(self): + + # GH 5104 + # make sure that we are actually changing the object + s_orig = Series([1, 2, 3]) + df_orig = DataFrame(np.random.randint(0,5,size=10).reshape(-1,5)) + + # no dtype change + s = s_orig.copy() + s2 = s + s += 1 + assert_series_equal(s,s2) + assert_series_equal(s_orig+1,s) + self.assertIs(s,s2) + self.assertIs(s._data,s2._data) + + df = df_orig.copy() + df2 = df + df += 1 + assert_frame_equal(df,df2) + assert_frame_equal(df_orig+1,df) + self.assertIs(df,df2) + self.assertIs(df._data,df2._data) + + # dtype change + s = s_orig.copy() + s2 = s + s += 1.5 + assert_series_equal(s,s2) + assert_series_equal(s_orig+1.5,s) + + df = df_orig.copy() + df2 = df + df += 1.5 + assert_frame_equal(df,df2) + assert_frame_equal(df_orig+1.5,df) + self.assertIs(df,df2) + self.assertIs(df._data,df2._data) + + # mixed dtype + arr = np.random.randint(0,10,size=5) + df_orig = DataFrame({'A' : arr.copy(), 'B' : 'foo'}) + df = df_orig.copy() + df2 = df + df['A'] += 1 + expected = DataFrame({'A' : arr.copy()+1, 'B' : 'foo'}) + assert_frame_equal(df,expected) + assert_frame_equal(df2,expected) + self.assertIs(df._data,df2._data) + + df = df_orig.copy() + df2 = df + df['A'] += 1.5 + expected = DataFrame({'A' : arr.copy()+1.5, 'B' : 'foo'}) + assert_frame_equal(df,expected) + assert_frame_equal(df2,expected) + self.assertIs(df._data,df2._data) + def test_getitem_boolean(self): # boolean indexing d = self.tsframe.index[10] @@ -4979,7 +5082,6 @@ def test_div(self): self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0))) def test_logical_operators(self): - import operator def _check_bin_op(op): result = op(df1, df2)