Skip to content

BUG: Bug in inplace operations with column sub-selections on the lhs (GH8511) #8520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 9, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,46 @@ API changes
df
df.dtypes

- In prior versions, updating a pandas object inplace would not reflect in other python references to this object. (:issue:`8511`,:issue:`5104`)

.. ipython:: python

s = Series([1, 2, 3])
s2 = s
s += 1.5

Behavior prior to v0.15.0

.. code-block:: python


# the original object
In [5]: s
Out[5]:
0 2.5
1 3.5
2 4.5
dtype: float64


# a reference to the original object
In [7]: s2
Out[7]:
0 1
1 2
2 3
dtype: int64

This is now the correct behavior

.. ipython:: python

# the original object
s

# a reference to the original object
s2

- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`).

- ``read_hdf`` now raises ``IOError`` when a file that doesn't exist is passed in. Previously, a new, empty file was created, and a ``KeyError`` raised (:issue:`7715`).
Expand Down Expand Up @@ -954,7 +994,6 @@ Bug Fixes
- Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`)



- Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`)
- Bug in ``StataReader`` which did not read variable labels in 117 files due to difference between Stata documentation and implementation (:issue:`7816`)
- Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,6 @@ def duplicated(self, take_last=False):
#----------------------------------------------------------------------
# abstracts

def _update_inplace(self, result):
def _update_inplace(self, result, **kwargs):
raise NotImplementedError

37 changes: 29 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,9 +1108,21 @@ def _is_view(self):
""" boolean : return if I am a view of another array """
return self._data.is_view

def _maybe_update_cacher(self, clear=False):
""" see if we need to update our parent cacher
if clear, then clear our cache """
def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
"""

see if we need to update our parent cacher
if clear, then clear our cache

Parameters
----------
clear : boolean, default False
clear the item cache
verify_is_copy : boolean, default True
provide is_copy checks

"""

cacher = getattr(self, '_cacher', None)
if cacher is not None:
ref = cacher[1]()
Expand All @@ -1125,8 +1137,8 @@ def _maybe_update_cacher(self, clear=False):
except:
pass

# check if we are a copy
self._check_setitem_copy(stacklevel=5, t='referant')
if verify_is_copy:
self._check_setitem_copy(stacklevel=5, t='referant')

if clear:
self._clear_item_cache()
Expand Down Expand Up @@ -1564,14 +1576,23 @@ def drop(self, labels, axis=0, level=None, inplace=False, **kwargs):
else:
return result

def _update_inplace(self, result):
"replace self internals with result."
def _update_inplace(self, result, verify_is_copy=True):
"""
replace self internals with result.

Parameters
----------
verify_is_copy : boolean, default True
provide is_copy checks

"""
# NOTE: This does *not* call __finalize__ and that's an explicit
# decision that we may revisit in the future.

self._reset_cache()
self._clear_item_cache()
self._data = getattr(result,'_data',result)
self._maybe_update_cacher()
self._maybe_update_cacher(verify_is_copy=verify_is_copy)

def add_prefix(self, prefix):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def _simple_new(cls, values, name=None, **kwargs):
result._reset_identity()
return result

def _update_inplace(self, result):
def _update_inplace(self, result, **kwargs):
# guard when called from IndexOpsMixin
raise TypeError("Index can't be updated inplace")

Expand Down
29 changes: 24 additions & 5 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,20 +161,39 @@ def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None,
if passed, will not set functions with names in exclude
"""
radd_func = radd_func or operator.add

# in frame, special methods have default_axis = None, comp methods use
# 'columns'

new_methods = _create_methods(arith_method, radd_func, comp_method,
bool_method, use_numexpr, default_axis=None,
special=True)

# inplace operators (I feel like these should get passed an `inplace=True`
# or just be removed

def _wrap_inplace_method(method):
"""
return an inplace wrapper for this method
"""

def f(self, other):
result = method(self, other)

# this makes sure that we are aligned like the input
# we are updating inplace so we want to ignore is_copy
self._update_inplace(result.reindex_like(self,copy=False)._data,
verify_is_copy=False)

return self
return f

new_methods.update(dict(
__iadd__=new_methods["__add__"],
__isub__=new_methods["__sub__"],
__imul__=new_methods["__mul__"],
__itruediv__=new_methods["__truediv__"],
__ipow__=new_methods["__pow__"]
__iadd__=_wrap_inplace_method(new_methods["__add__"]),
__isub__=_wrap_inplace_method(new_methods["__sub__"]),
__imul__=_wrap_inplace_method(new_methods["__mul__"]),
__itruediv__=_wrap_inplace_method(new_methods["__truediv__"]),
__ipow__=_wrap_inplace_method(new_methods["__pow__"]),
))
if not compat.PY3:
new_methods["__idiv__"] = new_methods["__div__"]
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,9 @@ def _set_subtyp(self, is_all_dates):
else:
object.__setattr__(self, '_subtyp', 'series')

def _update_inplace(self, result):
return generic.NDFrame._update_inplace(self, result)
def _update_inplace(self, result, **kwargs):
# we want to call the generic version and not the IndexOpsMixin
return generic.NDFrame._update_inplace(self, result, **kwargs)

# ndarray compatibility
@property
Expand Down
104 changes: 103 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,109 @@ def test_setitem_mulit_index(self):
df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j]
assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')])

def test_inplace_ops_alignment(self):

# inplace ops / ops alignment
# GH 8511

columns = list('abcdefg')
X_orig = DataFrame(np.arange(10*len(columns)).reshape(-1,len(columns)), columns=columns, index=range(10))
Z = 100*X_orig.iloc[:,1:-1].copy()
block1 = list('bedcf')
subs = list('bcdef')

# add
X = X_orig.copy()
result1 = (X[block1] + Z).reindex(columns=subs)

X[block1] += Z
result2 = X.reindex(columns=subs)

X = X_orig.copy()
result3 = (X[block1] + Z[block1]).reindex(columns=subs)

X[block1] += Z[block1]
result4 = X.reindex(columns=subs)

assert_frame_equal(result1, result2)
assert_frame_equal(result1, result3)
assert_frame_equal(result1, result4)

# sub
X = X_orig.copy()
result1 = (X[block1] - Z).reindex(columns=subs)

X[block1] -= Z
result2 = X.reindex(columns=subs)

X = X_orig.copy()
result3 = (X[block1] - Z[block1]).reindex(columns=subs)

X[block1] -= Z[block1]
result4 = X.reindex(columns=subs)

assert_frame_equal(result1, result2)
assert_frame_equal(result1, result3)
assert_frame_equal(result1, result4)

def test_inplace_ops_identity(self):

# GH 5104
# make sure that we are actually changing the object
s_orig = Series([1, 2, 3])
df_orig = DataFrame(np.random.randint(0,5,size=10).reshape(-1,5))

# no dtype change
s = s_orig.copy()
s2 = s
s += 1
assert_series_equal(s,s2)
assert_series_equal(s_orig+1,s)
self.assertIs(s,s2)
self.assertIs(s._data,s2._data)

df = df_orig.copy()
df2 = df
df += 1
assert_frame_equal(df,df2)
assert_frame_equal(df_orig+1,df)
self.assertIs(df,df2)
self.assertIs(df._data,df2._data)

# dtype change
s = s_orig.copy()
s2 = s
s += 1.5
assert_series_equal(s,s2)
assert_series_equal(s_orig+1.5,s)

df = df_orig.copy()
df2 = df
df += 1.5
assert_frame_equal(df,df2)
assert_frame_equal(df_orig+1.5,df)
self.assertIs(df,df2)
self.assertIs(df._data,df2._data)

# mixed dtype
arr = np.random.randint(0,10,size=5)
df_orig = DataFrame({'A' : arr.copy(), 'B' : 'foo'})
df = df_orig.copy()
df2 = df
df['A'] += 1
expected = DataFrame({'A' : arr.copy()+1, 'B' : 'foo'})
assert_frame_equal(df,expected)
assert_frame_equal(df2,expected)
self.assertIs(df._data,df2._data)

df = df_orig.copy()
df2 = df
df['A'] += 1.5
expected = DataFrame({'A' : arr.copy()+1.5, 'B' : 'foo'})
assert_frame_equal(df,expected)
assert_frame_equal(df2,expected)
self.assertIs(df._data,df2._data)

def test_getitem_boolean(self):
# boolean indexing
d = self.tsframe.index[10]
Expand Down Expand Up @@ -4979,7 +5082,6 @@ def test_div(self):
self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0)))

def test_logical_operators(self):
import operator

def _check_bin_op(op):
result = op(df1, df2)
Expand Down