From 7acbbd47f9e9d11f86ab3244988ac92f449c878d Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 21 Jan 2014 17:35:18 -0500 Subject: [PATCH] BUG: Possible segfault when chained indexing with an object array under numpy 1.7.1 (GH6026) --- doc/source/release.rst | 1 + pandas/__init__.py | 1 + pandas/core/generic.py | 11 ++++++++--- pandas/core/internals.py | 32 ++++++++++++++++++++++++++------ pandas/tests/test_indexing.py | 23 +++++++++++++++++++++++ 5 files changed, 59 insertions(+), 9 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 69dc688cd6097..77ce69c40bb9b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -134,6 +134,7 @@ Bug Fixes - Bug in Series.xs with a multi-index (:issue:`6018`) - Bug in Series construction of mixed type with datelike and an integer (which should result in object type and not automatic conversion) (:issue:`6028`) + - Possible segfault when chained indexing with an object array under numpy 1.7.1 (:issue:`6026`) pandas 0.13.0 ------------- diff --git a/pandas/__init__.py b/pandas/__init__.py index 0fdcf655ca47a..ff5588e778284 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -29,6 +29,7 @@ _np_version = np.version.short_version _np_version_under1p6 = LooseVersion(_np_version) < '1.6' _np_version_under1p7 = LooseVersion(_np_version) < '1.7' +_np_version_under1p8 = LooseVersion(_np_version) < '1.8' from pandas.version import version as __version__ from pandas.info import __doc__ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 000d6a4f6da9b..eb24301e5e603 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1005,9 +1005,14 @@ def _box_item_values(self, key, values): raise NotImplementedError def _maybe_cache_changed(self, item, value): - """ the object has called back to us saying - maybe it has changed """ - self._data.set(item, value) + """ + the object has called back to us saying + maybe it has changed + + numpy < 1.8 has an issue with object 
arrays and aliasing + GH6026 + """ + self._data.set(item, value, check=pd._np_version_under1p8) @property def _is_cached(self): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 60e9baf005eb4..f73440be61600 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -26,7 +26,6 @@ from pandas.compat import range, lrange, lmap, callable, map, zip, u from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type - class Block(PandasObject): """ @@ -279,7 +278,7 @@ def get(self, item): def iget(self, i): return self.values[i] - def set(self, item, value): + def set(self, item, value, check=False): """ Modify Block in-place with new item value @@ -1360,6 +1359,26 @@ def convert(self, convert_dates=True, convert_numeric=True, convert_timedeltas=T return blocks + def set(self, item, value, check=False): + """ + Modify Block in-place with new item value + + Returns + ------- + None + """ + + loc = self.items.get_loc(item) + + # GH6026 + if check: + try: + if (self.values[loc] == value).all(): + return + except: + pass + self.values[loc] = value + def _maybe_downcast(self, blocks, downcast=None): if downcast is not None: @@ -1601,7 +1620,7 @@ def astype(self, dtype, copy=False, raise_on_error=True): return self._astype(dtype, copy=copy, raise_on_error=raise_on_error, klass=klass) - def set(self, item, value): + def set(self, item, value, check=False): """ Modify Block in-place with new item value @@ -1714,7 +1733,7 @@ def prepare_for_merge(self, **kwargs): def post_merge(self, items, **kwargs): return self - def set(self, item, value): + def set(self, item, value, check=False): self.values = value def get(self, item): @@ -2879,10 +2898,11 @@ def delete(self, item): if not is_unique: self._consolidate_inplace() - def set(self, item, value): + def set(self, item, value, check=False): """ Set new item in-place. Does not consolidate. 
Adds new Block if not contained in the current set of items + if check, then validate that we are not setting the same data in-place """ if not isinstance(value, SparseArray): if value.ndim == self.ndim - 1: @@ -2898,7 +2918,7 @@ def _set_item(item, arr): self._delete_from_block(i, item) self._add_new_block(item, arr, loc=None) else: - block.set(item, arr) + block.set(item, arr, check=check) try: diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 4e60037017645..b763b885fe7b8 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1950,6 +1950,29 @@ def test_setitem_cache_updating(self): self.assert_(df.ix[0,'c'] == 0.0) self.assert_(df.ix[7,'c'] == 1.0) + def test_setitem_chained_setfault(self): + + # GH6026 + # segfaults under numpy 1.7.1 (ok on 1.8) + data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] + mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] + + df = DataFrame({'response': np.array(data)}) + mask = df.response == 'timeout' + df.response[mask] = 'none' + assert_frame_equal(df, DataFrame({'response': mdata })) + + recarray = np.rec.fromarrays([data], names=['response']) + df = DataFrame(recarray) + mask = df.response == 'timeout' + df.response[mask] = 'none' + assert_frame_equal(df, DataFrame({'response': mdata })) + + df = DataFrame({'response': data, 'response1' : data }) + mask = df.response == 'timeout' + df.response[mask] = 'none' + assert_frame_equal(df, DataFrame({'response': mdata, 'response1' : data })) + def test_detect_chained_assignment(self): pd.set_option('chained_assignment','raise')