From 154252a79080afb5f906261db48c09527e211ca1 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 8 Apr 2013 18:13:50 -0700 Subject: [PATCH 1/2] BUG: test case showing why assigning to dtype is unsafe --- pandas/tests/test_series.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index dc036a933e2fe..3abcee52f181d 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1127,12 +1127,21 @@ def test_where(self): self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [0,2,3]) self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), []) + + s = Series(np.arange(10), dtype=np.int32) + mask = s < 5 + s[mask] = range(5) + expected = Series(np.arange(10), dtype=np.int32) + assert_series_equal(s, expected) + self.assertEquals(s.dtype, expected.dtype) + # GH3235 s = Series(np.arange(10)) mask = s < 5 s[mask] = range(5) - expected = Series(np.arange(10),dtype='float64') - assert_series_equal(s,expected) + expected = Series(np.arange(10)) + assert_series_equal(s, expected) + self.assertEquals(s.dtype, expected.dtype) s = Series(np.arange(10)) mask = s > 5 From b379772359a06d2fe9a646ad60bc7c4c125bfd6a Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 8 Apr 2013 22:04:00 -0400 Subject: [PATCH 2/2] BUG: fix unsafe dtype changes in putmasking on series allow boolean indexing of series w/o changing the dtype with a list of the rhs if we can preserver the dtype of the input, if we can't upcast, but can only do this in cases where we won't change the itemsize --- pandas/core/common.py | 17 +++++++++++++++- pandas/core/series.py | 8 +++++++- pandas/tests/test_series.py | 40 +++++++++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 49d3015a5c871..60d3e86e185fe 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -760,12 +760,27 @@ def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None): def changeit(): - # our type is wrong here, need to upcast + # try to directly set by expanding our array to full + # length of the boolean + om = other[mask] + om_at = om.astype(result.dtype) + if (om == om_at).all(): + new_other = result.values.copy() + new_other[mask] = om_at + result[:] = new_other + return result, False + + # we are forced to change the dtype of the result as the input isn't compatible r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True) np.putmask(r, mask, other) # we need to actually change the dtype here if change is not None: + + # if we are trying to do something unsafe + # like put a bigger dtype in a smaller one, use the smaller one + if change.dtype.itemsize < r.dtype.itemsize: + raise Exception("cannot change dtype of input to smaller size") change.dtype = r.dtype change[:] = r diff --git a/pandas/core/series.py b/pandas/core/series.py index cd9ed90c57a43..8427274488cef 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -739,7 +739,13 @@ def where(self, cond, other=nan, inplace=False): if isinstance(other, Series): other = other.reindex(ser.index) elif isinstance(other, (tuple,list)): - other = np.array(other) + + # try to set the same dtype as ourselves + new_other = np.array(other,dtype=self.dtype) + if not (new_other == np.array(other)).all(): + other = np.array(other) + else: + other = new_other if len(other) != len(ser): icond = ~cond diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 3abcee52f181d..4f17135385748 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1127,26 +1127,44 @@ def test_where(self): self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [0,2,3]) self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), []) - - s = Series(np.arange(10), dtype=np.int32) - mask = s < 5 - s[mask] = range(5) - expected = Series(np.arange(10), dtype=np.int32) - assert_series_equal(s, expected) - self.assertEquals(s.dtype, expected.dtype) + # unsafe dtype changes + for dtype in [ np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64 ]: + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + s[mask] = range(2,7) + expected = Series(range(2,7) + range(5,10), dtype=dtype) + assert_series_equal(s, expected) + self.assertEquals(s.dtype, expected.dtype) + + # these are allowed operations, but are upcasted + for dtype in [ np.int64, np.float64 ]: + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + values = [2.5,3.5,4.5,5.5,6.5] + s[mask] = values + expected = Series(values + range(5,10), dtype='float64') + assert_series_equal(s, expected) + self.assertEquals(s.dtype, expected.dtype) + + # can't do these as we are forced to change the itemsize of the input to something we cannot + for dtype in [ np.int8, np.int16, np.int32, np.float16, np.float32 ]: + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + values = [2.5,3.5,4.5,5.5,6.5] + self.assertRaises(Exception, s.__setitem__, tuple(mask), values) # GH3235 s = Series(np.arange(10)) mask = s < 5 - s[mask] = range(5) - expected = Series(np.arange(10)) + s[mask] = range(2,7) + expected = Series(range(2,7) + range(5,10)) assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) s = Series(np.arange(10)) mask = s > 5 s[mask] = [0]*4 - expected = Series([0,1,2,3,4,5] + [0]*4,dtype='float64') + expected = Series([0,1,2,3,4,5] + [0]*4) assert_series_equal(s,expected) s = Series(np.arange(10)) @@ -3174,7 +3192,7 @@ def test_cast_on_putmask(self): # need to upcast s = Series([1,2],index=[1,2],dtype='int64') s[[True, False]] = Series([0],index=[1],dtype='int64') - expected = Series([0,2],index=[1,2],dtype='float64') + expected = Series([0,2],index=[1,2],dtype='int64') assert_series_equal(s, expected)