Skip to content

BUG: fix unsafe dtype conversion in Series #3292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 9, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,12 +760,27 @@ def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):

def changeit():

# our type is wrong here, need to upcast
# try to directly set by expanding our array to full
# length of the boolean
om = other[mask]
om_at = om.astype(result.dtype)
if (om == om_at).all():
new_other = result.values.copy()
new_other[mask] = om_at
result[:] = new_other
return result, False

# we are forced to change the dtype of the result as the input isn't compatible
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(r, mask, other)

# we need to actually change the dtype here
if change is not None:

# if we are trying to do something unsafe, i.e. the upcast result has a
# bigger itemsize than the input we were asked to change, refuse and raise
if change.dtype.itemsize < r.dtype.itemsize:
raise Exception("cannot change dtype of input to smaller size")
change.dtype = r.dtype
change[:] = r

Expand Down
8 changes: 7 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,13 @@ def where(self, cond, other=nan, inplace=False):
if isinstance(other, Series):
other = other.reindex(ser.index)
elif isinstance(other, (tuple,list)):
other = np.array(other)

# try to set the same dtype as ourselves
new_other = np.array(other,dtype=self.dtype)
if not (new_other == np.array(other)).all():
other = np.array(other)
else:
other = new_other

if len(other) != len(ser):
icond = ~cond
Expand Down
37 changes: 32 additions & 5 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,17 +1127,44 @@ def test_where(self):
self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [0,2,3])
self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), [])

# unsafe dtype changes
for dtype in [ np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64 ]:
s = Series(np.arange(10), dtype=dtype)
mask = s < 5
s[mask] = range(2,7)
expected = Series(range(2,7) + range(5,10), dtype=dtype)
assert_series_equal(s, expected)
self.assertEquals(s.dtype, expected.dtype)

# these are allowed operations, but are upcasted
for dtype in [ np.int64, np.float64 ]:
s = Series(np.arange(10), dtype=dtype)
mask = s < 5
values = [2.5,3.5,4.5,5.5,6.5]
s[mask] = values
expected = Series(values + range(5,10), dtype='float64')
assert_series_equal(s, expected)
self.assertEquals(s.dtype, expected.dtype)

# can't do these, as they would force us to change the itemsize of the input, which is not allowed
for dtype in [ np.int8, np.int16, np.int32, np.float16, np.float32 ]:
s = Series(np.arange(10), dtype=dtype)
mask = s < 5
values = [2.5,3.5,4.5,5.5,6.5]
self.assertRaises(Exception, s.__setitem__, tuple(mask), values)

# GH3235
s = Series(np.arange(10))
mask = s < 5
s[mask] = range(5)
expected = Series(np.arange(10),dtype='float64')
assert_series_equal(s,expected)
s[mask] = range(2,7)
expected = Series(range(2,7) + range(5,10))
assert_series_equal(s, expected)
self.assertEquals(s.dtype, expected.dtype)

s = Series(np.arange(10))
mask = s > 5
s[mask] = [0]*4
expected = Series([0,1,2,3,4,5] + [0]*4,dtype='float64')
expected = Series([0,1,2,3,4,5] + [0]*4)
assert_series_equal(s,expected)

s = Series(np.arange(10))
Expand Down Expand Up @@ -3165,7 +3192,7 @@ def test_cast_on_putmask(self):
# need to upcast
s = Series([1,2],index=[1,2],dtype='int64')
s[[True, False]] = Series([0],index=[1],dtype='int64')
expected = Series([0,2],index=[1,2],dtype='float64')
expected = Series([0,2],index=[1,2],dtype='int64')

assert_series_equal(s, expected)

Expand Down