Skip to content

TST: putmasking changing dtype when not necessary #3144

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 23, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,17 +754,36 @@ def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
if mask.any():

def changeit():

# our type is wrong here, need to upcast
if (-mask).any():
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(r, mask, other)
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(r, mask, other)

# we need to actually change the dtype here
if change is not None:
change.dtype = r.dtype
change[:] = r

# we need to actually change the dtype here
if change is not None:
change.dtype = r.dtype
change[:] = r
return r, True

# we want to decide whether putmask will work
# if we have nans in the False portion of our mask then we need to upcast (possibly)
# otherwise we DON'T want to upcast (e.g. if we have values, say integers, in
# the success portion then it's ok to not upcast)
new_dtype, fill_value = _maybe_promote(result.dtype,other)
if new_dtype != result.dtype:

# we have a scalar or 0-dim ndarray
# and it's nan and we are changing some values
if np.isscalar(other) or (isinstance(other,np.ndarray) and other.ndim < 1):
if isnull(other):
return changeit()

# we have an ndarray and the masking has nans in it
else:

return r, True
if isnull(other[mask]).any():
return changeit()

try:
np.putmask(result, mask, other)
Expand Down Expand Up @@ -811,9 +830,9 @@ def _possibly_downcast_to_dtype(result, dtype):
return result

try:
if dtype == np.float_:
if issubclass(dtype.type,np.floating):
return result.astype(dtype)
elif dtype == np.bool_ or dtype == np.int_:
elif dtype == np.bool_ or issubclass(dtype.type,np.integer):
if issubclass(result.dtype.type, np.number) and notnull(result).all():
new_result = result.astype(dtype)
if (new_result == result).all():
Expand Down
13 changes: 7 additions & 6 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
return PeriodIndex(data, copy=copy, name=name)

if issubclass(data.dtype.type, np.integer):
return Int64Index(data, copy=copy, name=name)
return Int64Index(data, copy=copy, dtype=dtype, name=name)

subarr = com._ensure_object(data)
elif np.isscalar(data):
Expand Down Expand Up @@ -1296,7 +1296,12 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
raise TypeError('String dtype not supported, you may need '
'to explicitly cast to int')
elif issubclass(data.dtype.type, np.integer):
subarr = np.array(data, dtype=np.int64, copy=copy)
# don't force the upcast as we may be dealing
# with a platform int
if dtype is None or not issubclass(np.dtype(dtype).type, np.integer):
dtype = np.int64

subarr = np.array(data, dtype=dtype, copy=copy)
else:
subarr = np.array(data, dtype=np.int64, copy=copy)
if len(data) > 0:
Expand All @@ -1316,10 +1321,6 @@ def inferred_type(self):
def _constructor(self):
return Int64Index

@cache_readonly
def dtype(self):
return np.dtype('int64')

@property
def is_all_dates(self):
"""
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,26 @@ def test_ensure_int32():
result = com._ensure_int32(values)
assert(result.dtype == np.int32)

def test_ensure_platform_int():
    """Check Int64Index dtype handling under platform-int conversion.

    For each requested integer dtype, the index built with that dtype must
    report it, and passing the index through ``com._ensure_platform_int``
    must give a result whose dtype is ``np.int_``.
    """
    from pandas.core.index import Int64Index

    # (dtype requested at construction, dtype the index must report)
    cases = (('int64', np.int64), ('int32', np.int32))

    for requested, expected in cases:
        index = Int64Index([1, 2, 3], dtype=requested)
        assert index.dtype == expected

        converted = com._ensure_platform_int(index)
        assert converted.dtype == np.int_

# TODO: fix this broken test

# def test_console_encode():
Expand Down