Skip to content

Commit 24044d9

Browse files
committed
Merge pull request #3144 from jreback/dtypes
TST: putmasking changing dtype when not necessary
2 parents e04a239 + a9f4db4 commit 24044d9

File tree

3 files changed

+56
-16
lines changed

3 files changed

+56
-16
lines changed

pandas/core/common.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -754,17 +754,36 @@ def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
754754
if mask.any():
755755

756756
def changeit():
757+
757758
# our type is wrong here, need to upcast
758-
if (-mask).any():
759-
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
760-
np.putmask(r, mask, other)
759+
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
760+
np.putmask(r, mask, other)
761+
762+
# we need to actually change the dtype here
763+
if change is not None:
764+
change.dtype = r.dtype
765+
change[:] = r
761766

762-
# we need to actually change the dtype here
763-
if change is not None:
764-
change.dtype = r.dtype
765-
change[:] = r
767+
return r, True
768+
769+
# we want to decide whether putmask will work
770+
# if we have nans in the False portion of our mask then we need to upcast (possibly)
771+
# otherwise we DON'T want to upcast (e.g. if we have values, say integers, in
772+
# the success portion then it's ok to not upcast)
773+
new_dtype, fill_value = _maybe_promote(result.dtype,other)
774+
if new_dtype != result.dtype:
775+
776+
# we have a scalar or len 0 ndarray
777+
# and it's nan and we are changing some values
778+
if np.isscalar(other) or (isinstance(other,np.ndarray) and other.ndim < 1):
779+
if isnull(other):
780+
return changeit()
781+
782+
# we have an ndarray and the masking has nans in it
783+
else:
766784

767-
return r, True
785+
if isnull(other[mask]).any():
786+
return changeit()
768787

769788
try:
770789
np.putmask(result, mask, other)
@@ -811,9 +830,9 @@ def _possibly_downcast_to_dtype(result, dtype):
811830
return result
812831

813832
try:
814-
if dtype == np.float_:
833+
if issubclass(dtype.type,np.floating):
815834
return result.astype(dtype)
816-
elif dtype == np.bool_ or dtype == np.int_:
835+
elif dtype == np.bool_ or issubclass(dtype.type,np.integer):
817836
if issubclass(result.dtype.type, np.number) and notnull(result).all():
818837
new_result = result.astype(dtype)
819838
if (new_result == result).all():

pandas/core/index.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
107107
return PeriodIndex(data, copy=copy, name=name)
108108

109109
if issubclass(data.dtype.type, np.integer):
110-
return Int64Index(data, copy=copy, name=name)
110+
return Int64Index(data, copy=copy, dtype=dtype, name=name)
111111

112112
subarr = com._ensure_object(data)
113113
elif np.isscalar(data):
@@ -1296,7 +1296,12 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
12961296
raise TypeError('String dtype not supported, you may need '
12971297
'to explicitly cast to int')
12981298
elif issubclass(data.dtype.type, np.integer):
1299-
subarr = np.array(data, dtype=np.int64, copy=copy)
1299+
# don't force the upcast as we may be dealing
1300+
# with a platform int
1301+
if dtype is None or not issubclass(np.dtype(dtype).type, np.integer):
1302+
dtype = np.int64
1303+
1304+
subarr = np.array(data, dtype=dtype, copy=copy)
13001305
else:
13011306
subarr = np.array(data, dtype=np.int64, copy=copy)
13021307
if len(data) > 0:
@@ -1316,10 +1321,6 @@ def inferred_type(self):
13161321
def _constructor(self):
13171322
return Int64Index
13181323

1319-
@cache_readonly
1320-
def dtype(self):
1321-
return np.dtype('int64')
1322-
13231324
@property
13241325
def is_all_dates(self):
13251326
"""

pandas/tests/test_common.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,26 @@ def test_ensure_int32():
253253
result = com._ensure_int32(values)
254254
assert(result.dtype == np.int32)
255255

256+
def test_ensure_platform_int():
257+
258+
# verify that when we create certain types of indices
259+
# they remain the correct type under platform conversions
260+
from pandas.core.index import Int64Index
261+
262+
# int64
263+
x = Int64Index([1, 2, 3], dtype='int64')
264+
assert(x.dtype == np.int64)
265+
266+
pi = com._ensure_platform_int(x)
267+
assert(pi.dtype == np.int_)
268+
269+
# int32
270+
x = Int64Index([1, 2, 3], dtype='int32')
271+
assert(x.dtype == np.int32)
272+
273+
pi = com._ensure_platform_int(x)
274+
assert(pi.dtype == np.int_)
275+
256276
# TODO: fix this broken test
257277

258278
# def test_console_encode():

0 commit comments

Comments
 (0)