From 195b8304ee3698ba8f15acee139e74001137bd63 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 5 Feb 2017 22:09:13 -0500 Subject: [PATCH] BUG: Error when specifying int index containing NaN Partially addresses gh-15187. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/base.py | 27 +++++++++++++++++++++++---- pandas/tests/indexes/test_base.py | 17 +++++++++++++++++ pandas/tests/indexes/test_numeric.py | 27 ++++++++++++++++++++++++++- pandas/tests/indexes/test_range.py | 28 +++++++++++++++++++++++++++- 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8f671062464f0..a0f65c27ba993 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -765,6 +765,7 @@ Bug Fixes - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 607a463083fdd..7f46f437489a1 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -203,6 +203,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if inferred == 'integer': data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: + if isnull(data).any(): + raise ValueError('cannot convert float ' + 'NaN to integer') # If we are actually all equal to integers, # then coerce to integer. @@ -230,8 +233,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, else: data = np.array(data, dtype=dtype, copy=copy) - except (TypeError, ValueError): - pass + except (TypeError, ValueError) as e: + msg = str(e) + if 'cannot convert float' in msg: + raise # maybe coerce to a sub-class from pandas.tseries.period import (PeriodIndex, @@ -585,7 +590,14 @@ def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) - return self._shallow_copy_with_infer(values, dtype=self.dtype) + + dtype = self.dtype + if self._is_numeric_dtype and np.any(isnull(values)): + # We can't coerce to the numeric dtype of "self" (unless + # it's float) if there are NaN values in our output. + dtype = None + + return self._shallow_copy_with_infer(values, dtype=dtype) def ravel(self, order='C'): """ @@ -689,7 +701,14 @@ def _coerce_scalar_to_index(self, item): ---------- item : scalar item to coerce """ - return Index([item], dtype=self.dtype, **self._get_attributes_dict()) + dtype = self.dtype + + if self._is_numeric_dtype and isnull(item): + # We can't coerce to the numeric dtype of "self" (unless + # it's float) if there are NaN values in our output. + dtype = None + + return Index([item], dtype=dtype, **self._get_attributes_dict()) _index_shared_docs['copy'] = """ Make a copy of this object. Name and dtype sets those attributes on diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 8c0a399cb58b3..05d3478ab0705 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -199,6 +199,23 @@ def __array__(self, dtype=None): result = pd.Index(ArrayLike(array)) self.assert_index_equal(result, expected) + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + msg = "cannot convert" + + with tm.assertRaisesRegexp(ValueError, msg): + Index(data, dtype='int64') + + with tm.assertRaisesRegexp(ValueError, msg): + Index(data, dtype='uint64') + + # This, however, should not break + # because NaN is float. + expected = Float64Index(data) + result = Index(data, dtype='float') + tm.assert_index_equal(result, expected) + def test_index_ctor_infer_nan_nat(self): # GH 13467 exp = pd.Float64Index([np.nan, np.nan]) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e23e7c19ed799..d0ce34169f79e 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -5,7 +5,7 @@ import numpy as np -from pandas import (date_range, Series, Index, Float64Index, +from pandas import (date_range, notnull, Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm @@ -686,6 +686,31 @@ def test_coerce_list(self): arr = Index([1, 2, 3, 4], dtype=object) tm.assertIsInstance(arr, Index) + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist()) + + result = i.where(cond) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist()) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 38e715fce2720..53c88897d6764 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -8,7 +8,8 @@ import numpy as np -from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex) +from pandas import (notnull, Series, Index, Float64Index, + Int64Index, RangeIndex) from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm @@ -915,3 +916,28 @@ def test_len_specialised(self): i = RangeIndex(0, 5, step) self.assertEqual(len(i), 0) + + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist()) + + result = i.where(cond) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist()) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected)