diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 62e643b095c4d..143e6017b462a 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -607,7 +607,7 @@ def test_unary_in_array(self): '-37, 37, ~37, +37]'), np.array([-True, True, ~True, +True, -False, False, ~False, +False, - -37, 37, ~37, +37])) + -37, 37, ~37, +37], dtype=np.object_)) def test_disallow_scalar_bool_ops(self): exprs = '1 or 2', '1 and 2' diff --git a/pandas/core/common.py b/pandas/core/common.py index d41d49c895599..c64cfa77b9e62 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -307,7 +307,8 @@ def array_equivalent(left, right, strict_nan=False): return False # Object arrays can contain None, NaN and NaT. - if is_object_dtype(left) or is_object_dtype(right): + # string dtypes must come to this path for NumPy 1.7.1 compat + if is_string_dtype(left) or is_string_dtype(right): if not strict_nan: # pd.isnull considers NaN and None to be equivalent. 
diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index 9f41bbac03cbf..6fe559e5cacd8 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -139,7 +139,7 @@ def test_frame_from_json_to_json(self): def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None, sort=None, check_index_type=True, - check_column_type=True): + check_column_type=True, check_numpy_dtype=False): if sort is not None: df = df.sort_values(sort) else: @@ -181,14 +181,16 @@ def _check_orient(df, orient, dtype=None, numpy=False, unser.index.values.astype('i8') * 1e6) if orient == "records": # index is not captured in this orientation - assert_almost_equal(df.values, unser.values) + assert_almost_equal(df.values, unser.values, + check_dtype=check_numpy_dtype) self.assertTrue(df.columns.equals(unser.columns)) elif orient == "values": # index and cols are not captured in this orientation if numpy is True and df.shape == (0, 0): assert unser.shape[0] == 0 else: - assert_almost_equal(df.values, unser.values) + assert_almost_equal(df.values, unser.values, + check_dtype=check_numpy_dtype) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] @@ -196,7 +198,8 @@ def _check_orient(df, orient, dtype=None, numpy=False, if sort is None: unser = unser.sort_index() - assert_almost_equal(df.values, unser.values) + assert_almost_equal(df.values, unser.values, + check_dtype=check_numpy_dtype) else: if convert_axes: assert_frame_equal(df, unser, check_dtype=check_dtype, diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 345715ee0528d..26d018c56a8a8 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -272,7 +272,8 @@ def test_constructor_bool(self): self.assertEqual(arr.dtype, bool) tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) 
tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) - tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3])) + tm.assert_numpy_array_equal(arr.sp_index.indices, + np.array([2, 3], np.int32)) for dense in [arr.to_dense(), arr.values]: self.assertEqual(dense.dtype, bool) @@ -297,9 +298,11 @@ def test_constructor_float32(self): arr = SparseArray(data, dtype=np.float32) self.assertEqual(arr.dtype, np.float32) - tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3])) + tm.assert_numpy_array_equal(arr.sp_values, + np.array([1, 3], dtype=np.float32)) tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) - tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([0, 2])) + tm.assert_numpy_array_equal(arr.sp_index.indices, + np.array([0, 2], dtype=np.int32)) for dense in [arr.to_dense(), arr.values]: self.assertEqual(dense.dtype, np.float32) @@ -516,7 +519,7 @@ def test_fillna_overlap(self): # filling with existing value doesn't replace existing value with # fill_value, i.e. existing 3 remains in sp_values res = s.fillna(3) - exp = np.array([1, 3, 3, 3, 3]) + exp = np.array([1, 3, 3, 3, 3], dtype=np.float64) tm.assert_numpy_array_equal(res.to_dense(), exp) s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx index 9839c9f416160..9f102ded597fd 100644 --- a/pandas/src/testing.pyx +++ b/pandas/src/testing.pyx @@ -1,7 +1,7 @@ import numpy as np from pandas import compat -from pandas.core.common import isnull, array_equivalent +from pandas.core.common import isnull, array_equivalent, is_dtype_equal cdef NUMERIC_TYPES = ( bool, @@ -55,7 +55,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True): return True -cpdef assert_almost_equal(a, b, bint check_less_precise=False, +cpdef assert_almost_equal(a, b, bint check_less_precise=False, check_dtype=True, obj=None, lobj=None, robj=None): """Check that left and right objects are almost equal. 
@@ -66,6 +66,8 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False, check_less_precise : bool, default False Specify comparison precision. 5 digits (False) or 3 digits (True) after decimal points are compared. + check_dtype: bool, default True + check dtype if both a and b are np.ndarray obj : str, default None Specify object name being compared, internally used to show appropriate assertion message @@ -82,7 +84,7 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False, double diff = 0.0 Py_ssize_t i, na, nb double fa, fb - bint is_unequal = False + bint is_unequal = False, a_is_ndarray, b_is_ndarray if lobj is None: lobj = a @@ -97,36 +99,43 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False, assert a == b, "%r != %r" % (a, b) return True + a_is_ndarray = isinstance(a, np.ndarray) + b_is_ndarray = isinstance(b, np.ndarray) + + if obj is None: + if a_is_ndarray or b_is_ndarray: + obj = 'numpy array' + else: + obj = 'Iterable' + if isiterable(a): if not isiterable(b): - from pandas.util.testing import raise_assert_detail - if obj is None: - obj = 'Iterable' - msg = "First object is iterable, second isn't" - raise_assert_detail(obj, msg, a, b) + from pandas.util.testing import assert_class_equal + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) assert has_length(a) and has_length(b), ( "Can't compare objects without length, one or both is invalid: " - "(%r, %r)" % (a, b) - ) + "(%r, %r)" % (a, b)) - if isinstance(a, np.ndarray) and isinstance(b, np.ndarray): - if obj is None: - obj = 'numpy array' + if a_is_ndarray and b_is_ndarray: na, nb = a.size, b.size if a.shape != b.shape: from pandas.util.testing import raise_assert_detail raise_assert_detail(obj, '{0} shapes are different'.format(obj), a.shape, b.shape) + + if check_dtype and not is_dtype_equal(a, b): + from pandas.util.testing import assert_attr_equal + assert_attr_equal('dtype', a, b, obj=obj) + try: if array_equivalent(a, b, 
strict_nan=True): return True except: pass else: - if obj is None: - obj = 'Iterable' na, nb = len(a), len(b) if na != nb: @@ -149,27 +158,20 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False, return True elif isiterable(b): - from pandas.util.testing import raise_assert_detail - if obj is None: - obj = 'Iterable' - msg = "Second object is iterable, first isn't" - raise_assert_detail(obj, msg, a, b) + from pandas.util.testing import assert_class_equal + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) - if isnull(a): - assert isnull(b), ( - "First object is null, second isn't: %r != %r" % (a, b) - ) + if a == b: + # object comparison return True - elif isnull(b): - assert isnull(a), ( - "First object is not null, second is null: %r != %r" % (a, b) - ) + if isnull(a) and isnull(b): + # nan / None comparison return True - - if is_comparable_as_number(a): - assert is_comparable_as_number(b), ( - "First object is numeric, second is not: %r != %r" % (a, b) - ) + if is_comparable_as_number(a) and is_comparable_as_number(b): + if array_equivalent(a, b, strict_nan=True): + # inf comparison + return True decimal = 5 @@ -177,26 +179,17 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False, if check_less_precise: decimal = 3 - if np.isinf(a): - assert np.isinf(b), "First object is inf, second isn't" - if np.isposinf(a): - assert np.isposinf(b), "First object is positive inf, second is negative inf" - else: - assert np.isneginf(b), "First object is negative inf, second is positive inf" + fa, fb = a, b + + # case for zero + if abs(fa) < 1e-5: + if not decimal_almost_equal(fa, fb, decimal): + assert False, ( + '(very low values) expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal) + ) else: - fa, fb = a, b - - # case for zero - if abs(fa) < 1e-5: - if not decimal_almost_equal(fa, fb, decimal): - assert False, ( - '(very low values) expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal) - ) - else: - 
if not decimal_almost_equal(1, fb / fa, decimal): - assert False, 'expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal) - - else: - assert a == b, "%r != %r" % (a, b) + if not decimal_almost_equal(1, fb / fa, decimal): + assert False, 'expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal) + return True - return True + raise AssertionError("{0} != {1}".format(a, b)) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index a6e46b7d0c756..1e3940dc8f038 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1430,8 +1430,8 @@ def test_setitem_frame(self): # already aligned f = self.mixed_frame.copy() - piece = DataFrame([[1, 2], [3, 4]], index=f.index[ - 0:2], columns=['A', 'B']) + piece = DataFrame([[1., 2.], [3., 4.]], + index=f.index[0:2], columns=['A', 'B']) key = (slice(None, 2), ['A', 'B']) f.ix[key] = piece assert_almost_equal(f.ix[0:2, ['A', 'B']].values, @@ -1439,8 +1439,9 @@ def test_setitem_frame(self): # rows unaligned f = self.mixed_frame.copy() - piece = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=list( - f.index[0:2]) + ['foo', 'bar'], columns=['A', 'B']) + piece = DataFrame([[1., 2.], [3., 4.], [5., 6.], [7., 8.]], + index=list(f.index[0:2]) + ['foo', 'bar'], + columns=['A', 'B']) key = (slice(None, 2), ['A', 'B']) f.ix[key] = piece assert_almost_equal(f.ix[0:2:, ['A', 'B']].values, diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index f1e2724e628bf..8ea87e9d69c92 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -274,7 +274,7 @@ def test_argsort(self): result = ind.argsort() expected = np.array(ind).argsort() - tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_numpy_argsort(self): for k, ind in self.indices.items(): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 
1d8a52e48e468..66ddcdebff83b 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -216,13 +216,15 @@ def test_map(self): ordered=False) tm.assert_categorical_equal(result, exp) - tm.assert_numpy_array_equal(ci.map(lambda x: 1), np.array([1] * 5)) + tm.assert_numpy_array_equal(ci.map(lambda x: 1), + np.array([1] * 5, dtype=np.int64)) # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), ordered=False) def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) + result = ci.map(f) exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) @@ -340,30 +342,35 @@ def test_reindexing(self): tm.assert_numpy_array_equal(expected, actual) def test_reindex_dtype(self): - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c' - ]) + c = CategoricalIndex(['a', 'b', 'c', 'a']) + res, indexer = c.reindex(['a', 'c']) tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex( - Categorical(['a', 'c'])) - tm.assert_index_equal(res, CategoricalIndex( - ['a', 'a', 'c'], categories=['a', 'c']), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - - res, indexer = CategoricalIndex( - ['a', 'b', 'c', 'a' - ], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c']) - tm.assert_index_equal(res, Index( - ['a', 'a', 'c'], dtype='object'), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - - res, indexer = CategoricalIndex( - ['a', 'b', 'c', 'a'], - categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c'])) - tm.assert_index_equal(res, CategoricalIndex( - ['a', 'a', 'c'], categories=['a', 'c']), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) + tm.assert_numpy_array_equal(indexer, + np.array([0, 3, 2], dtype=np.int64)) + + c = CategoricalIndex(['a', 'b', 'c', 'a']) + res, indexer 
= c.reindex(Categorical(['a', 'c'])) + + exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, + np.array([0, 3, 2], dtype=np.int64)) + + c = CategoricalIndex(['a', 'b', 'c', 'a'], + categories=['a', 'b', 'c', 'd']) + res, indexer = c.reindex(['a', 'c']) + exp = Index(['a', 'a', 'c'], dtype='object') + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, + np.array([0, 3, 2], dtype=np.int64)) + + c = CategoricalIndex(['a', 'b', 'c', 'a'], + categories=['a', 'b', 'c', 'd']) + res, indexer = c.reindex(Categorical(['a', 'c'])) + exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, + np.array([0, 3, 2], dtype=np.int64)) def test_duplicates(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 46180a823c002..b8804daa6cf19 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -147,8 +147,8 @@ def assert_matching(actual, expected): self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): act = np.asarray(act) - exp = np.asarray(exp) - assert_almost_equal(act, exp) + exp = np.asarray(exp, dtype=np.object_) + tm.assert_numpy_array_equal(act, exp) # level changing [w/o mutation] ind2 = self.index.set_levels(new_levels) @@ -211,8 +211,8 @@ def assert_matching(actual, expected): self.assertEqual(len(actual), len(expected)) for act, exp in zip(actual, expected): act = np.asarray(act) - exp = np.asarray(exp) - assert_almost_equal(act, exp) + exp = np.asarray(exp, dtype=np.int8) + tm.assert_numpy_array_equal(act, exp) # label changing [w/o mutation] ind2 = self.index.set_labels(new_labels) @@ -1699,8 +1699,8 @@ def test_join_multi(self): jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) exp_idx = pd.MultiIndex.from_product( [np.arange(4), 
[1, 2]], names=['a', 'b']) - exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14]) - exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1]) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.int_) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.int64) self.assert_index_equal(jidx, exp_idx) self.assert_numpy_array_equal(lidx, exp_lidx) self.assert_numpy_array_equal(ridx, exp_ridx) @@ -1713,7 +1713,7 @@ def test_join_multi(self): # keep MultiIndex jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, - 1, -1]) + 1, -1], dtype=np.int64) self.assert_index_equal(jidx, midx) self.assertIsNone(lidx) self.assert_numpy_array_equal(ridx, exp_ridx) @@ -1743,11 +1743,11 @@ def test_reindex_level(self): exp_index2 = self.index.join(idx, level='second', how='left') self.assertTrue(target.equals(exp_index)) - exp_indexer = np.array([0, 2, 4]) + exp_indexer = np.array([0, 2, 4], dtype=np.int64) tm.assert_numpy_array_equal(indexer, exp_indexer) self.assertTrue(target2.equals(exp_index2)) - exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1], dtype=np.int64) tm.assert_numpy_array_equal(indexer2, exp_indexer2) assertRaisesRegexp(TypeError, "Fill method not supported", diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 06923e364bc63..8592ae1741a4e 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -580,9 +580,9 @@ def test_join_outer(self): eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - dtype=np.int64) + dtype=np.int_) eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], - dtype=np.int64) + dtype=np.int_) tm.assertIsInstance(res, Int64Index) self.assertTrue(res.equals(eres)) @@ -595,6 +595,8 @@ def test_join_outer(self): noidx_res = self.index.join(other_mono, 
how='outer') self.assertTrue(res.equals(noidx_res)) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], + dtype=np.int64) eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.int64) tm.assertIsInstance(res, Int64Index) @@ -617,8 +619,8 @@ def test_join_inner(self): ridx = ridx.take(ind) eres = Int64Index([2, 12]) - elidx = np.array([1, 6]) - eridx = np.array([4, 1]) + elidx = np.array([1, 6], dtype=np.int_) + eridx = np.array([4, 1], dtype=np.int_) tm.assertIsInstance(res, Int64Index) self.assertTrue(res.equals(eres)) @@ -632,7 +634,8 @@ def test_join_inner(self): res2 = self.index.intersection(other_mono) self.assertTrue(res.equals(res2)) - eridx = np.array([1, 4]) + elidx = np.array([1, 6], dtype=np.int64) + eridx = np.array([1, 4], dtype=np.int64) tm.assertIsInstance(res, Int64Index) self.assertTrue(res.equals(eres)) tm.assert_numpy_array_equal(lidx, elidx) @@ -647,7 +650,7 @@ def test_join_left(self): return_indexers=True) eres = self.index eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], - dtype=np.int64) + dtype=np.int_) tm.assertIsInstance(res, Int64Index) self.assertTrue(res.equals(eres)) @@ -669,8 +672,8 @@ def test_join_left(self): idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True) eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - eridx = np.array([0, 1, 2, 3, -1, -1]) - elidx = np.array([0, 0, 1, 2, 3, 4]) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) self.assertTrue(res.equals(eres)) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -683,7 +686,7 @@ def test_join_right(self): res, lidx, ridx = self.index.join(other, how='right', return_indexers=True) eres = other - elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int64) + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int_) tm.assertIsInstance(other, Int64Index) 
self.assertTrue(res.equals(eres)) @@ -705,8 +708,8 @@ def test_join_right(self): idx2 = Index([1, 2, 5, 7, 9]) res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - elidx = np.array([0, 1, 2, 3, -1, -1]) - eridx = np.array([0, 0, 1, 2, 3, 4]) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) self.assertTrue(res.equals(eres)) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -747,10 +750,10 @@ def test_join_non_unique(self): exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) self.assertTrue(joined.equals(exp_joined)) - exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.int64) + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.int_) tm.assert_numpy_array_equal(lidx, exp_lidx) - exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int64) + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int_) tm.assert_numpy_array_equal(ridx, exp_ridx) def test_join_self(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index f41c252f44d39..8b04b510146d2 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -381,9 +381,9 @@ def test_join_outer(self): eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]) elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, - -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) + -1, -1, -1, -1, -1, -1, -1], dtype=np.int_) eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, - 5, 4, 3, 2, 1, 0], dtype=np.int64) + 5, 4, 3, 2, 1, 0], dtype=np.int_) self.assertIsInstance(res, Int64Index) self.assertFalse(isinstance(res, RangeIndex)) @@ -445,8 +445,7 @@ def test_join_left(self): res, lidx, ridx = self.index.join(other, how='left', return_indexers=True) eres = self.index - eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], - 
dtype=np.int64) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.int_) self.assertIsInstance(res, RangeIndex) self.assertTrue(res.equals(eres)) @@ -472,7 +471,7 @@ def test_join_right(self): return_indexers=True) eres = other elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], - dtype=np.int64) + dtype=np.int_) self.assertIsInstance(other, Int64Index) self.assertTrue(res.equals(eres)) @@ -525,9 +524,9 @@ def test_join_non_unique(self): res, lidx, ridx = self.index.join(other, return_indexers=True) eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) - elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int_) eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], - dtype=np.int64) + dtype=np.int_) self.assertTrue(res.equals(eres)) self.assert_numpy_array_equal(lidx, elidx) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index e1ae923c5c866..574dcd54933ae 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -77,8 +77,9 @@ def test_rename_set_name_inplace(self): for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: s.rename(name, inplace=True) self.assertEqual(s.name, name) - self.assert_numpy_array_equal(s.index.values, - np.array(['a', 'b', 'c'])) + + exp = np.array(['a', 'b', 'c'], dtype=np.object_) + self.assert_numpy_array_equal(s.index.values, exp) def test_set_name_attribute(self): s = Series([1, 2, 3]) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index dd109d8031079..002b7fa3aa8df 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1358,13 +1358,13 @@ def test_searchsorted_numeric_dtypes_scalar(self): tm.assert_equal(r, e) r = s.searchsorted([30]) - e = np.array([2]) + e = np.array([2], dtype=np.int64) tm.assert_numpy_array_equal(r, e) def 
test_searchsorted_numeric_dtypes_vector(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted([91, 2e6]) - e = np.array([3, 4]) + e = np.array([3, 4], dtype=np.int64) tm.assert_numpy_array_equal(r, e) def test_search_sorted_datetime64_scalar(self): @@ -1378,14 +1378,14 @@ def test_search_sorted_datetime64_list(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) v = [pd.Timestamp('20120102'), pd.Timestamp('20120104')] r = s.searchsorted(v) - e = np.array([1, 2]) + e = np.array([1, 2], dtype=np.int64) tm.assert_numpy_array_equal(r, e) def test_searchsorted_sorter(self): # GH8490 s = Series([3, 1, 2]) r = s.searchsorted([0, 3], sorter=np.argsort(s)) - e = np.array([0, 2]) + e = np.array([0, 2], dtype=np.int64) tm.assert_numpy_array_equal(r, e) def test_is_unique(self): diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 90ee834aaf9c2..5b12baf6c6fc5 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -316,16 +316,18 @@ def test_strftime(self): datetime_index = date_range('20150301', periods=5) result = datetime_index.strftime("%Y/%m/%d") - expected = np.array( - ['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', - '2015/03/05'], dtype=object) - self.assert_numpy_array_equal(result, expected) + + expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', + '2015/03/04', '2015/03/05'], dtype=np.object_) + # dtype may be S10 or U10 depending on python version + print(result) + print(expected) + self.assert_numpy_array_equal(result, expected, check_dtype=False) period_index = period_range('20150301', periods=5) result = period_index.strftime("%Y/%m/%d") - expected = np.array( - ['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', - '2015/03/05'], dtype=object) + expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', + '2015/03/04', '2015/03/05'], dtype='= '1.8': - self.assert_numpy_array_equal(other / td, 
np.array([1])) + self.assert_numpy_array_equal(other / td, + np.array([1], dtype=np.float64)) # timedelta, datetime other = pd.to_datetime(['2000-01-01']).values diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 37e708df2595d..3d8e389ba30f2 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3765,7 +3765,8 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_numpy_array_equal(dti.nanosecond, np.arange(10)) + self.assert_numpy_array_equal(dti.nanosecond, + np.arange(10, dtype=np.int32)) def test_datetimeindex_diff(self): dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 3961a8b99b4dd..cd1ce5904738a 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -363,7 +363,7 @@ def test_field_access_localize(self): dr = date_range('2011-10-02 00:00', freq='h', periods=10, tz=self.tzstr('America/Atikokan')) - expected = np.arange(10) + expected = np.arange(10, dtype=np.int32) self.assert_numpy_array_equal(dr.hour, expected) def test_with_tz(self): @@ -890,7 +890,8 @@ def test_tslib_tz_convert_trans_pos_plus_1__bug(self): idx = idx.tz_localize('UTC') idx = idx.tz_convert('Europe/Moscow') - expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + expected = np.repeat(np.array([3, 4, 5], dtype=np.int32), + np.array([n, n, 1])) self.assert_numpy_array_equal(idx.hour, expected) def test_tslib_tz_convert_dst(self): @@ -900,14 +901,15 @@ def test_tslib_tz_convert_dst(self): tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, - 0, 1, 3, 4, 5]), + 0, 1, 3, 4, 5], dtype=np.int32), np.array([n, n, n, n, n, n, n, n, n, n, 1])) self.assert_numpy_array_equal(idx.hour, expected) idx = date_range('2014-03-08 
18:00', '2014-03-09 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + dtype=np.int32), np.array([n, n, n, n, n, n, n, n, n, n, 1])) self.assert_numpy_array_equal(idx.hour, expected) @@ -916,7 +918,7 @@ def test_tslib_tz_convert_dst(self): tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([19, 20, 21, 22, 23, - 0, 1, 1, 2, 3, 4]), + 0, 1, 1, 2, 3, 4], dtype=np.int32), np.array([n, n, n, n, n, n, n, n, n, n, 1])) self.assert_numpy_array_equal(idx.hour, expected) @@ -924,7 +926,7 @@ def test_tslib_tz_convert_dst(self): tz='US/Eastern') idx = idx.tz_convert('UTC') expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10]), + 7, 8, 9, 10], dtype=np.int32), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1])) self.assert_numpy_array_equal(idx.hour, expected) @@ -934,23 +936,27 @@ def test_tslib_tz_convert_dst(self): idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, np.array([19, 19])) + self.assert_numpy_array_equal(idx.hour, + np.array([19, 19], dtype=np.int32)) idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, np.array([5, 5])) + self.assert_numpy_array_equal(idx.hour, + np.array([5, 5], dtype=np.int32)) # End DST idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, np.array([20, 20])) + self.assert_numpy_array_equal(idx.hour, + np.array([20, 20], dtype=np.int32)) idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, np.array([4, 4])) + self.assert_numpy_array_equal(idx.hour, + 
np.array([4, 4], dtype=np.int32)) class TestTimeZoneCacheKey(tm.TestCase): diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index b2311bf4d6661..8a420158039de 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -733,16 +733,18 @@ def test_parsers_time(self): expected_arr) self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), [None, None]) - self.assert_numpy_array_equal(tools.to_time(arg, format="%I:%M%p", - errors="ignore"), - np.array(arg)) - self.assertRaises(ValueError, - lambda: tools.to_time(arg, format="%I:%M%p", - errors="raise")) + + res = tools.to_time(arg, format="%I:%M%p", errors="ignore") + self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + + with tm.assertRaises(ValueError): + tools.to_time(arg, format="%I:%M%p", errors="raise") + self.assert_series_equal(tools.to_time(Series(arg, name="test")), Series(expected_arr, name="test")) + self.assert_numpy_array_equal(tools.to_time(np.array(arg)), - np.array(expected_arr)) + np.array(expected_arr, dtype=np.object_)) def test_parsers_monthfreq(self): cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index eb1d5c6c2d0d0..e1e8159a7627f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -682,21 +682,8 @@ def assert_index_equal(left, right, exact='equiv', check_names=True, def _check_types(l, r, obj='Index'): if exact: - - if exact == 'equiv': - if type(l) != type(r): - # allow equivalence of Int64Index/RangeIndex - types = set([type(l).__name__, type(r).__name__]) - if len(types - set(['Int64Index', 'RangeIndex'])): - msg = '{0} classes are not equivalent'.format(obj) - raise_assert_detail(obj, msg, l, r) - else: - if type(l) != type(r): - msg = '{0} classes are different'.format(obj) - raise_assert_detail(obj, msg, l, r) - + assert_class_equal(left, right, exact=exact, obj=obj) assert_attr_equal('dtype', l, r, 
obj=obj) - # allow string-like to have different inferred_types if l.inferred_type in ('string', 'unicode'): assertIn(r.inferred_type, ('string', 'unicode')) @@ -758,6 +745,7 @@ def _get_ilevel_values(index, level): else: _testing.assert_almost_equal(left.values, right.values, check_less_precise=check_less_precise, + check_dtype=exact, obj=obj, lobj=left, robj=right) # metadata comparison @@ -765,6 +753,34 @@ def _get_ilevel_values(index, level): assert_attr_equal('names', left, right, obj=obj) +def assert_class_equal(left, right, exact=True, obj='Input'): + """checks classes are equal.""" + + def repr_class(x): + if isinstance(x, Index): + # return Index as it is to include values in the error message + return x + + try: + return x.__class__.__name__ + except AttributeError: + return repr(type(x)) + + if exact == 'equiv': + if type(left) != type(right): + # allow equivalence of Int64Index/RangeIndex + types = set([type(left).__name__, type(right).__name__]) + if len(types - set(['Int64Index', 'RangeIndex'])): + msg = '{0} classes are not equivalent'.format(obj) + raise_assert_detail(obj, msg, repr_class(left), + repr_class(right)) + elif exact: + if type(left) != type(right): + msg = '{0} classes are different'.format(obj) + raise_assert_detail(obj, msg, repr_class(left), + repr_class(right)) + + def assert_attr_equal(attr, left, right, obj='Attributes'): """checks attributes are equal. Both objects must have attribute. @@ -915,54 +931,65 @@ def raise_assert_detail(obj, message, left, right): raise AssertionError(msg) -def assert_numpy_array_equal(left, right, - strict_nan=False, err_msg=None, +def assert_numpy_array_equal(left, right, strict_nan=False, + check_dtype=True, err_msg=None, obj='numpy array'): - """Checks that 'np_array' is equivalent to 'assert_equal'. + """ Checks that 'np.ndarray' is equivalent - This is similar to ``numpy.testing.assert_array_equal``, but can - check equality including ``np.nan``. 
Two numpy arrays are regarded as - equivalent if the arrays have equal non-NaN elements, - and `np.nan` in corresponding locations. + Parameters + ---------- + left : np.ndarray or iterable + right : np.ndarray or iterable + strict_nan : bool, default False + If True, consider NaN and None to be different. + check_dtype : bool, default True + check dtype if both left and right are np.ndarray + err_msg : str, default None + If provided, used as assertion message + obj : str, default 'numpy array' + Specify object name being compared, internally used to show appropriate + assertion message """ - # compare shape and values - if array_equivalent(left, right, strict_nan=strict_nan): - return True - - if err_msg is None: - # show detailed error - - if lib.isscalar(left) and lib.isscalar(right): - # show scalar comparison error - assert_equal(left, right) - elif is_list_like(left) and is_list_like(right): - # some test cases pass list - left = np.asarray(left) - right = np.array(right) + def _raise(left, right, err_msg): + if err_msg is None: + # show detailed error + if lib.isscalar(left) and lib.isscalar(right): + # show scalar comparison error + assert_equal(left, right) + elif is_list_like(left) and is_list_like(right): + # some test cases pass list + left = np.asarray(left) + right = np.array(right) + + if left.shape != right.shape: + raise_assert_detail(obj, '{0} shapes are different' + .format(obj), left.shape, right.shape) + + diff = 0 + for l, r in zip(left, right): + # count up differences + if not array_equivalent(l, r, strict_nan=strict_nan): + diff += 1 + + diff = diff * 100.0 / left.size + msg = '{0} values are different ({1} %)'\ + .format(obj, np.round(diff, 5)) + raise_assert_detail(obj, msg, left, right) + else: + assert_class_equal(left, right, obj=obj) - if left.shape != right.shape: - raise_assert_detail(obj, '{0} shapes are different' - .format(obj), left.shape, right.shape) + raise AssertionError(err_msg) - diff = 0 - for l, r in zip(left, right): - # count
up differences - if not array_equivalent(l, r, strict_nan=strict_nan): - diff += 1 + # compare shape and values + if not array_equivalent(left, right, strict_nan=strict_nan): + _raise(left, right, err_msg) - diff = diff * 100.0 / left.size - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) - raise_assert_detail(obj, msg, left, right) - elif is_list_like(left): - msg = "First object is iterable, second isn't" - raise_assert_detail(obj, msg, left, right) - else: - msg = "Second object is iterable, first isn't" - raise_assert_detail(obj, msg, left, right) + if check_dtype: + if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + assert_attr_equal('dtype', left, right, obj=obj) - raise AssertionError(err_msg) + return True # This could be refactored to use the NDFrame.equals method @@ -1007,7 +1034,10 @@ def assert_series_equal(left, right, check_dtype=True, assertIsInstance(right, Series, '[Series] ') if check_series_type: + # ToDo: Some tests pass a sparse rhs and a + # dense lhs. Should use assert_class_equal in future assertIsInstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) # length comparison if len(left) != len(right): @@ -1027,7 +1057,8 @@ def assert_series_equal(left, right, check_dtype=True, if check_exact: assert_numpy_array_equal(left.get_values(), right.get_values(), - obj='{0}'.format(obj)) + obj='{0}'.format(obj), + check_dtype=check_dtype) elif check_datetimelike_compat: # we want to check only if we have compat dtypes # e.g. integer and M|m are NOT compat, but we can simply check @@ -1043,10 +1074,13 @@ def assert_series_equal(left, right, check_dtype=True, msg = '[datetimelike_compat=True] {0} is not equal to {1}.'
raise AssertionError(msg.format(left.values, right.values)) else: - assert_numpy_array_equal(left.values, right.values) + assert_numpy_array_equal(left.values, right.values, + check_dtype=check_dtype) else: _testing.assert_almost_equal(left.get_values(), right.get_values(), - check_less_precise, obj='{0}'.format(obj)) + check_less_precise, + check_dtype=check_dtype, + obj='{0}'.format(obj)) # metadata comparison if check_names: @@ -1106,7 +1140,10 @@ def assert_frame_equal(left, right, check_dtype=True, assertIsInstance(right, DataFrame, '[DataFrame] ') if check_frame_type: + # ToDo: Some tests pass a SparseDataFrame as rhs and a + # DataFrame as lhs. Should use assert_class_equal in future assertIsInstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) if check_like: left, right = left.reindex_like(right), right @@ -1172,7 +1209,7 @@ def assert_panelnd_equal(left, right, assert_func=assert_frame_equal, check_names=False): if check_panel_type: - assertIsInstance(left, type(right)) + assert_class_equal(left, right, obj=obj) for axis in ['items', 'major_axis', 'minor_axis']: left_ind = getattr(left, axis) @@ -1204,7 +1241,7 @@ def assert_sp_array_equal(left, right): assertIsInstance(left, pd.SparseArray, '[SparseArray]') assertIsInstance(right, pd.SparseArray, '[SparseArray]') - assert_almost_equal(left.sp_values, right.sp_values) + assert_numpy_array_equal(left.sp_values, right.sp_values) # SparseIndex comparison assertIsInstance(left.sp_index, pd._sparse.SparseIndex, '[SparseIndex]')