From 24146cdfd0885bbf22b2b132ee06ccf823f092f1 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 3 Apr 2016 14:30:18 +0900 Subject: [PATCH] BUG: Sparse misc fixes --- doc/source/whatsnew/v0.18.1.txt | 13 ++ pandas/io/tests/test_pickle.py | 3 +- pandas/sparse/array.py | 12 +- pandas/sparse/frame.py | 3 +- pandas/sparse/series.py | 33 ++++- pandas/sparse/tests/test_array.py | 44 +++--- pandas/sparse/tests/test_indexing.py | 84 +++++++++++ pandas/sparse/tests/test_list.py | 24 ++-- pandas/sparse/tests/test_sparse.py | 204 +++++++++++---------------- pandas/tests/test_format.py | 19 +++ pandas/util/testing.py | 80 ++++++++++- 11 files changed, 354 insertions(+), 165 deletions(-) create mode 100644 pandas/sparse/tests/test_indexing.py diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index f20b961455ba7..be8323e6b46f7 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -81,6 +81,13 @@ API changes - ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`) - ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`) +- ``SparseArray.take`` now returns scalar for scalar input, ``SparseArray`` for others (:issue:`10560`) + +.. ipython:: python + + s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) + s.take(0) + s.take([1, 2, 3]) .. _whatsnew_0181.apply_resample: @@ -211,3 +218,9 @@ Bug Fixes - Bug in ``.describe()`` resets categorical columns information (:issue:`11558`) - Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`) - ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`) + + +- Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`) +- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`) +- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`) +- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`) diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 61f78b2b619fc..e8218ca5950ba 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -10,7 +10,6 @@ import pandas as pd from pandas import Index from pandas.compat import u -from pandas.sparse.tests import test_sparse from pandas.util.misc import is_little_endian import pandas import pandas.util.testing as tm @@ -46,7 +45,7 @@ def compare_element(self, result, expected, typ, version=None): return if typ.startswith('sp_'): - comparator = getattr(test_sparse, "assert_%s_equal" % typ) + comparator = getattr(tm, "assert_%s_equal" % typ) comparator(result, expected, exact_indices=False) else: comparator = getattr(tm, "assert_%s_equal" % diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 4d8ec61e84c85..35b2a4bf6dc80 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -280,10 +280,7 @@ def __getitem__(self, key): if isinstance(key, SparseArray): key = np.asarray(key) if hasattr(key, '__len__') and len(self) != len(key): - indices = self.sp_index - if hasattr(indices, 'to_int_index'): - indices = indices.to_int_index() - data_slice = self.values.take(indices.indices)[key] + return self.take(key) else: data_slice = self.values[key] return self._constructor(data_slice) @@ -320,6 +317,11 @@ def take(self, indices, axis=0): """ if axis: raise ValueError("axis must be 0, input was {0}".format(axis)) + + if com.is_integer(indices): + # return scalar + return self[indices] + indices = np.atleast_1d(np.asarray(indices, dtype=int)) # allow -1 to indicate missing values @@ -344,7 +346,7 @@ def take(self, indices, axis=0): result = np.empty(len(indices)) result.fill(self.fill_value) - return result + return self._constructor(result) def __setitem__(self, key, value): # if com.is_integer(key): diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 25f1f16831317..abc5ffef4a88d 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -543,9 +543,10 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, continue values = series.values + # .take returns SparseArray new = values.take(indexer) - if need_mask: + new = new.values np.putmask(new, mask, fill_value) new_series[col] = new diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 71790c8a544a1..6dedcdbef3174 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -354,10 +354,33 @@ def _set_subtyp(self, is_all_dates): else: object.__setattr__(self, '_subtyp', 'sparse_series') + def _ixs(self, i, axis=0): + """ + Return the i-th value or values in the SparseSeries by location + + Parameters + ---------- + i : int, slice, or sequence of integers + + Returns + ------- + value : scalar (int) or Series (slice, sequence) + """ + label = self.index[i] + if isinstance(label, Index): + return self.take(i, axis=axis, convert=True) + else: + return self._get_val_at(i) + def _get_val_at(self, loc): """ forward to the array """ return self.block.values._get_val_at(loc) + def _slice(self, slobj, axis=0, kind=None): + slobj = self.index._convert_slice_indexer(slobj, + kind=kind or 'getitem') + return self._get_values(slobj) + def __getitem__(self, key): """ @@ -382,6 +405,13 @@ def __getitem__(self, key): new_index = Index(self.index.view(ndarray)[key]) return self._constructor(dataSlice, index=new_index).__finalize__(self) + def _get_values(self, indexer): + try: + return self._constructor(self._data.get_slice(indexer), + fastpath=True).__finalize__(self) + except Exception: + return self[indexer] + def _set_with_engine(self, key, value): return self.set_value(key, value) @@ -517,7 +547,8 @@ def copy(self, deep=True): return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value).__finalize__(self) - def reindex(self, index=None, method=None, copy=True, limit=None): + def reindex(self, index=None, method=None, copy=True, limit=None, + **kwargs): """ Conform SparseSeries to new Index diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index b1e731bd8e2e5..a0b505ff4a6da 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -11,15 +11,6 @@ import pandas.util.testing as tm -def assert_sp_array_equal(left, right): - assert_almost_equal(left.sp_values, right.sp_values) - assert (left.sp_index.equals(right.sp_index)) - if np.isnan(left.fill_value): - assert (np.isnan(right.fill_value)) - else: - assert (left.fill_value == right.fill_value) - - class TestSparseArray(tm.TestCase): _multiprocess_can_split_ = True @@ -29,11 +20,32 @@ def setUp(self): self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) def test_get_item(self): + + self.assertTrue(np.isnan(self.arr[1])) + self.assertEqual(self.arr[2], 1) + self.assertEqual(self.arr[7], 5) + + self.assertEqual(self.zarr[0], 0) + self.assertEqual(self.zarr[2], 1) + self.assertEqual(self.zarr[7], 5) + errmsg = re.compile("bounds") assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[11]) assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[-11]) self.assertEqual(self.arr[-1], self.arr[len(self.arr) - 1]) + def test_take(self): + self.assertTrue(np.isnan(self.arr.take(0))) + self.assertTrue(np.isscalar(self.arr.take(2))) + self.assertEqual(self.arr.take(2), np.take(self.arr_data, 2)) + self.assertEqual(self.arr.take(6), np.take(self.arr_data, 6)) + + tm.assert_sp_array_equal(self.arr.take([2, 3]), + SparseArray(np.take(self.arr_data, [2, 3]))) + tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), + SparseArray(np.take(self.arr_data, + [0, 1, 2]))) + def test_bad_take(self): assertRaisesRegexp(IndexError, "bounds", lambda: self.arr.take(11)) self.assertRaises(IndexError, lambda: self.arr.take(-11)) @@ -96,20 +108,20 @@ def _checkit(i): def test_getslice(self): result = self.arr[:-3] exp = SparseArray(self.arr.values[:-3]) - assert_sp_array_equal(result, exp) + tm.assert_sp_array_equal(result, exp) result = self.arr[-4:] exp = SparseArray(self.arr.values[-4:]) - assert_sp_array_equal(result, exp) + tm.assert_sp_array_equal(result, exp) # two corner cases from Series result = self.arr[-12:] exp = SparseArray(self.arr) - assert_sp_array_equal(result, exp) + tm.assert_sp_array_equal(result, exp) result = self.arr[:-12] exp = SparseArray(self.arr.values[:0]) - assert_sp_array_equal(result, exp) + tm.assert_sp_array_equal(result, exp) def test_binary_operators(self): data1 = np.random.randn(20) @@ -134,11 +146,11 @@ def _check_op(op, first, second): res2 = op(first, second.values) tm.assertIsInstance(res2, SparseArray) - assert_sp_array_equal(res, res2) + tm.assert_sp_array_equal(res, res2) res3 = op(first.values, second) tm.assertIsInstance(res3, SparseArray) - assert_sp_array_equal(res, res3) + tm.assert_sp_array_equal(res, res3) res4 = op(first, 4) tm.assertIsInstance(res4, SparseArray) @@ -169,7 +181,7 @@ def _check_inplace_op(op): def test_pickle(self): def _check_roundtrip(obj): unpickled = self.round_trip_pickle(obj) - assert_sp_array_equal(unpickled, obj) + tm.assert_sp_array_equal(unpickled, obj) _check_roundtrip(self.arr) _check_roundtrip(self.zarr) diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py new file mode 100644 index 0000000000000..384125ddc63f4 --- /dev/null +++ b/pandas/sparse/tests/test_indexing.py @@ -0,0 +1,84 @@ +# pylint: disable-msg=E1101,W0612 + +import nose # noqa +import numpy as np +import pandas as pd +import pandas.util.testing as tm + + +class TestSparseSeriesIndexing(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_loc(self): + orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) + sparse = orig.to_sparse() + + self.assertEqual(sparse.loc[0], 1) + self.assertTrue(np.isnan(sparse.loc[1])) + + result = sparse.loc[[1, 3, 4]] + exp = orig.loc[[1, 3, 4]].to_sparse() + tm.assert_sp_series_equal(result, exp) + + # exceeds the bounds + result = sparse.loc[[1, 3, 4, 5]] + exp = orig.loc[[1, 3, 4, 5]].to_sparse() + tm.assert_sp_series_equal(result, exp) + # padded with NaN + self.assertTrue(np.isnan(result[-1])) + + # dense array + result = sparse.loc[orig % 2 == 1] + exp = orig.loc[orig % 2 == 1].to_sparse() + tm.assert_sp_series_equal(result, exp) + + # sparse array (actuary it coerces to normal Series) + result = sparse.loc[sparse % 2 == 1] + exp = orig.loc[orig % 2 == 1].to_sparse() + tm.assert_sp_series_equal(result, exp) + + def test_loc_index(self): + orig = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('ABCDE')) + sparse = orig.to_sparse() + + self.assertEqual(sparse.loc['A'], 1) + self.assertTrue(np.isnan(sparse.loc['B'])) + + result = sparse.loc[['A', 'C', 'D']] + exp = orig.loc[['A', 'C', 'D']].to_sparse() + tm.assert_sp_series_equal(result, exp) + + # dense array + result = sparse.loc[orig % 2 == 1] + exp = orig.loc[orig % 2 == 1].to_sparse() + tm.assert_sp_series_equal(result, exp) + + # sparse array (actuary it coerces to normal Series) + result = sparse.loc[sparse % 2 == 1] + exp = orig.loc[orig % 2 == 1].to_sparse() + tm.assert_sp_series_equal(result, exp) + + def test_loc_slice(self): + orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) + sparse = orig.to_sparse() + tm.assert_sp_series_equal(sparse.loc[2:], orig.loc[2:].to_sparse()) + + def test_iloc(self): + orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) + sparse = orig.to_sparse() + + self.assertEqual(sparse.iloc[3], 3) + self.assertTrue(np.isnan(sparse.iloc[2])) + + result = sparse.iloc[[1, 3, 4]] + exp = orig.iloc[[1, 3, 4]].to_sparse() + tm.assert_sp_series_equal(result, exp) + + with tm.assertRaises(IndexError): + sparse.iloc[[1, 3, 5]] + + def test_iloc_slice(self): + orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) + sparse = orig.to_sparse() + tm.assert_sp_series_equal(sparse.iloc[2:], orig.iloc[2:].to_sparse()) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index 7b81e483da2b0..5f8627103e18b 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -5,13 +5,7 @@ import numpy as np from pandas.sparse.api import SparseList, SparseArray -from pandas.util.testing import assert_almost_equal - -from .test_sparse import assert_sp_array_equal - - -def assert_sp_list_equal(left, right): - assert_sp_array_equal(left.to_array(), right.to_array()) +import pandas.util.testing as tm class TestSparseList(unittest.TestCase): @@ -26,7 +20,7 @@ def test_constructor(self): lst1 = SparseList(self.na_data[:5]) exp = SparseList() exp.append(self.na_data[:5]) - assert_sp_list_equal(lst1, exp) + tm.assert_sp_list_equal(lst1, exp) def test_len(self): arr = self.na_data @@ -46,7 +40,7 @@ def test_append_na(self): splist.append(arr[6:]) sparr = splist.to_array() - assert_sp_array_equal(sparr, SparseArray(arr)) + tm.assert_sp_array_equal(sparr, SparseArray(arr)) def test_append_zero(self): arr = self.zero_data @@ -56,7 +50,7 @@ def test_append_zero(self): splist.append(arr[6:]) sparr = splist.to_array() - assert_sp_array_equal(sparr, SparseArray(arr, fill_value=0)) + tm.assert_sp_array_equal(sparr, SparseArray(arr, fill_value=0)) def test_consolidate(self): arr = self.na_data @@ -70,11 +64,11 @@ def test_consolidate(self): consol = splist.consolidate(inplace=False) self.assertEqual(consol.nchunks, 1) self.assertEqual(splist.nchunks, 3) - assert_sp_array_equal(consol.to_array(), exp_sparr) + tm.assert_sp_array_equal(consol.to_array(), exp_sparr) splist.consolidate() self.assertEqual(splist.nchunks, 1) - assert_sp_array_equal(splist.to_array(), exp_sparr) + tm.assert_sp_array_equal(splist.to_array(), exp_sparr) def test_copy(self): arr = self.na_data @@ -87,7 +81,7 @@ def test_copy(self): cp = splist.copy() cp.append(arr[6:]) self.assertEqual(splist.nchunks, 2) - assert_sp_array_equal(cp.to_array(), exp_sparr) + tm.assert_sp_array_equal(cp.to_array(), exp_sparr) def test_getitem(self): arr = self.na_data @@ -97,8 +91,8 @@ def test_getitem(self): splist.append(arr[6:]) for i in range(len(arr)): - assert_almost_equal(splist[i], arr[i]) - assert_almost_equal(splist[-i], arr[-i]) + tm.assert_almost_equal(splist[i], arr[i]) + tm.assert_almost_equal(splist[-i], arr[-i]) if __name__ == '__main__': diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 3fba4c365c055..0994ae06cb453 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -10,7 +10,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal, assert_frame_equal, assert_panel_equal, assertRaisesRegexp, - assert_numpy_array_equal, assert_attr_equal) + assert_numpy_array_equal) from numpy.testing import assert_equal from pandas import Series, DataFrame, bdate_range, Panel @@ -30,9 +30,6 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.tests.frame.test_misc_api import (SafeForSparse as SparseFrameTests) - -from pandas.sparse.tests.test_array import assert_sp_array_equal - import pandas.tests.test_panel as test_panel from pandas.tests.series.test_misc_api import SharedWithSparse @@ -73,49 +70,6 @@ def _test_data2_zero(): return arr, index -def assert_sp_series_equal(a, b, exact_indices=True, check_names=True): - assert (a.index.equals(b.index)) - assert_sp_array_equal(a, b) - if check_names: - assert_attr_equal('name', a, b) - - -def assert_sp_frame_equal(left, right, exact_indices=True): - """ - exact: Series SparseIndex objects must be exactly the same, otherwise just - compare dense representations - """ - for col, series in compat.iteritems(left): - assert (col in right) - # trade-off? - - if exact_indices: - assert_sp_series_equal(series, right[col]) - else: - assert_series_equal(series.to_dense(), right[col].to_dense()) - - assert_almost_equal(left.default_fill_value, right.default_fill_value) - - # do I care? - # assert(left.default_kind == right.default_kind) - - for col in right: - assert (col in left) - - -def assert_sp_panel_equal(left, right, exact_indices=True): - for item, frame in left.iteritems(): - assert (item in right) - # trade-off? - assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices) - - assert_almost_equal(left.default_fill_value, right.default_fill_value) - assert (left.default_kind == right.default_kind) - - for item in right: - assert (item in left) - - class TestSparseSeries(tm.TestCase, SharedWithSparse): _multiprocess_can_split_ = True @@ -169,10 +123,10 @@ def test_construct_DataFrame_with_sp_series(self): df.dtypes str(df) - assert_sp_series_equal(df['col'], self.bseries, check_names=False) + tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False) result = df.iloc[:, 0] - assert_sp_series_equal(result, self.bseries, check_names=False) + tm.assert_sp_series_equal(result, self.bseries, check_names=False) # blocking expected = Series({'col': 'float64:sparse'}) @@ -209,8 +163,8 @@ def test_dense_to_sparse(self): series = self.bseries.to_dense() bseries = series.to_sparse(kind='block') iseries = series.to_sparse(kind='integer') - assert_sp_series_equal(bseries, self.bseries) - assert_sp_series_equal(iseries, self.iseries, check_names=False) + tm.assert_sp_series_equal(bseries, self.bseries) + tm.assert_sp_series_equal(iseries, self.iseries, check_names=False) self.assertEqual(iseries.name, self.bseries.name) self.assertEqual(len(series), len(bseries)) @@ -222,8 +176,8 @@ def test_dense_to_sparse(self): series = self.zbseries.to_dense() zbseries = series.to_sparse(kind='block', fill_value=0) ziseries = series.to_sparse(kind='integer', fill_value=0) - assert_sp_series_equal(zbseries, self.zbseries) - assert_sp_series_equal(ziseries, self.ziseries, check_names=False) + tm.assert_sp_series_equal(zbseries, self.zbseries) + tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False) self.assertEqual(ziseries.name, self.zbseries.name) self.assertEqual(len(series), len(zbseries)) @@ -251,13 +205,13 @@ def test_constructor(self): def _check_const(sparse, name): # use passed series name result = SparseSeries(sparse) - assert_sp_series_equal(result, sparse) + tm.assert_sp_series_equal(result, sparse) self.assertEqual(sparse.name, name) self.assertEqual(result.name, name) # use passed name result = SparseSeries(sparse, name='x') - assert_sp_series_equal(result, sparse, check_names=False) + tm.assert_sp_series_equal(result, sparse, check_names=False) self.assertEqual(result.name, 'x') _check_const(self.bseries, 'bseries') @@ -329,8 +283,8 @@ def test_copy_astype(self): cop2 = self.iseries.copy() - assert_sp_series_equal(cop, self.bseries) - assert_sp_series_equal(cop2, self.iseries) + tm.assert_sp_series_equal(cop, self.bseries) + tm.assert_sp_series_equal(cop2, self.iseries) # test that data is copied cop[:5] = 97 @@ -341,8 +295,8 @@ def test_copy_astype(self): zbcop = self.zbseries.copy() zicop = self.ziseries.copy() - assert_sp_series_equal(zbcop, self.zbseries) - assert_sp_series_equal(zicop, self.ziseries) + tm.assert_sp_series_equal(zbcop, self.zbseries) + tm.assert_sp_series_equal(zicop, self.ziseries) # no deep copy view = self.bseries.copy(deep=False) @@ -371,8 +325,8 @@ def test_kind(self): def test_pickle(self): def _test_roundtrip(series): unpickled = self.round_trip_pickle(series) - assert_sp_series_equal(series, unpickled) - assert_series_equal(series.to_dense(), unpickled.to_dense()) + tm.assert_sp_series_equal(series, unpickled) + tm.assert_series_equal(series.to_dense(), unpickled.to_dense()) self._check_all(_test_roundtrip) @@ -439,18 +393,18 @@ def test_getitem_slice(self): tm.assertIsInstance(res, SparseSeries) expected = self.bseries.reindex(idx[::2]) - assert_sp_series_equal(res, expected) + tm.assert_sp_series_equal(res, expected) res = self.bseries[:5] tm.assertIsInstance(res, SparseSeries) - assert_sp_series_equal(res, self.bseries.reindex(idx[:5])) + tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5])) res = self.bseries[5:] - assert_sp_series_equal(res, self.bseries.reindex(idx[5:])) + tm.assert_sp_series_equal(res, self.bseries.reindex(idx[5:])) # negative indices res = self.bseries[:-3] - assert_sp_series_equal(res, self.bseries.reindex(idx[:-3])) + tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:-3])) def test_take(self): def _compare_with_dense(sp): @@ -529,7 +483,7 @@ def check(a, b): # with dense result = self.bseries + self.bseries.to_dense() - assert_sp_series_equal(result, self.bseries + self.bseries) + tm.assert_sp_series_equal(result, self.bseries + self.bseries) def test_binary_operators(self): @@ -541,7 +495,7 @@ def _check_inplace_op(iop, op): expected = op(tmp, self.bseries) iop(tmp, self.bseries) - assert_sp_series_equal(tmp, expected) + tm.assert_sp_series_equal(tmp, expected) inplace_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow'] for op in inplace_ops: @@ -552,15 +506,15 @@ def test_abs(self): s = SparseSeries([1, 2, -3], name='x') expected = SparseSeries([1, 2, 3], name='x') result = s.abs() - assert_sp_series_equal(result, expected) + tm.assert_sp_series_equal(result, expected) self.assertEqual(result.name, 'x') result = abs(s) - assert_sp_series_equal(result, expected) + tm.assert_sp_series_equal(result, expected) self.assertEqual(result.name, 'x') result = np.abs(s) - assert_sp_series_equal(result, expected) + tm.assert_sp_series_equal(result, expected) self.assertEqual(result.name, 'x') def test_reindex(self): @@ -571,8 +525,8 @@ def _compare_with_series(sps, new_index): seriesre = series.reindex(new_index) seriesre = seriesre.to_sparse(fill_value=sps.fill_value) - assert_sp_series_equal(spsre, seriesre) - assert_series_equal(spsre.to_dense(), seriesre.to_dense()) + tm.assert_sp_series_equal(spsre, seriesre) + tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense()) _compare_with_series(self.bseries, self.bseries.index[::2]) _compare_with_series(self.bseries, list(self.bseries.index[::2])) @@ -585,7 +539,7 @@ def _compare_with_series(sps, new_index): # special cases same_index = self.bseries.reindex(self.bseries.index) - assert_sp_series_equal(self.bseries, same_index) + tm.assert_sp_series_equal(self.bseries, same_index) self.assertIsNot(same_index, self.bseries) # corner cases @@ -761,7 +715,7 @@ def test_shift(self): shifted = series.shift(0) self.assertIsNot(shifted, series) - assert_sp_series_equal(shifted, series) + tm.assert_sp_series_equal(shifted, series) f = lambda s: s.shift(1) _dense_series_compare(series, f) @@ -798,8 +752,8 @@ def test_combine_first(self): expected = s[::2].to_dense().combine_first(s.to_dense()) expected = expected.to_sparse(fill_value=s.fill_value) - assert_sp_series_equal(result, result2) - assert_sp_series_equal(result, expected) + tm.assert_sp_series_equal(result, result2) + tm.assert_sp_series_equal(result, expected) class TestSparseHandlingMultiIndexes(tm.TestCase): @@ -926,13 +880,13 @@ def test_to_coo_duplicate_index_entries(self): def test_from_coo_dense_index(self): ss = SparseSeries.from_coo(self.coo_matrices[0], dense_index=True) check = self.sparse_series[2] - assert_sp_series_equal(ss, check) + tm.assert_sp_series_equal(ss, check) def test_from_coo_nodense_index(self): ss = SparseSeries.from_coo(self.coo_matrices[0], dense_index=False) check = self.sparse_series[2] check = check.dropna().to_sparse() - assert_sp_series_equal(ss, check) + tm.assert_sp_series_equal(ss, check) def _run_test(self, ss, kwargs, check): results = ss.to_coo(**kwargs) @@ -1009,7 +963,7 @@ def test_as_matrix(self): def test_copy(self): cp = self.frame.copy() tm.assertIsInstance(cp, SparseDataFrame) - assert_sp_frame_equal(cp, self.frame) + tm.assert_sp_frame_equal(cp, self.frame) # as of v0.15.0 # this is now identical (but not is_a ) @@ -1037,7 +991,7 @@ def test_constructor(self): data[c] = s.to_dict() sdf = SparseDataFrame(data) - assert_sp_frame_equal(sdf, self.frame) + tm.assert_sp_frame_equal(sdf, self.frame) # TODO: test data is copied from inputs @@ -1048,7 +1002,7 @@ def test_constructor(self): default_fill_value=self.frame.default_fill_value, default_kind=self.frame.default_kind, copy=True) reindexed = self.frame.reindex(idx) - assert_sp_frame_equal(cons, reindexed, exact_indices=False) + tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False) # assert level parameter breaks reindex self.assertRaises(TypeError, self.frame.reindex, idx, level=0) @@ -1061,7 +1015,7 @@ def test_constructor_ndarray(self): # 1d sp = SparseDataFrame(self.data['A'], index=self.dates, columns=['A']) - assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) + tm.assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) # raise on level argument self.assertRaises(TypeError, self.frame.reindex, columns=['A'], @@ -1082,7 +1036,7 @@ def test_constructor_empty(self): def test_constructor_dataframe(self): dense = self.frame.to_dense() sp = SparseDataFrame(dense) - assert_sp_frame_equal(sp, self.frame) + tm.assert_sp_frame_equal(sp, self.frame) def test_constructor_convert_index_once(self): arr = np.array([1.5, 2.5, 3.5]) @@ -1145,7 +1099,7 @@ def test_array_interface(self): def test_pickle(self): def _test_roundtrip(frame): result = self.round_trip_pickle(frame) - assert_sp_frame_equal(frame, result) + tm.assert_sp_frame_equal(frame, result) _test_roundtrip(SparseDataFrame()) self._check_all(_test_roundtrip) @@ -1230,14 +1184,14 @@ def _compare_to_dense(a, b, da, db, op): dense_result = op(da, db) dense_result = dense_result.to_sparse(fill_value=fill) - assert_sp_frame_equal(sparse_result, dense_result, - exact_indices=False) + tm.assert_sp_frame_equal(sparse_result, dense_result, + exact_indices=False) if isinstance(a, DataFrame) and isinstance(db, DataFrame): mixed_result = op(a, db) tm.assertIsInstance(mixed_result, SparseDataFrame) - assert_sp_frame_equal(mixed_result, sparse_result, - exact_indices=False) + tm.assert_sp_frame_equal(mixed_result, sparse_result, + exact_indices=False) opnames = ['add', 'sub', 'mul', 'truediv', 'floordiv'] ops = [getattr(operator, name) for name in opnames] @@ -1296,7 +1250,7 @@ def test_getitem(self): result = sdf[['a', 'b']] exp = sdf.reindex(columns=['a', 'b']) - assert_sp_frame_equal(result, exp) + tm.assert_sp_frame_equal(result, exp) self.assertRaises(Exception, sdf.__getitem__, ['a', 'd']) @@ -1306,7 +1260,7 @@ def test_icol(self): # 2227 result = self.frame.iloc[:, 0] self.assertTrue(isinstance(result, SparseSeries)) - assert_sp_series_equal(result, self.frame['A']) + tm.assert_sp_series_equal(result, self.frame['A']) # preserve sparse index type. #2251 data = {'A': [0, 1]} @@ -1339,17 +1293,17 @@ def test_fancy_index_misc(self): # axis = 0 sliced = self.frame.ix[-2:, :] expected = self.frame.reindex(index=self.frame.index[-2:]) - assert_sp_frame_equal(sliced, expected) + tm.assert_sp_frame_equal(sliced, expected) # axis = 1 sliced = self.frame.ix[:, -2:] expected = self.frame.reindex(columns=self.frame.columns[-2:]) - assert_sp_frame_equal(sliced, expected) + tm.assert_sp_frame_equal(sliced, expected) def test_getitem_overload(self): # slicing sl = self.frame[:20] - assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20])) + tm.assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20])) # boolean indexing d = self.frame.index[5] @@ -1368,7 +1322,8 @@ def _check_frame(frame): # insert SparseSeries frame['E'] = frame['A'] tm.assertIsInstance(frame['E'], SparseSeries) - assert_sp_series_equal(frame['E'], frame['A'], check_names=False) + tm.assert_sp_series_equal(frame['E'], frame['A'], + check_names=False) # insert SparseSeries differently-indexed to_insert = frame['A'][::2] @@ -1382,7 +1337,8 @@ def _check_frame(frame): # insert Series frame['F'] = frame['A'].to_dense() tm.assertIsInstance(frame['F'], SparseSeries) - assert_sp_series_equal(frame['F'], frame['A'], check_names=False) + tm.assert_sp_series_equal(frame['F'], frame['A'], + check_names=False) # insert Series differently-indexed to_insert = frame['A'].to_dense()[::2] @@ -1417,21 +1373,21 @@ def _check_frame(frame): def test_setitem_corner(self): self.frame['a'] = self.frame['B'] - assert_sp_series_equal(self.frame['a'], self.frame['B'], - check_names=False) + tm.assert_sp_series_equal(self.frame['a'], self.frame['B'], + check_names=False) def test_setitem_array(self): arr = self.frame['B'] self.frame['E'] = arr - assert_sp_series_equal(self.frame['E'], self.frame['B'], - check_names=False) + tm.assert_sp_series_equal(self.frame['E'], self.frame['B'], + check_names=False) self.frame['F'] = arr[:-1] index = self.frame.index[:-1] - assert_sp_series_equal(self.frame['E'].reindex(index), - self.frame['F'].reindex(index), - check_names=False) + tm.assert_sp_series_equal(self.frame['E'].reindex(index), + self.frame['F'].reindex(index), + check_names=False) def test_delitem(self): A = self.frame['A'] @@ -1439,8 +1395,8 @@ def test_delitem(self): del self.frame['B'] self.assertNotIn('B', self.frame) - assert_sp_series_equal(self.frame['A'], A) - assert_sp_series_equal(self.frame['C'], C) + tm.assert_sp_series_equal(self.frame['A'], A) + tm.assert_sp_series_equal(self.frame['C'], C) del self.frame['D'] self.assertNotIn('D', self.frame) @@ -1463,13 +1419,13 @@ def test_append(self): b = self.frame[5:] appended = a.append(b) - assert_sp_frame_equal(appended, self.frame, exact_indices=False) + tm.assert_sp_frame_equal(appended, self.frame, exact_indices=False) a = self.frame.ix[:5, :3] b = self.frame.ix[5:] appended = a.append(b) - assert_sp_frame_equal(appended.ix[:, :3], self.frame.ix[:, :3], - exact_indices=False) + tm.assert_sp_frame_equal(appended.ix[:, :3], self.frame.ix[:, :3], + exact_indices=False) def test_apply(self): applied = self.frame.apply(np.sqrt) @@ -1518,12 +1474,12 @@ def test_fillna(self): df = self.zframe.reindex(lrange(5)) result = df.fillna(0) expected = df.to_dense().fillna(0).to_sparse(fill_value=0) - assert_sp_frame_equal(result, expected, exact_indices=False) + tm.assert_sp_frame_equal(result, expected, exact_indices=False) result = df.copy() result.fillna(0, inplace=True) expected = df.to_dense().fillna(0).to_sparse(fill_value=0) - assert_sp_frame_equal(result, expected, exact_indices=False) + tm.assert_sp_frame_equal(result, expected, exact_indices=False) result = df.copy() result = df['A'] @@ -1549,7 +1505,7 @@ def test_join(self): left = self.frame.ix[:, ['A', 'B']] right = self.frame.ix[:, ['C', 'D']] joined = left.join(right) - assert_sp_frame_equal(joined, self.frame, exact_indices=False) + tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False) right = self.frame.ix[:, ['B', 'D']] self.assertRaises(Exception, left.join, right) @@ -1620,12 +1576,12 @@ def test_reindex_fill_value(self): rng = bdate_range('20110110', periods=20) result = self.zframe.reindex(rng, fill_value=0) expected = self.zframe.reindex(rng).fillna(0) - assert_sp_frame_equal(result, expected) + tm.assert_sp_frame_equal(result, expected) def test_take(self): result = self.frame.take([1, 0, 2], axis=1) expected = self.frame.reindex(columns=['B', 'A', 'C']) - assert_sp_frame_equal(result, expected) + tm.assert_sp_frame_equal(result, expected) def test_to_dense(self): def _check(frame): @@ -1657,14 +1613,14 @@ def test_transpose(self): def _check(frame): transposed = frame.T untransposed = transposed.T - assert_sp_frame_equal(frame, untransposed) + tm.assert_sp_frame_equal(frame, untransposed) self._check_all(_check) def test_shift(self): def _check(frame): shifted = frame.shift(0) - assert_sp_frame_equal(shifted, frame) + tm.assert_sp_frame_equal(shifted, frame) f = lambda s: s.shift(1) _dense_frame_compare(frame, f) @@ -1712,8 +1668,8 @@ def test_combine_first(self): expected = df[::2].to_dense().combine_first(df.to_dense()) expected = expected.to_sparse(fill_value=df.default_fill_value) - assert_sp_frame_equal(result, result2) - assert_sp_frame_equal(result, expected) + tm.assert_sp_frame_equal(result, result2) + tm.assert_sp_frame_equal(result, expected) def test_combine_add(self): df = self.frame.to_dense() @@ -1723,7 +1679,7 @@ def test_combine_add(self): result = df.to_sparse().add(df2.to_sparse(), fill_value=0) expected = df.add(df2, fill_value=0).to_sparse() - assert_sp_frame_equal(result, expected) + tm.assert_sp_frame_equal(result, expected) def test_isin(self): sparse_df = DataFrame({'flag': [1., 0., 1.]}).to_sparse(fill_value=0.) @@ -1814,7 +1770,7 @@ class TestSparsePanel(tm.TestCase, test_panel.SafeForLongAndSparse, @classmethod def assert_panel_equal(cls, x, y): - assert_sp_panel_equal(x, y) + tm.assert_sp_panel_equal(x, y) def setUp(self): self.data_dict = { @@ -1831,7 +1787,7 @@ def _test_op(panel, op): # arithmetic tests with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = op(panel, 1) - assert_sp_frame_equal(result['ItemA'], op(panel['ItemA'], 1)) + tm.assert_sp_frame_equal(result['ItemA'], op(panel['ItemA'], 1)) def test_constructor(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -1858,7 +1814,7 @@ def test_constructor_empty(self): def test_from_dict(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): fd = SparsePanel.from_dict(self.data_dict) - assert_sp_panel_equal(fd, self.panel) + tm.assert_sp_panel_equal(fd, self.panel) def test_pickle(self): def _test_roundtrip(panel): @@ -1866,7 +1822,7 @@ def _test_roundtrip(panel): tm.assertIsInstance(result.items, Index) tm.assertIsInstance(result.major_axis, Index) tm.assertIsInstance(result.minor_axis, Index) - assert_sp_panel_equal(panel, result) + tm.assert_sp_panel_equal(panel, result) _test_roundtrip(self.panel) @@ -1911,8 +1867,8 @@ def test_setitem(self): self.panel['ItemE'] = self.panel['ItemC'] self.panel['ItemF'] = self.panel['ItemC'].to_dense() - assert_sp_frame_equal(self.panel['ItemE'], self.panel['ItemC']) - assert_sp_frame_equal(self.panel['ItemF'], self.panel['ItemC']) + tm.assert_sp_frame_equal(self.panel['ItemE'], self.panel['ItemC']) + tm.assert_sp_frame_equal(self.panel['ItemF'], self.panel['ItemC']) expected = pd.Index(['ItemA', 'ItemB', 'ItemC', 'ItemD', 'ItemE', 'ItemF']) @@ -1945,7 +1901,7 @@ def test_delitem_pop(self): def test_copy(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): cop = self.panel.copy() - assert_sp_panel_equal(cop, self.panel) + tm.assert_sp_panel_equal(cop, self.panel) def test_reindex(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index e8ad776fd5578..1a97f698b1eda 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -3198,6 +3198,7 @@ def test_period(self): class TestSeriesFormatting(tm.TestCase): + _multiprocess_can_split_ = True def setUp(self): @@ -3710,6 +3711,24 @@ def test_to_string_header(self): exp = '0 0\n ..\n9 9' self.assertEqual(res, exp) + def test_sparse_max_row(self): + s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse() + result = repr(s) + exp = ("0 1.0\n1 NaN\n2 NaN\n3 3.0\n" + "4 NaN\ndtype: float64\nBlockIndex\n" + "Block locations: array([0, 3], dtype=int32)\n" + "Block lengths: array([1, 1], dtype=int32)") + self.assertEqual(result, exp) + + with option_context("display.max_rows", 3): + # GH 10560 + result = repr(s) + exp = ("0 1.0\n ... \n4 NaN\n" + "dtype: float64\nBlockIndex\n" + "Block locations: array([0, 3], dtype=int32)\n" + "Block lengths: array([1, 1], dtype=int32)") + self.assertEqual(result, exp) + class TestEngFormatter(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/util/testing.py b/pandas/util/testing.py index e40aafeeabe2f..445ceef23b908 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -536,10 +536,10 @@ def _valid_locales(locales, normalize): return list(filter(_can_set_locale, map(normalizer, locales))) - # ----------------------------------------------------------------------------- # Console debugging tools + def debug(f, *args, **kwargs): from pdb import Pdb as OldPdb try: @@ -1194,6 +1194,84 @@ def assert_panelnd_equal(left, right, assert_func=assert_panel_equal) +# ----------------------------------------------------------------------------- +# Sparse + + +def assert_sp_array_equal(left, right): + assertIsInstance(left, pd.SparseArray, '[SparseArray]') + assertIsInstance(right, pd.SparseArray, '[SparseArray]') + + assert_almost_equal(left.sp_values, right.sp_values) + assert (left.sp_index.equals(right.sp_index)) + if np.isnan(left.fill_value): + assert (np.isnan(right.fill_value)) + else: + assert (left.fill_value == right.fill_value) + + +def assert_sp_series_equal(left, right, exact_indices=True, check_names=True): + assertIsInstance(left, pd.SparseSeries, '[SparseSeries]') + assertIsInstance(right, pd.SparseSeries, '[SparseSeries]') + + assert (left.index.equals(right.index)) + assert_sp_array_equal(left.block.values, right.block.values) + if check_names: + assert_attr_equal('name', left, right) + + +def assert_sp_frame_equal(left, right, exact_indices=True): + """ + exact: Series SparseIndex objects must be exactly the same, otherwise just + compare dense representations + """ + assertIsInstance(left, pd.SparseDataFrame, '[SparseDataFrame]') + assertIsInstance(right, pd.SparseDataFrame, '[SparseDataFrame]') + + for col, series in compat.iteritems(left): + assert (col in right) + # trade-off? + + if exact_indices: + assert_sp_series_equal(series, right[col]) + else: + assert_series_equal(series.to_dense(), right[col].to_dense()) + + assert_almost_equal(left.default_fill_value, right.default_fill_value) + + # do I care? + # assert(left.default_kind == right.default_kind) + + for col in right: + assert (col in left) + + +def assert_sp_panel_equal(left, right, exact_indices=True): + assertIsInstance(left, pd.SparsePanel, '[SparsePanel]') + assertIsInstance(right, pd.SparsePanel, '[SparsePanel]') + + for item, frame in left.iteritems(): + assert (item in right) + # trade-off? + assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices) + + assert_almost_equal(left.default_fill_value, right.default_fill_value) + assert (left.default_kind == right.default_kind) + + for item in right: + assert (item in left) + + +def assert_sp_list_equal(left, right): + assertIsInstance(left, pd.SparseList, '[SparseList]') + assertIsInstance(right, pd.SparseList, '[SparseList]') + + assert_sp_array_equal(left.to_array(), right.to_array()) + +# ----------------------------------------------------------------------------- +# Others + + def assert_contains_all(iterable, dic): for k in iterable: assert k in dic, "Did not contain item: '%r'" % k