diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
index a9b42b563f931..87c8645c0e3d3 100644
--- a/doc/source/whatsnew/v0.18.1.txt
+++ b/doc/source/whatsnew/v0.18.1.txt
@@ -114,6 +114,9 @@ These changes conform sparse handling to return the correct types and work to ma
 - Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`)
 - Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`)
 - Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may results in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`)
+- Bug in ``SparseArray`` addition ignores the ``fill_value`` of the right-hand side (:issue:`12910`)
+- Bug in ``SparseArray`` mod raises ``AttributeError`` (:issue:`12910`)
+- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan``, which should be 1 (:issue:`12910`)
 - Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
 - Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
 - Bug in ``SparseSeries.reindex`` incorrectly handle ``fill_value`` (:issue:`12797`)
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
index 92eb2a9230c3b..d1532d5fbd733 100644
--- a/pandas/sparse/array.py
+++ b/pandas/sparse/array.py
@@ -59,10 +59,7 @@ def wrapper(self, other):
 
 
 def _sparse_array_op(left, right, op, name):
-    if np.isnan(left.fill_value):
-        sparse_op = lambda a, b: _sparse_nanop(a, b, name)
-    else:
-        sparse_op = lambda a, b: _sparse_fillop(a, b, name)
+    sparse_op = lambda a, b: _sparse_op(a, b, name)
 
     if left.sp_index.equals(right.sp_index):
         result = op(left.sp_values, right.sp_values)
@@ -79,15 +76,7 @@ def _sparse_array_op(left, right, op, name):
                        fill_value=fill_value)
 
 
-def _sparse_nanop(this, other, name):
-    sparse_op = getattr(splib, 'sparse_nan%s' % name)
-    result, result_index = sparse_op(this.sp_values, this.sp_index,
-                                     other.sp_values, other.sp_index)
-
-    return result, result_index
-
-
-def _sparse_fillop(this, other, name):
+def _sparse_op(this, other, name):
     sparse_op = getattr(splib, 'sparse_%s' % name)
     result, result_index = sparse_op(this.sp_values, this.sp_index,
                                      this.fill_value, other.sp_values,
diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py
index b3d30fe272d71..064c4be15dfb0 100644
--- a/pandas/sparse/tests/test_array.py
+++ b/pandas/sparse/tests/test_array.py
@@ -543,6 +543,69 @@ def test_fillna_overlap(self):
         tm.assert_sp_array_equal(res, exp)
 
 
+class TestSparseArrayArithmetic(tm.TestCase):
+
+    _multiprocess_can_split_ = True
+
+    def _check_numeric_ops(self, a, b, a_dense, b_dense):
+        tm.assert_numpy_array_equal((a + b).to_dense(), a_dense + b_dense)
+        tm.assert_numpy_array_equal((b + a).to_dense(), b_dense + a_dense)
+
+        tm.assert_numpy_array_equal((a - b).to_dense(), a_dense - b_dense)
+        tm.assert_numpy_array_equal((b - a).to_dense(), b_dense - a_dense)
+
+        tm.assert_numpy_array_equal((a * b).to_dense(), a_dense * b_dense)
+        tm.assert_numpy_array_equal((b * a).to_dense(), b_dense * a_dense)
+
+        tm.assert_numpy_array_equal((a / b).to_dense(), a_dense / b_dense)
+        tm.assert_numpy_array_equal((b / a).to_dense(), b_dense / a_dense)
+
+        tm.assert_numpy_array_equal((a // b).to_dense(), a_dense // b_dense)
+        tm.assert_numpy_array_equal((b // a).to_dense(), b_dense // a_dense)
+
+        tm.assert_numpy_array_equal((a % b).to_dense(), a_dense % b_dense)
+        tm.assert_numpy_array_equal((b % a).to_dense(), b_dense % a_dense)
+
+        tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense)
+        tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense)
+
+    def test_float_scalar(self):
+        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+
+        a = SparseArray(values)
+        self._check_numeric_ops(a, 1, values, 1)
+        self._check_numeric_ops(a, 0, values, 0)
+
+        a = SparseArray(values, fill_value=0)
+        self._check_numeric_ops(a, 1, values, 1)
+        self._check_numeric_ops(a, 0, values, 0)
+
+        a = SparseArray(values, fill_value=2)
+        self._check_numeric_ops(a, 1, values, 1)
+        self._check_numeric_ops(a, 0, values, 0)
+
+    def test_float_array(self):
+        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
+
+        a = SparseArray(values)
+        b = SparseArray(rvalues)
+        self._check_numeric_ops(a, b, values, rvalues)
+        self._check_numeric_ops(a, b * 0, values, rvalues * 0)
+
+        a = SparseArray(values, fill_value=0)
+        b = SparseArray(rvalues)
+        self._check_numeric_ops(a, b, values, rvalues)
+
+        a = SparseArray(values, fill_value=0)
+        b = SparseArray(rvalues, fill_value=0)
+        self._check_numeric_ops(a, b, values, rvalues)
+
+        a = SparseArray(values, fill_value=1)
+        b = SparseArray(rvalues, fill_value=2)
+        self._check_numeric_ops(a, b, values, rvalues)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py
index 57baae08725c0..293e50424b075 100644
--- a/pandas/sparse/tests/test_libsparse.py
+++ b/pandas/sparse/tests/test_libsparse.py
@@ -269,31 +269,6 @@ def test_to_int_index(self):
 
 
 class TestSparseOperators(tm.TestCase):
-    def _nan_op_tests(self, sparse_op, python_op):
-        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
-            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
-            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
-
-            xdindex = xindex.to_int_index()
-            ydindex = yindex.to_int_index()
-
-            x = np.arange(xindex.npoints) * 10. + 1
-            y = np.arange(yindex.npoints) * 100. + 1
-
-            result_block_vals, rb_index = sparse_op(x, xindex, y, yindex)
-            result_int_vals, ri_index = sparse_op(x, xdindex, y, ydindex)
-
-            self.assertTrue(rb_index.to_int_index().equals(ri_index))
-            assert_equal(result_block_vals, result_int_vals)
-
-            # check versus Series...
-            xseries = Series(x, xdindex.indices)
-            yseries = Series(y, ydindex.indices)
-            series_result = python_op(xseries, yseries).valid()
-            assert_equal(result_block_vals, series_result.values)
-            assert_equal(result_int_vals, series_result.values)
-
-        check_cases(_check_case)
 
     def _op_tests(self, sparse_op, python_op):
         def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
@@ -337,16 +312,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
 check_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv']
 
 
-def make_nanoptestf(op):
-    def f(self):
-        sparse_op = getattr(splib, 'sparse_nan%s' % op)
-        python_op = getattr(operator, op)
-        self._nan_op_tests(sparse_op, python_op)
-
-    f.__name__ = 'test_nan%s' % op
-    return f
-
-
 def make_optestf(op):
     def f(self):
         sparse_op = getattr(splib, 'sparse_%s' % op)
@@ -358,13 +323,11 @@ def f(self):
 
 
 for op in check_ops:
-    f = make_nanoptestf(op)
     g = make_optestf(op)
-    setattr(TestSparseOperators, f.__name__, f)
     setattr(TestSparseOperators, g.__name__, g)
-    del f
     del g
 
+
 if __name__ == '__main__':
     import nose  # noqa
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/src/sparse.pyx b/pandas/src/sparse.pyx
index 4797f3ce71618..5d523fcfc2778 100644
--- a/pandas/src/sparse.pyx
+++ b/pandas/src/sparse.pyx
@@ -765,20 +765,6 @@ cdef class BlockUnion(BlockMerge):
 
 ctypedef float64_t (* double_func)(float64_t a, float64_t b)
 
-cdef inline tuple sparse_nancombine(ndarray x, SparseIndex xindex,
-                                    ndarray y, SparseIndex yindex,
-                                    double_func op):
-    # faster to convert to IntIndex
-    return int_nanop(x, xindex.to_int_index(),
-                     y, yindex.to_int_index(), op)
-
-    # if isinstance(xindex, BlockIndex):
-    #     return block_nanop(x, xindex.to_block_index(),
-    #                        y, yindex.to_block_index(), op)
-    # elif isinstance(xindex, IntIndex):
-    #     return int_nanop(x, xindex.to_int_index(),
-    #                      y, yindex.to_int_index(), op)
-
 cdef inline tuple sparse_combine(ndarray x, SparseIndex xindex, float64_t xfill,
                                  ndarray y, SparseIndex yindex, float64_t yfill,
@@ -790,115 +776,6 @@ cdef inline tuple sparse_combine(ndarray x, SparseIndex xindex, float64_t xfill,
     return int_op(x, xindex.to_int_index(), xfill,
                   y, yindex.to_int_index(), yfill, op)
 
-# NaN-based arithmetic operation-- no handling of fill values
-# TODO: faster to convert everything to dense?
-
-@cython.boundscheck(False)
-cdef inline tuple block_nanop(ndarray x_, BlockIndex xindex,
-                              ndarray y_, BlockIndex yindex,
-                              double_func op):
-    cdef:
-        BlockIndex out_index
-        Py_ssize_t xi = 0, yi = 0, out_i = 0  # fp buf indices
-        Py_ssize_t xbp = 0, ybp = 0, obp = 0  # block positions
-        Py_ssize_t xblock = 0, yblock = 0, outblock = 0  # block numbers
-
-        ndarray[float64_t, ndim=1] x, y
-        ndarray[float64_t, ndim=1] out
-
-    # suppress Cython compiler warnings due to inlining
-    x = x_
-    y = y_
-
-    out_index = xindex.intersect(yindex)
-    out = np.empty(out_index.npoints, dtype=np.float64)
-
-    # walk the two SparseVectors, adding matched locations...
-    for out_i from 0 <= out_i < out_index.npoints:
-
-        # I have a feeling this is inefficient
-
-        # walk x
-        while xindex.locbuf[xblock] + xbp < out_index.locbuf[outblock] + obp:
-            xbp += 1
-            xi += 1
-            if xbp == xindex.lenbuf[xblock]:
-                xblock += 1
-                xbp = 0
-
-        # walk y
-        while yindex.locbuf[yblock] + ybp < out_index.locbuf[outblock] + obp:
-            ybp += 1
-            yi += 1
-            if ybp == yindex.lenbuf[yblock]:
-                yblock += 1
-                ybp = 0
-
-        out[out_i] = op(x[xi], y[yi])
-
-        # advance. strikes me as too complicated
-        xi += 1
-        yi += 1
-
-        xbp += 1
-        if xbp == xindex.lenbuf[xblock]:
-            xblock += 1
-            xbp = 0
-
-        ybp += 1
-        if ybp == yindex.lenbuf[yblock]:
-            yblock += 1
-            ybp = 0
-
-        obp += 1
-        if obp == out_index.lenbuf[outblock]:
-            outblock += 1
-            obp = 0
-
-    return out, out_index
-
-@cython.boundscheck(False)
-cdef inline tuple int_nanop(ndarray x_, IntIndex xindex,
-                            ndarray y_, IntIndex yindex,
-                            double_func op):
-    cdef:
-        IntIndex out_index
-        Py_ssize_t xi = 0, yi = 0, out_i = 0  # fp buf indices
-        ndarray[int32_t, ndim=1] xindices, yindices, out_indices
-        ndarray[float64_t, ndim=1] x, y
-        ndarray[float64_t, ndim=1] out
-
-    # suppress Cython compiler warnings due to inlining
-    x = x_
-    y = y_
-
-    # need to do this first to know size of result array
-    out_index = xindex.intersect(yindex)
-    out = np.empty(out_index.npoints, dtype=np.float64)
-
-    xindices = xindex.indices
-    yindices = yindex.indices
-    out_indices = out_index.indices
-
-    # walk the two SparseVectors, adding matched locations...
-    for out_i from 0 <= out_i < out_index.npoints:
-
-        # walk x
-        while xindices[xi] < out_indices[out_i]:
-            xi += 1
-
-        # walk y
-        while yindices[yi] < out_indices[out_i]:
-            yi += 1
-
-        out[out_i] = op(x[xi], y[yi])
-
-        # advance
-        xi += 1
-        yi += 1
-
-    return out, out_index
-
 @cython.boundscheck(False)
 cdef inline tuple block_op(ndarray x_, BlockIndex xindex, float64_t xfill,
@@ -1095,19 +972,29 @@ cdef inline float64_t __rfloordiv(float64_t a, float64_t b):
 cdef inline float64_t __mul(float64_t a, float64_t b):
     return a * b
 
+
 cdef inline float64_t __eq(float64_t a, float64_t b):
     return a == b
 
+
 cdef inline float64_t __ne(float64_t a, float64_t b):
     return a != b
 
+
 cdef inline float64_t __lt(float64_t a, float64_t b):
     return a < b
 
+
 cdef inline float64_t __gt(float64_t a, float64_t b):
     return a > b
 
-cdef inline float64_t __pow(float64_t a, float64_t b):
-    # NaN
-    if a != a or b != b:
+cdef inline float64_t __mod(float64_t a, float64_t b):
+    if b == 0:
         return NaN
+    else:
+        return a % b
+
+cdef inline float64_t __rmod(float64_t a, float64_t b):
+    return __mod(b, a)
+
+cdef inline float64_t __pow(float64_t a, float64_t b):
     return a ** b
 
 cdef inline float64_t __rpow(float64_t a, float64_t b):
@@ -1117,49 +1004,6 @@
 # This probably needs to be "templated" to achieve maximum performance.
 # TODO: quantify performance boost to "templating"
 
-cpdef sparse_nanadd(ndarray x, SparseIndex xindex,
-                    ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __add)
-
-cpdef sparse_nansub(ndarray x, SparseIndex xindex,
-                    ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __sub)
-
-cpdef sparse_nanrsub(ndarray x, SparseIndex xindex,
-                     ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __rsub)
-
-cpdef sparse_nanmul(ndarray x, SparseIndex xindex,
-                    ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __mul)
-
-cpdef sparse_nandiv(ndarray x, SparseIndex xindex,
-                    ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __div)
-
-cpdef sparse_nanrdiv(ndarray x, SparseIndex xindex,
-                     ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __rdiv)
-
-sparse_nantruediv = sparse_nandiv
-sparse_nanrtruediv = sparse_nanrdiv
-
-cpdef sparse_nanfloordiv(ndarray x, SparseIndex xindex,
-                         ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __floordiv)
-
-cpdef sparse_nanrfloordiv(ndarray x, SparseIndex xindex,
-                          ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __rfloordiv)
-
-cpdef sparse_nanpow(ndarray x, SparseIndex xindex,
-                    ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __pow)
-
-cpdef sparse_nanrpow(ndarray x, SparseIndex xindex,
-                     ndarray y, SparseIndex yindex):
-    return sparse_nancombine(x, xindex, y, yindex, __rpow)
-
 cpdef sparse_add(ndarray x, SparseIndex xindex, float64_t xfill,
                  ndarray y, SparseIndex yindex, float64_t yfill):
     return sparse_combine(x, xindex, xfill,
@@ -1171,7 +1015,7 @@ cpdef sparse_sub(ndarray x, SparseIndex xindex, float64_t xfill,
                           y, yindex, yfill, __sub)
 
 cpdef sparse_rsub(ndarray x, SparseIndex xindex, float64_t xfill,
-                 ndarray y, SparseIndex yindex, float64_t yfill):
+                  ndarray y, SparseIndex yindex, float64_t yfill):
     return sparse_combine(x, xindex, xfill,
                           y, yindex, yfill, __rsub)
 
@@ -1186,7 +1030,7 @@ cpdef sparse_div(ndarray x, SparseIndex xindex, float64_t xfill,
                           y, yindex, yfill, __div)
 
 cpdef sparse_rdiv(ndarray x, SparseIndex xindex, float64_t xfill,
-                 ndarray y, SparseIndex yindex, float64_t yfill):
+                  ndarray y, SparseIndex yindex, float64_t yfill):
     return sparse_combine(x, xindex, xfill,
                           y, yindex, yfill, __rdiv)
 
@@ -1194,22 +1038,32 @@ sparse_truediv = sparse_div
 sparse_rtruediv = sparse_rdiv
 
 cpdef sparse_floordiv(ndarray x, SparseIndex xindex, float64_t xfill,
-                     ndarray y, SparseIndex yindex, float64_t yfill):
+                      ndarray y, SparseIndex yindex, float64_t yfill):
     return sparse_combine(x, xindex, xfill,
                           y, yindex, yfill, __floordiv)
 
 cpdef sparse_rfloordiv(ndarray x, SparseIndex xindex, float64_t xfill,
-                      ndarray y, SparseIndex yindex, float64_t yfill):
+                       ndarray y, SparseIndex yindex, float64_t yfill):
     return sparse_combine(x, xindex, xfill,
                           y, yindex, yfill, __rfloordiv)
 
+cpdef sparse_mod(ndarray x, SparseIndex xindex, float64_t xfill,
+                 ndarray y, SparseIndex yindex, float64_t yfill):
+    return sparse_combine(x, xindex, xfill,
+                          y, yindex, yfill, __mod)
+
+cpdef sparse_rmod(ndarray x, SparseIndex xindex, float64_t xfill,
+                  ndarray y, SparseIndex yindex, float64_t yfill):
+    return sparse_combine(x, xindex, xfill,
+                          y, yindex, yfill, __rmod)
+
 cpdef sparse_pow(ndarray x, SparseIndex xindex, float64_t xfill,
                  ndarray y, SparseIndex yindex, float64_t yfill):
     return sparse_combine(x, xindex, xfill,
                          y, yindex, yfill, __pow)
 
 cpdef sparse_rpow(ndarray x, SparseIndex xindex, float64_t xfill,
-                 ndarray y, SparseIndex yindex, float64_t yfill):
+                  ndarray y, SparseIndex yindex, float64_t yfill):
    return sparse_combine(x, xindex, xfill,
                          y, yindex, yfill, __rpow)
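
Note: a quick illustration of the three whatsnew entries above, written against the 0.18-era top-level ``pandas.SparseArray`` API. This is a sketch of the post-fix behavior, not test code from the patch:

```python
import numpy as np
import pandas as pd

left = pd.SparseArray([np.nan, 1.0, 0.0, 2.0], fill_value=0)
right = pd.SparseArray([1.0, np.nan, 2.0, 0.0], fill_value=2)

# Addition now honors the right-hand side's fill_value, so the sparse
# result matches the dense computation elementwise.
(left + right).to_dense()  # == left.to_dense() + right.to_dense()

# Mod now dispatches to the new sparse_mod kernel instead of raising
# AttributeError (the old lookup had no 'sparse_nanmod' to find).
(left % right).to_dense()

# pow follows IEEE 754 semantics: 1 ** nan is 1, not nan.
(pd.SparseArray([1.0, np.nan]) ** np.nan).to_dense()  # -> [1.0, nan]
```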
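The scalar kernels carry the mod and pow fixes. Below is a pure-Python rendering of the new ``__mod``/``__rmod``/``__pow`` logic; it is a reference sketch only, since the real kernels are the inlined Cython ``float64_t`` functions above:

```python
import math

NaN = float("nan")


def mod_kernel(a, b):
    # Mod by zero yields NaN instead of raising, so the kernel can be
    # applied uniformly to sp_values and fill values alike.
    if b == 0:
        return NaN
    return a % b


def rmod_kernel(a, b):
    # The reflected variant just swaps the operands, as __rmod does.
    return mod_kernel(b, a)


def pow_kernel(a, b):
    # The old __pow short-circuited any NaN operand to NaN, which broke
    # 1 ** nan.  Plain ** already follows IEEE 754 pow: 1 ** nan == 1.
    return a ** b


assert pow_kernel(1.0, NaN) == 1.0       # the case this patch fixes
assert math.isnan(pow_kernel(NaN, 2.0))  # other NaN inputs still propagate
assert math.isnan(mod_kernel(1.0, 0.0))
```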
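The new ``_check_numeric_ops`` helper pins down the contract the kernels must satisfy: every sparse op has to agree with the same op applied to the densified operands, with the result's ``fill_value`` derived from combining both inputs' fill values. A hypothetical dense reference (the helper name ``dense_reference`` is ours, not pandas'):

```python
import operator

import numpy as np


def dense_reference(op, values, fill, other_values, other_fill):
    # What a correct sparse op computes, spelled out densely: apply op
    # elementwise, and combine both fill_values for the result's
    # fill_value -- the right-hand contribution the old addition path
    # dropped.
    result = op(np.asarray(values, dtype=np.float64),
                np.asarray(other_values, dtype=np.float64))
    result_fill = op(np.float64(fill), np.float64(other_fill))
    return result, result_fill


vals, fill = dense_reference(operator.add,
                             [np.nan, 1.0, 0.0], 0.0,
                             [1.0, np.nan, 2.0], 2.0)
assert fill == 2.0  # 0.0 + 2.0: both sides' fill_values participate
```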