diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index c9f979e323445..717fe7218ceda 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -55,7 +55,7 @@ def time_sparse_series_to_coo(self): self.ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True) -class sparse_arithmetic(object): +class sparse_arithmetic_int(object): goal_time = 0.2 def setup(self): @@ -75,6 +75,12 @@ def make_sparse_array(self, length, dense_size, fill_value): arr[indexer] = np.random.randint(0, 100, len(indexer)) return pd.SparseArray(arr, fill_value=fill_value) + def time_sparse_make_union(self): + self.a_10percent.sp_index.make_union(self.b_10percent.sp_index) + + def time_sparse_intersect(self): + self.a_10percent.sp_index.intersect(self.b_10percent.sp_index) + def time_sparse_addition_10percent(self): self.a_10percent + self.b_10percent @@ -92,3 +98,45 @@ def time_sparse_division_10percent_zero(self): def time_sparse_division_1percent(self): self.a_1percent / self.b_1percent + + + +class sparse_arithmetic_block(object): + goal_time = 0.2 + + def setup(self): + np.random.seed(1) + self.a = self.make_sparse_array(length=1000000, num_blocks=1000, + block_size=10, fill_value=np.nan) + self.b = self.make_sparse_array(length=1000000, num_blocks=1000, + block_size=10, fill_value=np.nan) + + self.a_zero = self.make_sparse_array(length=1000000, num_blocks=1000, + block_size=10, fill_value=0) + self.b_zero = self.make_sparse_array(length=1000000, num_blocks=1000, + block_size=10, fill_value=np.nan) + + def make_sparse_array(self, length, num_blocks, block_size, fill_value): + a = np.array([fill_value] * length) + for block in range(num_blocks): + i = np.random.randint(0, length) + a[i:i + block_size] = np.random.randint(0, 100, len(a[i:i + block_size])) + return pd.SparseArray(a, fill_value=fill_value) + + def time_sparse_make_union(self): + self.a.sp_index.make_union(self.b.sp_index) + + def time_sparse_intersect(self): + self.a.sp_index.intersect(self.b.sp_index) + + def time_sparse_addition(self): + self.a + self.b + + def time_sparse_addition_zero(self): + self.a_zero + self.b_zero + + def time_sparse_division(self): + self.a / self.b + + def time_sparse_division_zero(self): + self.a_zero / self.b_zero diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index fee98720990e0..ef531a0a083b0 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -66,7 +66,8 @@ Deprecations Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - +- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`) +- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks are large, though recommended to use ``IntIndex`` in such case (:issue:`13082`) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 71a6e7fc043eb..352355fd55c23 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -186,6 +186,57 @@ def test_intindex_make_union(self): a.make_union(b) +class TestSparseIndexIntersect(tm.TestCase): + + def test_intersect(self): + def _check_correct(a, b, expected): + result = a.intersect(b) + assert (result.equals(expected)) + + def _check_length_exc(a, longer): + nose.tools.assert_raises(Exception, a.intersect, longer) + + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + expected = BlockIndex(TEST_LENGTH, eloc, elen) + longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) + + _check_correct(xindex, yindex, expected) + _check_correct(xindex.to_int_index(), yindex.to_int_index(), + expected.to_int_index()) + + _check_length_exc(xindex, longer_index) + _check_length_exc(xindex.to_int_index(), + longer_index.to_int_index()) + + if compat.is_platform_windows(): + raise nose.SkipTest("segfaults on win-64 when all tests are run") + check_cases(_check_case) + + def test_intersect_empty(self): + xindex = IntIndex(4, np.array([], dtype=np.int32)) + yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) + self.assertTrue(xindex.intersect(yindex).equals(xindex)) + self.assertTrue(yindex.intersect(xindex).equals(xindex)) + + xindex = xindex.to_block_index() + yindex = yindex.to_block_index() + self.assertTrue(xindex.intersect(yindex).equals(xindex)) + self.assertTrue(yindex.intersect(xindex).equals(xindex)) + + def test_intersect_identical(self): + cases = [IntIndex(5, np.array([1, 2], dtype=np.int32)), + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), + IntIndex(0, np.array([], dtype=np.int32)), + IntIndex(5, np.array([], dtype=np.int32))] + + for case in cases: + self.assertTrue(case.intersect(case).equals(case)) + case = case.to_block_index() + self.assertTrue(case.intersect(case).equals(case)) + + class TestSparseIndexCommon(tm.TestCase): _multiprocess_can_split_ = True @@ -281,32 +332,6 @@ def _check(index): # corner cases -def test_intersect(): - def _check_correct(a, b, expected): - result = a.intersect(b) - assert (result.equals(expected)) - - def _check_length_exc(a, longer): - nose.tools.assert_raises(Exception, a.intersect, longer) - - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - expected = BlockIndex(TEST_LENGTH, eloc, elen) - longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) - - _check_correct(xindex, yindex, expected) - _check_correct(xindex.to_int_index(), yindex.to_int_index(), - expected.to_int_index()) - - _check_length_exc(xindex, longer_index) - _check_length_exc(xindex.to_int_index(), longer_index.to_int_index()) - - if compat.is_platform_windows(): - raise nose.SkipTest("segfaults on win-64 when all tests are run") - check_cases(_check_case) - - class TestBlockIndex(tm.TestCase): _multiprocess_can_split_ = True @@ -345,6 +370,16 @@ def test_block_internal(self): tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) + def test_make_block_boundary(self): + for i in [5, 10, 100, 101]: + idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32), + kind='block') + + exp = np.arange(0, i, 2, dtype=np.int32) + tm.assert_numpy_array_equal(idx.blocs, exp) + tm.assert_numpy_array_equal(idx.blengths, + np.ones(len(exp), dtype=np.int32)) + def test_equals(self): index = BlockIndex(10, [0, 4], [2, 5]) @@ -413,6 +448,7 @@ def test_equals(self): self.assertFalse(index.equals(IntIndex(10, [0, 1, 2, 3]))) def test_to_block_index(self): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) yindex = BlockIndex(TEST_LENGTH, yloc, ylen) diff --git a/pandas/src/sparse.pyx b/pandas/src/sparse.pyx index 474733a1e005c..94ae26e00f087 100644 --- a/pandas/src/sparse.pyx +++ b/pandas/src/sparse.pyx @@ -98,10 +98,9 @@ cdef class IntIndex(SparseIndex): cpdef IntIndex intersect(self, SparseIndex y_): cdef: - Py_ssize_t out_length, xi, yi = 0 + Py_ssize_t out_length, xi, yi = 0, result_indexer = 0 int32_t xind - ndarray[int32_t, ndim=1] xindices, yindices - list new_list = [] + ndarray[int32_t, ndim=1] xindices, yindices, new_indices IntIndex y # if is one already, returns self @@ -112,6 +111,7 @@ cdef class IntIndex(SparseIndex): xindices = self.indices yindices = y.indices + new_indices = np.empty(min(len(xindices), len(yindices)), dtype=np.int32) for xi from 0 <= xi < self.npoints: xind = xindices[xi] @@ -124,9 +124,11 @@ cdef class IntIndex(SparseIndex): # TODO: would a two-pass algorithm be faster? if yindices[yi] == xind: - new_list.append(xind) + new_indices[result_indexer] = xind + result_indexer += 1 - return IntIndex(self.length, new_list) + new_indices = new_indices[:result_indexer] + return IntIndex(self.length, new_indices) cpdef IntIndex make_union(self, SparseIndex y_): @@ -238,15 +240,19 @@ cdef class IntIndex(SparseIndex): cpdef get_blocks(ndarray[int32_t, ndim=1] indices): cdef: - Py_ssize_t i, npoints + Py_ssize_t init_len, i, npoints, result_indexer = 0 int32_t block, length = 1, cur, prev - list locs = [], lens = [] + ndarray[int32_t, ndim=1] locs, lens npoints = len(indices) # just handle the special empty case separately if npoints == 0: - return [], [] + return np.array([], dtype=np.int32), np.array([], dtype=np.int32) + + # block size can't be longer than npoints + locs = np.empty(npoints, dtype=np.int32) + lens = np.empty(npoints, dtype=np.int32) # TODO: two-pass algorithm faster? prev = block = indices[0] @@ -254,18 +260,22 @@ cpdef get_blocks(ndarray[int32_t, ndim=1] indices): cur = indices[i] if cur - prev > 1: # new block - locs.append(block) - lens.append(length) + locs[result_indexer] = block + lens[result_indexer] = length block = cur length = 1 + result_indexer += 1 else: # same block, increment length length += 1 prev = cur - locs.append(block) - lens.append(length) + locs[result_indexer] = block + lens[result_indexer] = length + result_indexer += 1 + locs = locs[:result_indexer] + lens = lens[:result_indexer] return locs, lens #------------------------------------------------------------------------------- @@ -398,12 +408,8 @@ cdef class BlockIndex(SparseIndex): """ cdef: BlockIndex y - ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen - - list out_blocs = [] - list out_blengths = [] - - Py_ssize_t xi = 0, yi = 0 + ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen + Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0 int32_t cur_loc, cur_length, diff y = other.to_block_index() @@ -416,6 +422,11 @@ cdef class BlockIndex(SparseIndex): yloc = y.blocs ylen = y.blengths + # block may be split, but can't exceed original len / 2 + 1 + max_len = int(min(self.length, y.length) / 2) + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + while True: # we are done (or possibly never began) if xi >= self.nblocks or yi >= y.nblocks: @@ -458,10 +469,14 @@ cdef class BlockIndex(SparseIndex): cur_length = ylen[yi] yi += 1 - out_blocs.append(cur_loc) - out_blengths.append(cur_length) + out_bloc[result_indexer] = cur_loc + out_blen[result_indexer] = cur_length + result_indexer += 1 - return BlockIndex(self.length, out_blocs, out_blengths) + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.length, out_bloc, out_blen) cpdef BlockIndex make_union(self, SparseIndex y): """ @@ -626,15 +641,19 @@ cdef class BlockUnion(BlockMerge): cdef _make_merged_blocks(self): cdef: - ndarray[int32_t, ndim=1] xstart, xend, ystart, yend + ndarray[int32_t, ndim=1] xstart, xend, ystart, yend, out_bloc, out_blen int32_t nstart, nend, diff - list out_blocs = [], out_blengths = [] + Py_ssize_t max_len, result_indexer = 0 xstart = self.xstart xend = self.xend ystart = self.ystart yend = self.yend + max_len = int(min(self.x.length, self.y.length) / 2) + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + while True: # we are done (or possibly never began) if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks: @@ -658,10 +677,14 @@ cdef class BlockUnion(BlockMerge): nstart = ystart[self.yi] nend = self._find_next_block_end(1) - out_blocs.append(nstart) - out_blengths.append(nend - nstart) + out_bloc[result_indexer] = nstart + out_blen[result_indexer] = nend - nstart + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] - return BlockIndex(self.x.length, out_blocs, out_blengths) + return BlockIndex(self.x.length, out_bloc, out_blen) cdef int32_t _find_next_block_end(self, bint mode) except -1: """