diff --git a/pandas/_libs/sparse.pyi b/pandas/_libs/sparse.pyi
new file mode 100644
index 0000000000000..aff1ed6cef74c
--- /dev/null
+++ b/pandas/_libs/sparse.pyi
@@ -0,0 +1,51 @@
+from typing import (
+    Sequence,
+    TypeVar,
+)
+
+import numpy as np
+
+from pandas._typing import npt
+
+# Self-type for methods that return an index of the receiver's own class.
+SparseIndexT = TypeVar("SparseIndexT", bound="SparseIndex")
+
+class SparseIndex:
+    # Common interface shared by the IntIndex and BlockIndex stubs below.
+    length: int
+    npoints: int
+    # Explicit "-> None": a def with no annotations at all is left untyped.
+    def __init__(self) -> None: ...
+    @property
+    def ngaps(self) -> int: ...
+    @property
+    def nbytes(self) -> int: ...
+    def equals(self, other) -> bool: ...
+    def lookup(self, index: int) -> np.int32: ...
+    def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ...
+    def to_int_index(self) -> IntIndex: ...
+    def to_block_index(self) -> BlockIndex: ...
+    # intersect/make_union return the receiver's type, whatever kind y_ is.
+    def intersect(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ...
+    def make_union(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ...
+
+class IntIndex(SparseIndex):
+    indices: npt.NDArray[np.int32]
+    def __init__(
+        self, length: int, indices: Sequence[int], check_integrity: bool = True
+    ) -> None: ...
+
+class BlockIndex(SparseIndex):
+    nblocks: int
+    blocs: np.ndarray
+    blengths: np.ndarray
+    def __init__(
+        self, length: int, blocs: np.ndarray, blengths: np.ndarray
+    ) -> None: ...
+
+def make_mask_object_ndarray(
+    arr: npt.NDArray[np.object_], fill_value
+) -> npt.NDArray[np.bool_]: ...
+def get_blocks(
+    indices: npt.NDArray[np.int32],
+) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]: ...
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 77142ef450487..0822cdddf7bd3 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1063,20 +1063,21 @@ def _concat_same_type( else: sp_kind = "integer" + sp_index: SparseIndex if sp_kind == "integer": indices = [] for arr in to_concat: - idx = arr.sp_index.to_int_index().indices.copy() - idx += length # TODO: wraparound + int_idx = arr.sp_index.to_int_index().indices.copy() + int_idx += length # TODO: wraparound length += arr.sp_index.length values.append(arr.sp_values) - indices.append(idx) + indices.append(int_idx) data = np.concatenate(values) - indices = np.concatenate(indices) - sp_index = IntIndex(length, indices) + indices_arr = np.concatenate(indices) + sp_index = IntIndex(length, indices_arr) else: # when concatenating block indices, we don't claim that you'll @@ -1088,18 +1089,18 @@ def _concat_same_type( blocs = [] for arr in to_concat: - idx = arr.sp_index.to_block_index() + block_idx = arr.sp_index.to_block_index() values.append(arr.sp_values) - blocs.append(idx.blocs.copy() + length) - blengths.append(idx.blengths) + blocs.append(block_idx.blocs.copy() + length) + blengths.append(block_idx.blengths) length += arr.sp_index.length data = np.concatenate(values) - blocs = np.concatenate(blocs) - blengths = np.concatenate(blengths) + blocs_arr = np.concatenate(blocs) + blengths_arr = np.concatenate(blengths) - sp_index = BlockIndex(length, blocs, blengths) + sp_index = BlockIndex(length, blocs_arr, blengths_arr) return cls(data, sparse_index=sp_index, fill_value=fill_value) @@ -1666,8 +1667,9 @@ def make_sparse( return sparsified_values, index, fill_value -def make_sparse_index(length, indices, kind): +def make_sparse_index(length, indices, kind) -> SparseIndex: + index: SparseIndex if kind == "block" or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens)