diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index cee41f248fc60..386fe3ce2160f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1124,6 +1124,7 @@ Sparse - Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`) - The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`) - Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`) +- Bug in :meth:`arrays.SparseArray` was returning the incorrect type when indexing a sparse dataframe with an iterable (:issue:`34526`, :issue:`34540`) ExtensionArray ^^^^^^^^^^^^^^ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4996a10002c63..b18a58da3950f 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -866,11 +866,8 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: if self.sp_index.npoints == 0: # Avoid taking from the empty self.sp_values - taken = np.full( - sp_indexer.shape, - fill_value=fill_value, - dtype=np.result_type(type(fill_value)), - ) + _dtype = np.result_type(self.dtype.subtype, type(fill_value)) + taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype) else: taken = self.sp_values.take(sp_indexer) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 3865ea64ee479..3fa3c9303806f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -21,7 +21,6 @@ notna, ) import pandas._testing as tm -from pandas.arrays import SparseArray import pandas.core.common as com from pandas.core.indexing import IndexingError @@ -1907,20 +1906,6 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[[1, -1], 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) - def test_getitem_sparse_column(self): - # https://github.com/pandas-dev/pandas/issues/23559 - data = SparseArray([0, 1]) - df = pd.DataFrame({"A": data}) - expected = pd.Series(data, name="A") - result = df["A"] - tm.assert_series_equal(result, expected) - - result = df.iloc[:, 0] - tm.assert_series_equal(result, expected) - - result = df.loc[:, "A"] - tm.assert_series_equal(result, expected) - def test_setitem_with_unaligned_tz_aware_datetime_column(self): # GH 12981 # Assignment of unaligned offset-aware datetime series. diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py new file mode 100644 index 0000000000000..876fbe212c466 --- /dev/null +++ b/pandas/tests/frame/indexing/test_sparse.py @@ -0,0 +1,51 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import SparseArray +from pandas.core.arrays.sparse import SparseDtype + + +class TestSparseDataFrameIndexing: + def test_getitem_sparse_column(self): + # https://github.com/pandas-dev/pandas/issues/23559 + data = SparseArray([0, 1]) + df = pd.DataFrame({"A": data}) + expected = pd.Series(data, name="A") + result = df["A"] + tm.assert_series_equal(result, expected) + + result = df.iloc[:, 0] + tm.assert_series_equal(result, expected) + + result = df.loc[:, "A"] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) + @td.skip_if_no_scipy + def test_locindexer_from_spmatrix(self, spmatrix_t, dtype): + import scipy.sparse + + spmatrix_t = getattr(scipy.sparse, spmatrix_t) + + # The bug is triggered by a sparse matrix with purely sparse columns. So the + # recipe below generates a rectangular matrix of dimension (5, 7) where all the + # diagonal cells are ones, meaning the last two columns are purely sparse. + rows, cols = 5, 7 + spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) + df = pd.DataFrame.sparse.from_spmatrix(spmatrix) + + # regression test for #34526 + itr_idx = range(2, rows) + result = df.loc[itr_idx].values + expected = spmatrix.toarray()[itr_idx] + tm.assert_numpy_array_equal(result, expected) + + # regression test for #34540 + result = df.loc[itr_idx].dtypes.values + expected = np.full(cols, SparseDtype(dtype, fill_value=0)) + tm.assert_numpy_array_equal(result, expected)