diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index c8c1a962e6861..bcc3edab4a349 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -91,6 +91,21 @@ def time_sparse_series_to_coo_single_level(self, sort_labels):
         self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
 
 
+class ToCooFrame:
+    def setup(self):
+        N = 10000
+        k = 10
+        # All-NaN (N, k) array with only three non-NaN entries.
+        arr = np.full((N, k), np.nan)
+        arr[0, 0] = 3.0
+        arr[12, 7] = -1.0
+        arr[0, 9] = 11.2
+        self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
+
+    def time_to_coo(self):
+        self.df.sparse.to_coo()
+
+
 class Arithmetic:
 
     params = ([0.1, 0.01], [0, np.nan])
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index f19b0fe10fe6e..2214b85ba424a 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -431,6 +431,7 @@ Reshaping
 
 Sparse
 ^^^^^^
+- Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
 -
 -
 
diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index f3eccd6aad444..60a316b79df2e 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -339,12 +339,13 @@ def to_coo(self):
             dtype = dtype.subtype
 
         cols, rows, data = [], [], []
-        for col, name in enumerate(self._parent):
-            s = self._parent[name]
-            row = s.array.sp_index.to_int_index().indices
+        # Walk the columns positionally: looking a column up by label returns
+        # a DataFrame rather than a Series when labels are duplicated.
+        for col, (_, ser) in enumerate(self._parent.items()):
+            row = ser.array.sp_index.to_int_index().indices
             cols.append(np.repeat(col, len(row)))
             rows.append(row)
-            data.append(s.array.sp_values.astype(dtype, copy=False))
+            data.append(ser.array.sp_values.astype(dtype, copy=False))
 
         cols = np.concatenate(cols)
         rows = np.concatenate(rows)
diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index 10f5a7e9a1dc4..6b8dc8821d4fa 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -71,7 +71,9 @@ def test_from_spmatrix_columns(self, columns):
         expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize("colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2)])
+    @pytest.mark.parametrize(
+        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
+    )
     @td.skip_if_no_scipy
     def test_to_coo(self, colnames):
         import scipy.sparse