From cd922b6b81f1377f7a7ba6019694e78da5aa9bc1 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 20 Jun 2024 19:13:30 +0100
Subject: [PATCH 01/15] BUG: :bug: :sparkles: Add fill_value param to
 from_spmatrix method.

---
 pandas/core/arrays/sparse/accessor.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index 6a1c25711acb0..6213f2a5392f2 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -265,7 +265,9 @@ def _validate(self, data) -> None:
             raise AttributeError(self._validation_msg)
 
     @classmethod
-    def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
+    def from_spmatrix(
+        cls, data, index=None, columns=None, fill_value=None
+    ) -> DataFrame:
         """
         Create a new DataFrame from a scipy sparse matrix.
 
@@ -276,6 +278,21 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
         index, columns : Index, optional
             Row and column labels to use for the resulting DataFrame.
             Defaults to a RangeIndex.
+        fill_value : scalar, optional
+            The scalar value not stored in the columns. By default, this
+            depends on the dtype of `data`.
+
+            =========== ==========
+            dtype       na_value
+            =========== ==========
+            float       ``np.nan``
+            int         ``0``
+            bool        ``False``
+            datetime64  ``pd.NaT``
+            timedelta64 ``pd.NaT``
+            =========== ==========
+
+            The default value may be overridden by specifying a `fill_value`.
 
         Returns
         -------
@@ -313,7 +330,7 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
         indices = data.indices
         indptr = data.indptr
         array_data = data.data
-        dtype = SparseDtype(array_data.dtype, 0)
+        dtype = SparseDtype(array_data.dtype, fill_value)
         arrays = []
         for i in range(n_columns):
             sl = slice(indptr[i], indptr[i + 1])

From 9323e4378ed04d73820830ad17f76b75d06bc2c7 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 20 Jun 2024 19:45:44 +0100
Subject: [PATCH 02/15] ENH: :sparkles: Set explicit fill_value of NaN for
 complex floats.

---
 pandas/core/arrays/sparse/accessor.py | 1 +
 pandas/core/dtypes/dtypes.py          | 1 +
 pandas/core/dtypes/missing.py         | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index 6213f2a5392f2..8127b757ca63b 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -286,6 +286,7 @@ def from_spmatrix(
             dtype       na_value
             =========== ==========
             float       ``np.nan``
+            complex     ``np.nan``
             int         ``0``
             bool        ``False``
             datetime64  ``pd.NaT``
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 5213be8b69016..d38254d98553e 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -1683,6 +1683,7 @@ class SparseDtype(ExtensionDtype):
         dtype       na_value
         =========== ==========
         float       ``np.nan``
+        complex     ``np.nan``
         int         ``0``
         bool        ``False``
         datetime64  ``pd.NaT``
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index f0e21136f8a97..b9cd6ae2f13e8 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -618,6 +618,8 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
     nan
     >>> na_value_for_dtype(np.dtype("float64"))
     nan
+    >>> na_value_for_dtype(np.dtype("complex128"))
+    nan
     >>> na_value_for_dtype(np.dtype("bool"))
     False
     >>> na_value_for_dtype(np.dtype("datetime64[ns]"))
@@ -629,7 +631,7 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
     elif dtype.kind in "mM":
         unit = np.datetime_data(dtype)[0]
         return dtype.type("NaT", unit)
-    elif dtype.kind == "f":
+    elif dtype.kind in "fc":
         return np.nan
     elif dtype.kind in "iu":
         if compat:

From 212a66447cc0e56065b3686dffa0250892430477 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 20 Jun 2024 20:06:28 +0100
Subject: [PATCH 03/15] TST: :white_check_mark: Fix failing tests.

---
 pandas/tests/arrays/sparse/test_accessor.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index 87eb7bcfa9cee..b8b35044f18fb 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -112,7 +112,9 @@ def test_from_spmatrix(self, format, labels, dtype):
         sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item())
 
         mat = sp_sparse.eye(10, format=format, dtype=dtype)
-        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
+        result = pd.DataFrame.sparse.from_spmatrix(
+            mat, index=labels, columns=labels, fill_value=0
+        )
         expected = pd.DataFrame(
             np.eye(10, dtype=dtype), index=labels, columns=labels
         ).astype(sp_dtype)
@@ -124,7 +126,7 @@ def test_from_spmatrix_including_explicit_zero(self, format):
 
         mat = sp_sparse.random(10, 2, density=0.5, format=format)
         mat.data[0] = 0
-        result = pd.DataFrame.sparse.from_spmatrix(mat)
+        result = pd.DataFrame.sparse.from_spmatrix(mat, fill_value=0)
         dtype = SparseDtype("float64", 0.0)
         expected = pd.DataFrame(mat.todense()).astype(dtype)
         tm.assert_frame_equal(result, expected)
@@ -139,7 +141,7 @@ def test_from_spmatrix_columns(self, columns):
         dtype = SparseDtype("float64", 0.0)
 
         mat = sp_sparse.random(10, 2, density=0.5)
-        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
+        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns, fill_value=0)
         expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
         tm.assert_frame_equal(result, expected)
 

From 81b33f5f502da89057c9e178d7a2e5a089fc6ad0 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 20 Jun 2024 22:24:57 +0100
Subject: [PATCH 04/15] TST: :white_check_mark: Add tests for from_spmatrix
 method.

---
 pandas/tests/arrays/sparse/test_accessor.py | 20 +++++++++++++++++++-
 pandas/tests/dtypes/test_missing.py         |  4 ++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index b8b35044f18fb..163161f15a952 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -105,7 +105,7 @@ def test_accessor_raises(self):
 
     @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
     @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
-    @pytest.mark.parametrize("dtype", ["float64", "int64"])
+    @pytest.mark.parametrize("dtype", ["complex128", "float64", "int64"])
     def test_from_spmatrix(self, format, labels, dtype):
         sp_sparse = pytest.importorskip("scipy.sparse")
 
@@ -145,6 +145,24 @@ def test_from_spmatrix_columns(self, columns):
         expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "dtype, fill_value",
+        [("bool", False), ("float64", np.nan), ("complex128", np.nan)],
+    )
+    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
+    def test_from_spmatrix_fill_value(self, format, dtype, fill_value):
+        sp_sparse = pytest.importorskip("scipy.sparse")
+
+        sp_dtype = SparseDtype(dtype, fill_value)
+
+        sp_mat = sp_sparse.eye(10, format=format, dtype=dtype)
+        result = pd.DataFrame.sparse.from_spmatrix(sp_mat, fill_value=fill_value)
+        mat = np.eye(10, dtype=dtype)
+        expected = pd.DataFrame(
+            np.ma.array(mat, mask=(mat == 0)).filled(fill_value)
+        ).astype(sp_dtype)
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize(
         "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
     )
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index 2109c794ad44f..54d618f7b0766 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -697,6 +697,10 @@ def test_array_equivalent_index_with_tuples():
         ("f2", np.nan),
         ("f4", np.nan),
         ("f8", np.nan),
+        # Complex
+        ("c8", np.nan),
+        ("c16", np.nan),
+        ("c32", np.nan),
         # Object
         ("O", np.nan),
         # Interval

From f5f1479aa2aaaaec0a2979cc8f6480bdb8e6a42d Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 11:33:25 +0100
Subject: [PATCH 05/15] DOC: :memo: Add what's new entry.

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index b952ffd7661a7..18ad7e3ccedaf 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -584,7 +584,7 @@ Reshaping
 Sparse
 ^^^^^^
 - Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
--
+- Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard-coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`)
 
 ExtensionArray
 ^^^^^^^^^^^^^^

From 57367aaaae290875e793274aac14a1cff8979d87 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 12:15:46 +0100
Subject: [PATCH 06/15] TST: :white_check_mark: Fix failing tests for sparse
 getitem.

---
 pandas/tests/indexing/test_loc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 16f3e0fd0c229..a2a34314e2801 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1292,7 +1292,7 @@ def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
         # diagonal cells are ones, meaning the last two columns are purely sparse.
         rows, cols = 5, 7
         spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
-        df = DataFrame.sparse.from_spmatrix(spmatrix)
+        df = DataFrame.sparse.from_spmatrix(spmatrix, fill_value=0)
 
         # regression test for GH#34526
         itr_idx = range(2, rows)
@@ -1314,7 +1314,7 @@ def test_loc_getitem_sparse_frame(self):
         # GH34687
         sp_sparse = pytest.importorskip("scipy.sparse")
 
-        df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5))
+        df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5), fill_value=0)
         result = df.loc[range(2)]
         expected = DataFrame(
             [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]],

From e90963af397ef1d7c32c4c6d03f29079ec1b471a Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 12:25:01 +0100
Subject: [PATCH 07/15] TST: :white_check_mark: Remove test for 256-bit complex
 float.

---
 pandas/tests/dtypes/test_missing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index 54d618f7b0766..f86ed6f49759f 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -700,7 +700,6 @@ def test_array_equivalent_index_with_tuples():
         # Complex
         ("c8", np.nan),
         ("c16", np.nan),
-        ("c32", np.nan),
         # Object
         ("O", np.nan),
         # Interval

From eb222a6bb151b8d262bc3df9a5235f958c9274e3 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 13:30:15 +0100
Subject: [PATCH 08/15] DOC: :memo: Update example in docstring for
 from_spmatrix method.

---
 pandas/core/arrays/sparse/accessor.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index 8127b757ca63b..d13c2f471f1af 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -310,11 +310,11 @@ def from_spmatrix(
         --------
         >>> import scipy.sparse
         >>> mat = scipy.sparse.eye(3, dtype=float)
-        >>> pd.DataFrame.sparse.from_spmatrix(mat)
+        >>> pd.DataFrame.sparse.from_spmatrix(mat, fill_value=0.0)
              0    1    2
-        0  1.0    0    0
-        1    0  1.0    0
-        2    0    0  1.0
+        0  1.0  0.0  0.0
+        1  0.0  1.0  0.0
+        2  0.0  0.0  1.0
         """
         from pandas._libs.sparse import IntIndex
 

From 221c4aabce136e27ebe85d59f9008daf1edd6153 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 14:26:27 +0100
Subject: [PATCH 09/15] DOC: :memo: Update some docstrings and sparse user
 guide.

---
 doc/source/user_guide/sparse.rst      | 2 +-
 pandas/core/arrays/sparse/accessor.py | 4 ++--
 pandas/core/dtypes/dtypes.py          | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
index 25bcb8bcc0c93..03105d62821a0 100644
--- a/doc/source/user_guide/sparse.rst
+++ b/doc/source/user_guide/sparse.rst
@@ -188,7 +188,7 @@ Use :meth:`DataFrame.sparse.from_spmatrix` to create a :class:`DataFrame` with s
    sp_arr = csr_matrix(arr)
    sp_arr
 
-   sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr)
+   sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr, fill_value=0)
    sdf.head()
    sdf.dtypes
 
diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index d13c2f471f1af..3af394872f387 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -280,7 +280,7 @@ def from_spmatrix(
             Defaults to a RangeIndex.
         fill_value : scalar, optional
             The scalar value not stored in the columns. By default, this
-            depends on the dtype of `data`.
+            depends on the dtype of ``data``.
 
             =========== ==========
             dtype       na_value
@@ -293,7 +293,7 @@ def from_spmatrix(
             timedelta64 ``pd.NaT``
             =========== ==========
 
-            The default value may be overridden by specifying a `fill_value`.
+            The default value may be overridden by specifying a ``fill_value``.
 
         Returns
         -------
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index d38254d98553e..a483260c2c3bd 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -1666,7 +1666,7 @@ class SparseDtype(ExtensionDtype):
     """
     Dtype for data stored in :class:`SparseArray`.
 
-    `SparseDtype` is used as the data type for :class:`SparseArray`, enabling
+    SparseDtype is used as the data type for :class:`SparseArray`, enabling
     more efficient storage of data that contains a significant number of
     repetitive values typically represented by a fill value. It supports any
     scalar dtype as the underlying data type of the non-fill values.
@@ -1677,7 +1677,7 @@ class SparseDtype(ExtensionDtype):
         The dtype of the underlying array storing the non-fill value values.
     fill_value : scalar, optional
         The scalar value not stored in the SparseArray. By default, this
-        depends on `dtype`.
+        depends on ``dtype``.
 
         =========== ==========
         dtype       na_value
@@ -1690,7 +1690,7 @@ class SparseDtype(ExtensionDtype):
         timedelta64 ``pd.NaT``
         =========== ==========
 
-        The default value may be overridden by specifying a `fill_value`.
+        The default value may be overridden by specifying a ``fill_value``.
 
     Attributes
     ----------

From 0d07c30277276f10bdb187738822b4566bcc8be6 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Sat, 22 Jun 2024 13:43:44 +0100
Subject: [PATCH 10/15] DOC: :pencil2: Update dtype docstring.

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 pandas/core/dtypes/dtypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index a483260c2c3bd..3aeab96e03163 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -1666,7 +1666,7 @@ class SparseDtype(ExtensionDtype):
     """
     Dtype for data stored in :class:`SparseArray`.
 
-    SparseDtype is used as the data type for :class:`SparseArray`, enabling
+    ``SparseDtype`` is used as the data type for :class:`SparseArray`, enabling
     more efficient storage of data that contains a significant number of
     repetitive values typically represented by a fill value. It supports any
     scalar dtype as the underlying data type of the non-fill values.

From ccba29e4618fa25d810a574f40d146eb47d40e85 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 27 Jun 2024 10:35:22 +0100
Subject: [PATCH 11/15] BUG: :rewind: :bug: Revert fill_value change and fix
 to_coo method.

---
 pandas/core/arrays/sparse/accessor.py | 34 ++++++---------------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index 3af394872f387..d104e0a126bc2 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -265,9 +265,7 @@ def _validate(self, data) -> None:
             raise AttributeError(self._validation_msg)
 
     @classmethod
-    def from_spmatrix(
-        cls, data, index=None, columns=None, fill_value=None
-    ) -> DataFrame:
+    def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
         """
         Create a new DataFrame from a scipy sparse matrix.
 
@@ -278,22 +276,6 @@ def from_spmatrix(
         index, columns : Index, optional
             Row and column labels to use for the resulting DataFrame.
             Defaults to a RangeIndex.
-        fill_value : scalar, optional
-            The scalar value not stored in the columns. By default, this
-            depends on the dtype of ``data``.
-
-            =========== ==========
-            dtype       na_value
-            =========== ==========
-            float       ``np.nan``
-            complex     ``np.nan``
-            int         ``0``
-            bool        ``False``
-            datetime64  ``pd.NaT``
-            timedelta64 ``pd.NaT``
-            =========== ==========
-
-            The default value may be overridden by specifying a ``fill_value``.
 
         Returns
         -------
@@ -309,12 +291,12 @@ def from_spmatrix(
         Examples
         --------
         >>> import scipy.sparse
-        >>> mat = scipy.sparse.eye(3, dtype=float)
-        >>> pd.DataFrame.sparse.from_spmatrix(mat, fill_value=0.0)
+        >>> mat = scipy.sparse.eye(3, dtype=int)
+        >>> pd.DataFrame.sparse.from_spmatrix(mat)
              0    1    2
-        0  1.0  0.0  0.0
-        1  0.0  1.0  0.0
-        2  0.0  0.0  1.0
+        0    1    0    0
+        1    0    1    0
+        2    0    0    1
         """
         from pandas._libs.sparse import IntIndex
 
@@ -331,7 +313,7 @@ def from_spmatrix(
         indices = data.indices
         indptr = data.indptr
         array_data = data.data
-        dtype = SparseDtype(array_data.dtype, fill_value)
+        dtype = SparseDtype(array_data.dtype)
         arrays = []
         for i in range(n_columns):
             sl = slice(indptr[i], indptr[i + 1])
@@ -411,8 +393,6 @@ def to_coo(self) -> spmatrix:
         cols, rows, data = [], [], []
         for col, (_, ser) in enumerate(self._parent.items()):
             sp_arr = ser.array
-            if sp_arr.fill_value != 0:
-                raise ValueError("fill value must be 0 when converting to COO matrix")
 
             row = sp_arr.sp_index.indices
             cols.append(np.repeat(col, len(row)))

From d09171e609e341823b61f484b96bf6d8298dc330 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 27 Jun 2024 10:38:17 +0100
Subject: [PATCH 12/15] TST: :rewind: :white_check_mark: Fix and add sparse
 accessor tests.

---
 pandas/tests/arrays/sparse/test_accessor.py | 93 ++++++++-------------
 1 file changed, 37 insertions(+), 56 deletions(-)

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index 163161f15a952..6579fadf7b395 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -105,30 +105,36 @@ def test_accessor_raises(self):
 
     @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
     @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
-    @pytest.mark.parametrize("dtype", ["complex128", "float64", "int64"])
+    @pytest.mark.parametrize("dtype", [np.complex128, np.float64, np.int64, bool])
     def test_from_spmatrix(self, format, labels, dtype):
         sp_sparse = pytest.importorskip("scipy.sparse")
 
-        sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item())
+        sp_dtype = SparseDtype(dtype)
 
-        mat = sp_sparse.eye(10, format=format, dtype=dtype)
-        result = pd.DataFrame.sparse.from_spmatrix(
-            mat, index=labels, columns=labels, fill_value=0
-        )
+        sp_mat = sp_sparse.eye(10, format=format, dtype=dtype)
+        result = pd.DataFrame.sparse.from_spmatrix(sp_mat, index=labels, columns=labels)
+        mat = np.eye(10, dtype=dtype)
         expected = pd.DataFrame(
-            np.eye(10, dtype=dtype), index=labels, columns=labels
+            np.ma.masked_array(mat, mask=(mat == 0)).filled(sp_dtype.fill_value),
+            index=labels,
+            columns=labels,
         ).astype(sp_dtype)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
-    def test_from_spmatrix_including_explicit_zero(self, format):
+    @pytest.mark.parametrize("dtype", [np.int64, bool])
+    def test_from_spmatrix_including_explicit_zero(self, format, dtype):
         sp_sparse = pytest.importorskip("scipy.sparse")
 
-        mat = sp_sparse.random(10, 2, density=0.5, format=format)
-        mat.data[0] = 0
-        result = pd.DataFrame.sparse.from_spmatrix(mat, fill_value=0)
-        dtype = SparseDtype("float64", 0.0)
-        expected = pd.DataFrame(mat.todense()).astype(dtype)
+        sp_dtype = SparseDtype(dtype)
+
+        sp_mat = sp_sparse.random(10, 2, density=0.5, format=format, dtype=dtype)
+        sp_mat.data[0] = 0
+        result = pd.DataFrame.sparse.from_spmatrix(sp_mat)
+        mat = sp_mat.toarray()
+        expected = pd.DataFrame(
+            np.ma.array(mat, mask=(mat == 0)).filled(sp_dtype.fill_value)
+        ).astype(sp_dtype)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -138,59 +144,34 @@ def test_from_spmatrix_including_explicit_zero(self, format):
     def test_from_spmatrix_columns(self, columns):
         sp_sparse = pytest.importorskip("scipy.sparse")
 
-        dtype = SparseDtype("float64", 0.0)
-
-        mat = sp_sparse.random(10, 2, density=0.5)
-        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns, fill_value=0)
-        expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
-        tm.assert_frame_equal(result, expected)
-
-    @pytest.mark.parametrize(
-        "dtype, fill_value",
-        [("bool", False), ("float64", np.nan), ("complex128", np.nan)],
-    )
-    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
-    def test_from_spmatrix_fill_value(self, format, dtype, fill_value):
-        sp_sparse = pytest.importorskip("scipy.sparse")
-
-        sp_dtype = SparseDtype(dtype, fill_value)
+        sp_dtype = SparseDtype(np.float64)
 
-        sp_mat = sp_sparse.eye(10, format=format, dtype=dtype)
-        result = pd.DataFrame.sparse.from_spmatrix(sp_mat, fill_value=fill_value)
-        mat = np.eye(10, dtype=dtype)
+        sp_mat = sp_sparse.random(10, 2, density=0.5)
+        result = pd.DataFrame.sparse.from_spmatrix(sp_mat, columns=columns)
+        mat = sp_mat.toarray()
         expected = pd.DataFrame(
-            np.ma.array(mat, mask=(mat == 0)).filled(fill_value)
+            np.ma.array(mat, mask=(mat == 0)).filled(sp_dtype.fill_value),
+            columns=columns,
         ).astype(sp_dtype)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
-        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
+        "columns", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
     )
-    def test_to_coo(self, colnames):
+    @pytest.mark.parametrize("dtype", [np.complex128, np.float64, np.int64, bool])
+    def test_to_coo(self, columns, dtype):
         sp_sparse = pytest.importorskip("scipy.sparse")
 
-        df = pd.DataFrame(
-            {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]"
-        )
-        result = df.sparse.to_coo()
-        expected = sp_sparse.coo_matrix(np.asarray(df))
-        assert (result != expected).nnz == 0
+        sp_dtype = SparseDtype(dtype)
 
-    @pytest.mark.parametrize("fill_value", [1, np.nan])
-    def test_to_coo_nonzero_fill_val_raises(self, fill_value):
-        pytest.importorskip("scipy")
-        df = pd.DataFrame(
-            {
-                "A": SparseArray(
-                    [fill_value, fill_value, fill_value, 2], fill_value=fill_value
-                ),
-                "B": SparseArray(
-                    [fill_value, 2, fill_value, fill_value], fill_value=fill_value
-                ),
-            }
-        )
-        with pytest.raises(ValueError, match="fill value must be 0"):
-            df.sparse.to_coo()
+        expected = sp_sparse.random(10, 2, density=0.5, format="coo", dtype=dtype)
+        mat = expected.toarray()
+        result = pd.DataFrame(
+            np.ma.array(mat, mask=(mat == 0)).filled(sp_dtype.fill_value),
+            columns=columns,
+            dtype=sp_dtype,
+        ).sparse.to_coo()
+        assert (result != expected).nnz == 0
 
     def test_to_coo_midx_categorical(self):
         # GH#50996

From b8134536fec0bbc5f530e58a7b3015fa4cfea66d Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 27 Jun 2024 10:39:31 +0100
Subject: [PATCH 13/15] TST: :rewind: :white_check_mark: Fix and add sparse
 getitem tests.

---
 pandas/tests/indexing/test_loc.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index a2a34314e2801..903ad24ce53b3 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1281,7 +1281,7 @@ def test_loc_getitem_time_object(self, frame_or_series):
         tm.assert_equal(result, expected)
 
     @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
-    @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
+    @pytest.mark.parametrize("dtype", [np.complex128, np.float64, np.int64, bool])
     def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
         sp_sparse = pytest.importorskip("scipy.sparse")
 
@@ -1292,17 +1292,17 @@ def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
         # diagonal cells are ones, meaning the last two columns are purely sparse.
         rows, cols = 5, 7
         spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
-        df = DataFrame.sparse.from_spmatrix(spmatrix, fill_value=0)
+        df = DataFrame.sparse.from_spmatrix(spmatrix)
 
         # regression test for GH#34526
         itr_idx = range(2, rows)
-        result = df.loc[itr_idx].values
+        result = np.nan_to_num(df.loc[itr_idx].values)
         expected = spmatrix.toarray()[itr_idx]
         tm.assert_numpy_array_equal(result, expected)
 
         # regression test for GH#34540
         result = df.loc[itr_idx].dtypes.values
-        expected = np.full(cols, SparseDtype(dtype, fill_value=0))
+        expected = np.full(cols, SparseDtype(dtype))
         tm.assert_numpy_array_equal(result, expected)
 
     def test_loc_getitem_listlike_all_retains_sparse(self):
@@ -1314,18 +1314,16 @@ def test_loc_getitem_sparse_frame(self):
         # GH34687
         sp_sparse = pytest.importorskip("scipy.sparse")
 
-        df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5), fill_value=0)
+        df = DataFrame.sparse.from_spmatrix(sp_sparse.eye(5, dtype=np.int64))
         result = df.loc[range(2)]
         expected = DataFrame(
-            [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]],
-            dtype=SparseDtype("float64", 0.0),
+            [[1, 0, 0, 0, 0], [0, 1, 0, 0, 0]],
+            dtype=SparseDtype(np.int64),
         )
         tm.assert_frame_equal(result, expected)
 
         result = df.loc[range(2)].loc[range(1)]
-        expected = DataFrame(
-            [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0)
-        )
+        expected = DataFrame([[1, 0, 0, 0, 0]], dtype=SparseDtype(np.int64))
         tm.assert_frame_equal(result, expected)
 
     def test_loc_getitem_sparse_series(self):

From 499db2f9f4a8eb54cffcece574346b901421e5cc Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 27 Jun 2024 10:40:29 +0100
Subject: [PATCH 14/15] DOC: :rewind: :memo: Revert fill_value change to sparse
 user guide.

---
 doc/source/user_guide/sparse.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
index 03105d62821a0..25bcb8bcc0c93 100644
--- a/doc/source/user_guide/sparse.rst
+++ b/doc/source/user_guide/sparse.rst
@@ -188,7 +188,7 @@ Use :meth:`DataFrame.sparse.from_spmatrix` to create a :class:`DataFrame` with s
    sp_arr = csr_matrix(arr)
    sp_arr
 
-   sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr, fill_value=0)
+   sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr)
    sdf.head()
    sdf.dtypes
 

From 9eb3dac4ed9d3dcbfb8e2bd130159e095d5f9eb5 Mon Sep 17 00:00:00 2001
From: Christopher Titchen
 <109701765+christopher-titchen@users.noreply.github.com>
Date: Thu, 27 Jun 2024 14:58:35 +0100
Subject: [PATCH 15/15] CLN: :pencil2: Fix instantiation of np.ma.array in
 test.

---
 pandas/tests/arrays/sparse/test_accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index 6579fadf7b395..bd3298940ae3a 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -115,7 +115,7 @@ def test_from_spmatrix(self, format, labels, dtype):
         result = pd.DataFrame.sparse.from_spmatrix(sp_mat, index=labels, columns=labels)
         mat = np.eye(10, dtype=dtype)
         expected = pd.DataFrame(
-            np.ma.masked_array(mat, mask=(mat == 0)).filled(sp_dtype.fill_value),
+            np.ma.array(mat, mask=(mat == 0)).filled(sp_dtype.fill_value),
             index=labels,
             columns=labels,
         ).astype(sp_dtype)