From aed2f4c6baf748faa2d80b12591ed5a76beec739 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sun, 12 Nov 2023 10:54:51 -0500 Subject: [PATCH 1/3] Keep original values when taking with a new fill value --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/arrays/sparse/array.py | 3 ++- pandas/tests/arrays/sparse/test_indexing.py | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index efa4a52993a90..2485aa6b782ac 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -349,6 +349,7 @@ Datetimelike - Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`) - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`) +- Bug in taking from a :class:`SparseArray` when using a different fill value than the array's fill value. (:issue:`55181`) - Timedelta diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index cf349220e4ba7..5db77db2a9c66 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1086,9 +1086,10 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray: ) elif self.sp_index.npoints == 0: - # Avoid taking from the empty self.sp_values + # Use the old fill_value unless we took for an index of -1 _dtype = np.result_type(self.dtype.subtype, type(fill_value)) taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype) + taken[old_fill_indices] = self.fill_value else: taken = self.sp_values.take(sp_indexer) diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py index d63d0fb07b404..f81551899a8ec 100644 --- a/pandas/tests/arrays/sparse/test_indexing.py +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -170,6 +170,13 @@ def test_take_all_empty(self): result = a.take([0, 1], allow_fill=True, fill_value=np.nan) tm.assert_sp_array_equal(a, result) + def test_take_different_fill_value(self): + # Take with a different fill value shouldn't overwrite the original + a = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0)) + result = a.take([0, -1], allow_fill=True, fill_value=np.nan) + expected = pd.array([0, np.nan], dtype=a.dtype) + tm.assert_sp_array_equal(expected, result) + def test_take_fill_value(self): data = np.array([1, np.nan, 0, 3, 0]) sparse = SparseArray(data, fill_value=0) From 33d3580b2ac62cc70b317a6bdde583b36ba913f5 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Mon, 13 Nov 2023 14:45:18 -0500 Subject: [PATCH 2/3] Switch one-letter name --- pandas/tests/arrays/sparse/test_indexing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py index f81551899a8ec..60029ac06ddb4 100644 --- a/pandas/tests/arrays/sparse/test_indexing.py +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -166,15 +166,15 @@ def test_take(self, arr_data, arr): tm.assert_sp_array_equal(arr.take([0, 1, 2]), exp) def test_take_all_empty(self): - a = pd.array([0, 0], dtype=SparseDtype("int64")) - result = a.take([0, 1], allow_fill=True, fill_value=np.nan) - tm.assert_sp_array_equal(a, result) + sparse = pd.array([0, 0], dtype=SparseDtype("int64")) + result = sparse.take([0, 1], allow_fill=True, fill_value=np.nan) + tm.assert_sp_array_equal(sparse, result) def test_take_different_fill_value(self): # Take with a different fill value shouldn't overwrite the original - a = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0)) - result = a.take([0, -1], allow_fill=True, fill_value=np.nan) - expected = pd.array([0, np.nan], dtype=a.dtype) + sparse = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0)) + result = sparse.take([0, -1], allow_fill=True, fill_value=np.nan) + expected = pd.array([0, np.nan], dtype=sparse.dtype) tm.assert_sp_array_equal(expected, result) def test_take_fill_value(self): From 0dae5a683f92365ce44a0968b15fe6c41f41b417 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:33:32 -0500 Subject: [PATCH 3/3] Fix whatsnew --- doc/source/whatsnew/v2.2.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 2485aa6b782ac..8210bcf1feefe 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -349,7 +349,6 @@ Datetimelike - Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`) - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`) -- Bug in taking from a :class:`SparseArray` when using a different fill value than the array's fill value. (:issue:`55181`) - Timedelta @@ -446,7 +445,7 @@ Reshaping Sparse ^^^^^^ -- +- Bug in :meth:`SparseArray.take` when using a different fill value than the array's fill value (:issue:`55181`) - ExtensionArray