Skip to content

DEPR: SparseArray.astype #49324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ Removal of prior version deprecations/changes
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
- Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``; it now casts to the exact requested dtype rather than silently using a ``SparseDtype`` (:issue:`34457`)
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``; when the data cannot be cast losslessly, the floating-point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
- Changed the behavior of :class:`Series` constructor; it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
Expand Down
17 changes: 9 additions & 8 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ class ellipsis(Enum):

SparseIndexKind = Literal["integer", "block"]

from pandas.core.dtypes.dtypes import ExtensionDtype

from pandas import Series

else:
Expand Down Expand Up @@ -1328,14 +1330,13 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
future_dtype = pandas_dtype(dtype)
if not isinstance(future_dtype, SparseDtype):
# GH#34457
warnings.warn(
"The behavior of .astype from SparseDtype to a non-sparse dtype "
"is deprecated. In a future version, this will return a non-sparse "
"array with the requested dtype. To retain the old behavior, use "
"`obj.astype(SparseDtype(dtype))`",
FutureWarning,
stacklevel=find_stack_level(),
)
if isinstance(future_dtype, np.dtype):
values = np.array(self)
return astype_nansafe(values, dtype=future_dtype)
else:
dtype = cast(ExtensionDtype, dtype)
cls = dtype.construct_array_type()
return cls._from_sequence(self, dtype=dtype, copy=copy)

dtype = self.dtype.update_dtype(dtype)
subtype = pandas_dtype(dtype._subtype_with_str)
Expand Down
30 changes: 7 additions & 23 deletions pandas/tests/arrays/sparse/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,9 @@ def test_astype(self):

def test_astype_bool(self):
a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
result = a.astype(bool)
expected = SparseArray(
[True, False, False, True], dtype=SparseDtype(bool, False)
)
tm.assert_sp_array_equal(result, expected)
result = a.astype(bool)
expected = np.array([1, 0, 0, 1], dtype=bool)
tm.assert_numpy_array_equal(result, expected)

# update fill value
result = a.astype(SparseDtype(bool, False))
Expand All @@ -57,12 +54,8 @@ def test_astype_all(self, any_real_numpy_dtype):
vals = np.array([1, 2, 3])
arr = SparseArray(vals, fill_value=1)
typ = np.dtype(any_real_numpy_dtype)
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
res = arr.astype(typ)
assert res.dtype == SparseDtype(typ, 1)
assert res.sp_values.dtype == typ

tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ))
res = arr.astype(typ)
tm.assert_numpy_array_equal(res, vals.astype(any_real_numpy_dtype))

@pytest.mark.parametrize(
"arr, dtype, expected",
Expand Down Expand Up @@ -100,22 +93,13 @@ def test_astype_all(self, any_real_numpy_dtype):
],
)
def test_astype_more(self, arr, dtype, expected):

if isinstance(dtype, SparseDtype):
warn = None
else:
warn = FutureWarning

with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
result = arr.astype(dtype)
result = arr.astype(arr.dtype.update_dtype(dtype))
tm.assert_sp_array_equal(result, expected)

def test_astype_nan_raises(self):
arr = SparseArray([1.0, np.nan])
with pytest.raises(ValueError, match="Cannot convert non-finite"):
msg = "astype from SparseDtype"
with tm.assert_produces_warning(FutureWarning, match=msg):
arr.astype(int)
arr.astype(int)

def test_astype_copy_false(self):
# GH#34456 bug caused by using .view instead of .astype in astype_nansafe
Expand Down
17 changes: 3 additions & 14 deletions pandas/tests/extension/base/reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.api.extensions import ExtensionArray
from pandas.core.internals.blocks import EABackedBlock
from pandas.tests.extension.base.base import BaseExtensionTests
Expand Down Expand Up @@ -319,23 +318,13 @@ def test_unstack(self, data, index, obj):
alt = df.unstack(level=level).droplevel(0, axis=1)
self.assert_frame_equal(result, alt)

if obj == "series":
is_sparse = isinstance(ser.dtype, pd.SparseDtype)
else:
is_sparse = isinstance(ser.dtypes.iat[0], pd.SparseDtype)
warn = None if not is_sparse else FutureWarning
with tm.assert_produces_warning(warn, match="astype from Sparse"):
obj_ser = ser.astype(object)
obj_ser = ser.astype(object)

expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
if obj == "series" and not is_sparse:
# GH#34457 SparseArray.astype(object) gives Sparse[object]
# instead of np.dtype(object)
if obj == "series":
assert (expected.dtypes == object).all()

with tm.assert_produces_warning(warn, match="astype from Sparse"):
result = result.astype(object)

result = result.astype(object)
self.assert_frame_equal(result, expected)

def test_ravel(self, data):
Expand Down
37 changes: 5 additions & 32 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

from pandas.errors import PerformanceWarning

from pandas.core.dtypes.common import is_object_dtype

import pandas as pd
from pandas import SparseDtype
import pandas._testing as tm
Expand Down Expand Up @@ -159,10 +157,7 @@ def test_concat_mixed_dtypes(self, data):
],
)
def test_stack(self, data, columns):
with tm.assert_produces_warning(
FutureWarning, check_stacklevel=False, match="astype from Sparse"
):
super().test_stack(data, columns)
super().test_stack(data, columns)

def test_concat_columns(self, data, na_value):
self._check_unsupported(data)
Expand Down Expand Up @@ -385,33 +380,11 @@ def test_equals(self, data, na_value, as_series, box):


class TestCasting(BaseSparseTests, base.BaseCastingTests):
def test_astype_object_series(self, all_data):
# Unlike the base class, we do not expect the resulting Block
# to be ObjectBlock / resulting array to be np.dtype("object")
ser = pd.Series(all_data, name="A")
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
result = ser.astype(object)
assert is_object_dtype(result.dtype)
assert is_object_dtype(result._mgr.array.dtype)

def test_astype_object_frame(self, all_data):
# Unlike the base class, we do not expect the resulting Block
# to be ObjectBlock / resulting array to be np.dtype("object")
df = pd.DataFrame({"A": all_data})

with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
result = df.astype(object)
assert is_object_dtype(result._mgr.arrays[0].dtype)

# check that we can compare the dtypes
comp = result.dtypes == df.dtypes
assert not comp.any()

def test_astype_str(self, data):
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
result = pd.Series(data[:5]).astype(str)
expected_dtype = SparseDtype(str, str(data.fill_value))
expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype)
# pre-2.0 this would give a SparseDtype even if the user asked
# for a non-sparse dtype.
result = pd.Series(data[:5]).astype(str)
expected = pd.Series([str(x) for x in data[:5]], dtype=object)
self.assert_series_equal(result, expected)

@pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype")
Expand Down
7 changes: 1 addition & 6 deletions pandas/tests/frame/methods/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
def test_other_dtypes(self, data, dtype, using_array_manager):
df = DataFrame(data, dtype=dtype)

warn = None
if using_array_manager and isinstance(dtype, pd.SparseDtype):
warn = FutureWarning

with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
result = df._append(df.iloc[0]).iloc[-1]
result = df._append(df.iloc[0]).iloc[-1]

expected = Series(data, name=0, dtype=dtype)
tm.assert_series_equal(result, expected)
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1300,10 +1300,6 @@ def test_loc_getitem_time_object(self, frame_or_series):
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
@td.skip_if_no_scipy
@pytest.mark.filterwarnings(
# TODO(2.0): remove filtering; note only needed for using_array_manager
"ignore:The behavior of .astype from SparseDtype.*FutureWarning"
)
def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
import scipy.sparse

Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,7 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
name = "name" # op(pd.Series, array) preserves the name.
series = pd.Series(a1, name=name)

warn = None if not sparse else FutureWarning
with tm.assert_produces_warning(warn):
other = pd.Index(a2, name=name).astype("int64")
other = pd.Index(a2, name=name).astype("int64")

array_args = (a1, a2)
series_args = (series, other) # ufunc(series, array)
Expand Down