From 1ccd086a166ef2199026f6ac0c5cc6ab5911b5f4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 25 Oct 2022 20:02:35 -0700 Subject: [PATCH 1/3] DEPR: SparseArray.astype --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/sparse/array.py | 17 +++++++------ pandas/tests/arrays/sparse/test_astype.py | 30 ++++++----------------- pandas/tests/extension/base/reshaping.py | 17 +++---------- pandas/tests/extension/test_sparse.py | 20 ++++++--------- pandas/tests/series/test_ufunc.py | 4 +-- 6 files changed, 29 insertions(+), 60 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 78ea78ec97a3a..dd6b866b2ec63 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -248,6 +248,7 @@ Removal of prior version deprecations/changes - Removed setting Categorical._codes directly (:issue:`41429`) - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) - Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`) +- Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``, cast to the exact requested dtype rather than silently using a ``SparseDtype`` instead (:issue:`34457`) - Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`) - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 38d3e0d73ef2c..c87811f3e6a16 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -118,6 +118,8 @@ class ellipsis(Enum): SparseIndexKind = Literal["integer", "block"] + from pandas.core.dtypes.dtypes import ExtensionDtype + from pandas import Series else: @@ -1326,14 +1328,13 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): future_dtype = pandas_dtype(dtype) if not isinstance(future_dtype, SparseDtype): # GH#34457 - warnings.warn( - "The behavior of .astype from SparseDtype to a non-sparse dtype " - "is deprecated. In a future version, this will return a non-sparse " - "array with the requested dtype. To retain the old behavior, use " - "`obj.astype(SparseDtype(dtype))`", - FutureWarning, - stacklevel=find_stack_level(), - ) + if isinstance(future_dtype, np.dtype): + values = np.array(self) + return astype_nansafe(values, dtype=future_dtype) + else: + dtype = cast(ExtensionDtype, dtype) + cls = dtype.construct_array_type() + return cls._from_sequence(self, dtype=dtype, copy=copy) dtype = self.dtype.update_dtype(dtype) subtype = pandas_dtype(dtype._subtype_with_str) diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 6761040d444a5..8751b9bb294ae 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -39,12 +39,9 @@ def test_astype(self): def test_astype_bool(self): a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = a.astype(bool) - expected = SparseArray( - [True, False, False, True], dtype=SparseDtype(bool, False) - ) - tm.assert_sp_array_equal(result, expected) + result = a.astype(bool) + expected = np.array([1, 0, 0, 1], dtype=bool) + tm.assert_numpy_array_equal(result, expected) # update fill value result = a.astype(SparseDtype(bool, False)) @@ -57,12 +54,8 @@ def test_astype_all(self, any_real_numpy_dtype): vals = np.array([1, 2, 3]) arr = SparseArray(vals, fill_value=1) typ = np.dtype(any_real_numpy_dtype) - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - res = arr.astype(typ) - assert res.dtype == SparseDtype(typ, 1) - assert res.sp_values.dtype == typ - - tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ)) + res = arr.astype(typ) + tm.assert_numpy_array_equal(res, vals.astype(any_real_numpy_dtype)) @pytest.mark.parametrize( "arr, dtype, expected", @@ -100,22 +93,13 @@ def test_astype_all(self, any_real_numpy_dtype): ], ) def test_astype_more(self, arr, dtype, expected): - - if isinstance(dtype, SparseDtype): - warn = None - else: - warn = FutureWarning - - with tm.assert_produces_warning(warn, match="astype from SparseDtype"): - result = arr.astype(dtype) + result = arr.astype(arr.dtype.update_dtype(dtype)) tm.assert_sp_array_equal(result, expected) def test_astype_nan_raises(self): arr = SparseArray([1.0, np.nan]) with pytest.raises(ValueError, match="Cannot convert non-finite"): - msg = "astype from SparseDtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - arr.astype(int) + arr.astype(int) def test_astype_copy_false(self): # GH#34456 bug caused by using .view instead of .astype in astype_nansafe diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index babb2868a4421..cc970c690529d 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -4,7 +4,6 @@ import pytest import pandas as pd -import pandas._testing as tm from pandas.api.extensions import ExtensionArray from pandas.core.internals.blocks import EABackedBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -319,23 +318,13 @@ def test_unstack(self, data, index, obj): alt = df.unstack(level=level).droplevel(0, axis=1) self.assert_frame_equal(result, alt) - if obj == "series": - is_sparse = isinstance(ser.dtype, pd.SparseDtype) - else: - is_sparse = isinstance(ser.dtypes.iat[0], pd.SparseDtype) - warn = None if not is_sparse else FutureWarning - with tm.assert_produces_warning(warn, match="astype from Sparse"): - obj_ser = ser.astype(object) + obj_ser = ser.astype(object) expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value) - if obj == "series" and not is_sparse: - # GH#34457 SparseArray.astype(object) gives Sparse[object] - # instead of np.dtype(object) + if obj == "series": assert (expected.dtypes == object).all() - with tm.assert_produces_warning(warn, match="astype from Sparse"): - result = result.astype(object) - + result = result.astype(object) self.assert_frame_equal(result, expected) def test_ravel(self, data): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index b1111951d67fa..d05e98232e6d8 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -159,10 +159,7 @@ def test_concat_mixed_dtypes(self, data): ], ) def test_stack(self, data, columns): - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False, match="astype from Sparse" - ): - super().test_stack(data, columns) + super().test_stack(data, columns) def test_concat_columns(self, data, na_value): self._check_unsupported(data) @@ -385,7 +382,7 @@ def test_equals(self, data, na_value, as_series, box): class TestCasting(BaseSparseTests, base.BaseCastingTests): - def test_astype_object_series(self, all_data): + def _test_astype_object_series(self, all_data): # Unlike the base class, we do not expect the resulting Block # to be ObjectBlock / resulting array to be np.dtype("object") ser = pd.Series(all_data, name="A") @@ -394,13 +391,12 @@ def test_astype_object_series(self, all_data): assert is_object_dtype(result.dtype) assert is_object_dtype(result._mgr.array.dtype) - def test_astype_object_frame(self, all_data): + def _test_astype_object_frame(self, all_data): # Unlike the base class, we do not expect the resulting Block # to be ObjectBlock / resulting array to be np.dtype("object") df = pd.DataFrame({"A": all_data}) - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = df.astype(object) + result = df.astype(object) assert is_object_dtype(result._mgr.arrays[0].dtype) # check that we can compare the dtypes @@ -408,10 +404,10 @@ def test_astype_object_frame(self, all_data): assert not comp.any() def test_astype_str(self, data): - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = pd.Series(data[:5]).astype(str) - expected_dtype = SparseDtype(str, str(data.fill_value)) - expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype) + # pre-2.0 this would give a SparseDtype even if the user asked + # for a non-sparse dtype. + result = pd.Series(data[:5]).astype(str) + expected = pd.Series([str(x) for x in data[:5]], dtype=object) self.assert_series_equal(result, expected) @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype") diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 4e53000059cdc..6483ad37a2886 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -85,9 +85,7 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): name = "name" # op(pd.Series, array) preserves the name. series = pd.Series(a1, name=name) - warn = None if not sparse else FutureWarning - with tm.assert_produces_warning(warn): - other = pd.Index(a2, name=name).astype("int64") + other = pd.Index(a2, name=name).astype("int64") array_args = (a1, a2) series_args = (series, other) # ufunc(series, array) From 23c2eb58bcca3b0f1ebce524cce389dee9e7b782 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Oct 2022 09:21:19 -0700 Subject: [PATCH 2/3] fix append test --- pandas/tests/frame/methods/test_append.py | 7 +------ pandas/tests/indexing/test_loc.py | 4 ---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index f07ffee20a55f..54d7d95ed4570 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -241,12 +241,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): def test_other_dtypes(self, data, dtype, using_array_manager): df = DataFrame(data, dtype=dtype) - warn = None - if using_array_manager and isinstance(dtype, pd.SparseDtype): - warn = FutureWarning - - with tm.assert_produces_warning(warn, match="astype from SparseDtype"): - result = df._append(df.iloc[0]).iloc[-1] + result = df._append(df.iloc[0]).iloc[-1] expected = Series(data, name=0, dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3490d05f13e9d..c87b590e6bf01 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1300,10 +1300,6 @@ def test_loc_getitem_time_object(self, frame_or_series): @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) @td.skip_if_no_scipy - @pytest.mark.filterwarnings( - # TODO(2.0): remove filtering; note only needed for using_array_manager - "ignore:The behavior of .astype from SparseDtype.*FutureWarning" - ) def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): import scipy.sparse From e1824cf81f9bbb141e15a1b0ed89ba75aa2f5334 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Oct 2022 13:47:55 -0700 Subject: [PATCH 3/3] remove no-longer-overriden tests --- pandas/tests/extension/test_sparse.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index d05e98232e6d8..c051119f0fec4 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -19,8 +19,6 @@ from pandas.errors import PerformanceWarning -from pandas.core.dtypes.common import is_object_dtype - import pandas as pd from pandas import SparseDtype import pandas._testing as tm @@ -382,27 +380,6 @@ def test_equals(self, data, na_value, as_series, box): class TestCasting(BaseSparseTests, base.BaseCastingTests): - def _test_astype_object_series(self, all_data): - # Unlike the base class, we do not expect the resulting Block - # to be ObjectBlock / resulting array to be np.dtype("object") - ser = pd.Series(all_data, name="A") - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = ser.astype(object) - assert is_object_dtype(result.dtype) - assert is_object_dtype(result._mgr.array.dtype) - - def _test_astype_object_frame(self, all_data): - # Unlike the base class, we do not expect the resulting Block - # to be ObjectBlock / resulting array to be np.dtype("object") - df = pd.DataFrame({"A": all_data}) - - result = df.astype(object) - assert is_object_dtype(result._mgr.arrays[0].dtype) - - # check that we can compare the dtypes - comp = result.dtypes == df.dtypes - assert not comp.any() - def test_astype_str(self, data): # pre-2.0 this would give a SparseDtype even if the user asked # for a non-sparse dtype.