diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5882b74aa8b05..af63d49a24d7a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -361,6 +361,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Fixed Bug where :class:`DataFrame` column set to scalar extension type via a dict instantion was considered an object type rather than the extension type (:issue:`35965`) +- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`28488`) - diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e93cdb608dffb..eae401f9744f0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -457,6 +457,11 @@ def astype(self, dtype, copy=True): from pandas.core.arrays.string_ import StringDtype dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, self.dtype): + if not copy: + return self + elif copy: + return self.copy() if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index bd4bdc5ecb46f..3bd36209b3c71 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -375,7 +375,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if isinstance(dtype, BooleanDtype): values, mask = coerce_to_array(self, copy=copy) - return BooleanArray(values, mask, copy=False) + if not copy: + return self + else: + return BooleanArray(values, mask, copy=False) elif isinstance(dtype, StringDtype): return dtype.construct_array_type()._from_sequence(self, copy=False) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 44c0455018a42..372ef7df9dc3a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -33,6 +33,7 @@ TD64NS_DTYPE, ensure_object, is_datetime64_dtype, + is_dtype_equal, is_float_dtype, is_period_dtype, pandas_dtype, @@ 
-582,7 +583,11 @@ def astype(self, dtype, copy: bool = True): # We handle Period[T] -> Period[U] # Our parent handles everything else. dtype = pandas_dtype(dtype) - + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + elif copy: + return self.copy() if is_period_dtype(dtype): return self.asfreq(dtype.freq) return super().astype(dtype, copy=copy) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index c88af77ea6189..528d78a5414ea 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1063,6 +1063,11 @@ def astype(self, dtype=None, copy=True): IntIndex Indices: array([2, 3], dtype=int32) """ + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + elif copy: + return self.copy() dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str # TODO copy=False is broken for astype_nansafe with int -> float, so cannot diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 3aaf040a4279b..039b42210224e 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas.core.internals import ObjectBlock @@ -56,3 +57,11 @@ def test_astype_empty_dataframe(self, dtype): df = pd.DataFrame() result = df.astype(dtype) self.assert_frame_equal(result, df) + + @pytest.mark.parametrize("copy", [True, False]) + def test_astype_own_type(self, data, copy): + # ensure that astype returns the original object for equal dtype and copy=False + # https://github.com/pandas-dev/pandas/issues/28488 + result = data.astype(data.dtype, copy=copy) + assert (result is data) is (not copy) + self.assert_extension_array_equal(result, data) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 9147360e71c73..2895f33d5c887 100644 --- a/pandas/tests/extension/decimal/array.py +++ 
b/pandas/tests/extension/decimal/array.py @@ -7,7 +7,7 @@ import numpy as np from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.common import pandas_dtype +from pandas.core.dtypes.common import is_dtype_equal, pandas_dtype import pandas as pd from pandas.api.extensions import no_default, register_extension_dtype @@ -131,9 +131,12 @@ def copy(self): return type(self)(self._data.copy()) def astype(self, dtype, copy=True): + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self dtype = pandas_dtype(dtype) if isinstance(dtype, type(self.dtype)): - return type(self)(self._data, context=dtype.context) + return type(self)(self._data, copy=copy, context=dtype.context) return super().astype(dtype, copy=copy) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index bbfaacae1b444..c4afcd7a536df 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -177,7 +177,7 @@ def test_take_series(self, data): def test_loc_iloc_frame_single_dtype(self, data, request): npdtype = data.dtype.numpy_dtype - if npdtype == object or npdtype == np.float64: + if npdtype == object: # GH#33125 mark = pytest.mark.xfail( reason="GH#33125 astype doesn't recognize data.dtype" @@ -191,14 +191,6 @@ class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): - # ValueError: Names should be list-like for a MultiIndex - a = "a" - is_identity = groupby_apply_op(a) is a - if data_for_grouping.dtype.numpy_dtype == np.float64 and is_identity: - mark = pytest.mark.xfail( - reason="GH#33125 astype doesn't recognize data.dtype" - ) - request.node.add_marker(mark) super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) @@ -306,11 +298,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): class TestPrinting(BaseNumPyTests, base.BasePrintingTests): - @pytest.mark.xfail( - 
reason="GH#33125 PandasArray.astype does not recognize PandasDtype" - ) - def test_series_repr(self, data): - super().test_series_repr(data) + pass @skip_nested