From 2b8f73814ccf03812f25ca510c8b90f659337e0a Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 16:41:35 -0700 Subject: [PATCH 01/29] Add test for self dtype --- pandas/tests/extension/base/casting.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 3aaf040a4279b..14e9b6f8247eb 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas.core.internals import ObjectBlock @@ -56,3 +57,9 @@ def test_astype_empty_dataframe(self, dtype): df = pd.DataFrame() result = df.astype(dtype) self.assert_frame_equal(result, df) + + @pytest.mark.parametrize('copy', [True, False]) + def test_astype_own_type(self, data, copy): + result = data.astype(data.dtype, copy=copy) + assert (result is data) is (not copy) + self.assert_extension_array_equal(result, data) From 6cc98c9726a70240e11ec71c6421ffb39934abe7 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Wed, 1 Jul 2020 21:55:43 -0700 Subject: [PATCH 02/29] Fix boolean astype method copying behaior --- pandas/core/arrays/boolean.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index dbce71b77a425..398d4ae56311e 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -376,7 +376,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if isinstance(dtype, BooleanDtype): values, mask = coerce_to_array(self, copy=copy) - return BooleanArray(values, mask, copy=False) + if copy: + return BooleanArray(values, mask, copy=False) + else: + return self elif isinstance(dtype, StringDtype): return dtype.construct_array_type()._from_sequence(self, copy=False) From ecbc762d8457aa6719642cbb435bb084f10adae3 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 15:37:07 -0700 Subject: 
[PATCH 03/29] Fix integer IntegerArray astype, same problem - Issue is that .astype of self's type always returned a copy - Change BooleanArray fix to have consistent test order --- pandas/core/arrays/boolean.py | 6 +++--- pandas/core/arrays/integer.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 398d4ae56311e..9accb812d7fb4 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -376,10 +376,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if isinstance(dtype, BooleanDtype): values, mask = coerce_to_array(self, copy=copy) - if copy: - return BooleanArray(values, mask, copy=False) - else: + if not copy: return self + else: + return BooleanArray(values, mask, copy=False) elif isinstance(dtype, StringDtype): return dtype.construct_array_type()._from_sequence(self, copy=False) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index df43b5d6115ba..690f0426ddf57 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -452,6 +452,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: # if we are astyping to an existing IntegerDtype we can fastpath if isinstance(dtype, _IntegerDtype): + if dtype is self.dtype and not copy: + return self result = self._data.astype(dtype.numpy_dtype, copy=False) return dtype.construct_array_type()(result, mask=self._mask, copy=False) elif isinstance(dtype, BooleanDtype): From 4fa6d92db1ffaa92461d515c061117be275643b4 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 16:06:42 -0700 Subject: [PATCH 04/29] Hack in fix for PandasArray.astype() -Just check if dtype is same, then return self or self.copy() - Make test order consistent w/ other classes --- pandas/core/arrays/numpy_.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 
f6dfb1f0f1e62..a1fd7a8525713 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -5,7 +5,7 @@ from numpy.lib.mixins import NDArrayOperatorsMixin from pandas._libs import lib -from pandas._typing import Scalar +from pandas._typing import Scalar, ArrayLike from pandas.compat.numpy import function as nv from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs @@ -277,6 +277,14 @@ def __setitem__(self, key, value) -> None: self._ndarray[key] = value + def astype(self, dtype, copy: bool = True) -> ArrayLike: + if not copy and dtype == self._dtype: + return self + elif copy and dtype == self._dtype: + return self.copy() + else: + return super().astype(dtype, copy) + def isna(self) -> np.ndarray: return isna(self._ndarray) From 6ff48ea920b65fa601120cd320ff04aa4f129d26 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 16:11:25 -0700 Subject: [PATCH 05/29] Same hacky fix for period -Continue to make style consistent --- pandas/core/arrays/numpy_.py | 9 +++++---- pandas/core/arrays/period.py | 6 +++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index a1fd7a8525713..9248df61755fc 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -278,10 +278,11 @@ def __setitem__(self, key, value) -> None: self._ndarray[key] = value def astype(self, dtype, copy: bool = True) -> ArrayLike: - if not copy and dtype == self._dtype: - return self - elif copy and dtype == self._dtype: - return self.copy() + if dtype == self.dtype: + if not copy: + return self + elif copy: + return self.copy() else: return super().astype(dtype, copy) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 4b4df3445be4e..f9eb3c5f8998f 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -575,7 +575,11 @@ def astype(self, dtype, copy: bool = True): # We handle Period[T] -> 
Period[U] # Our parent handles everything else. dtype = pandas_dtype(dtype) - + if dtype == self._dtype: + if not copy: + return self + elif copy: + return self.copy() if is_period_dtype(dtype): return self.asfreq(dtype.freq) return super().astype(dtype, copy=copy) From 1470c6be925b479e0cedeb7805b0f6ce8b683e66 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 16:14:06 -0700 Subject: [PATCH 06/29] Same hacky fix for sparse --- pandas/core/arrays/sparse/array.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4996a10002c63..f307ebe521aef 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1061,6 +1061,10 @@ def astype(self, dtype=None, copy=True): IntIndex Indices: array([2, 3], dtype=int32) """ + if dtype == self.dtype and copy: + return self.copy() + elif dtype == self.dtype and not copy: + return self dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str # TODO copy=False is broken for astype_nansafe with int -> float, so cannot From 9e965471a4e6d019b4ba263728871aaf8658166a Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 16:29:29 -0700 Subject: [PATCH 07/29] Fix StringArray in same hacky way, does not fix __eq__ problem - I turned off the test that was failing temporarily because of __eq__ so that I could continue with the other tests --- pandas/core/arrays/string_.py | 5 +++++ pandas/tests/extension/test_string.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index ac501a8afbe09..44354d3fb4e51 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -278,6 +278,11 @@ def fillna(self, value=None, method=None, limit=None): return super().fillna(value, method, limit) def astype(self, dtype, copy=True): + if dtype == self.dtype: + if not copy: + return self + elif copy: + 
return self.copy() dtype = pandas_dtype(dtype) if isinstance(dtype, StringDtype): if copy: diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 27a157d2127f6..f6c1a826f736b 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -95,8 +95,8 @@ def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) -class TestCasting(base.BaseCastingTests): - pass +# class TestCasting(base.BaseCastingTests): +# pass class TestComparisonOps(base.BaseComparisonOpsTests): From 2f287ca9a216eaee66bbf1b4c4c7afdacc6aa414 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 2 Jul 2020 16:30:55 -0700 Subject: [PATCH 08/29] Fix DecimalArray in same hacky way --- pandas/tests/extension/decimal/array.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 4d5be75ff8200..729e884eea3cf 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -131,6 +131,11 @@ def copy(self): return type(self)(self._data.copy()) def astype(self, dtype, copy=True): + if dtype == self._dtype: + if not copy: + return self + elif copy: + return self.copy() dtype = pandas_dtype(dtype) if isinstance(dtype, type(self.dtype)): return type(self)(self._data, context=dtype.context) From c7baa6258bc21b0eae44ed121cd15086d24ef705 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Fri, 3 Jul 2020 12:47:00 -0700 Subject: [PATCH 09/29] Undo removal of string tests --- pandas/tests/extension/test_string.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index f6c1a826f736b..27a157d2127f6 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -95,8 +95,8 @@ def test_value_counts(self, all_data, dropna): return 
super().test_value_counts(all_data, dropna) -# class TestCasting(base.BaseCastingTests): -# pass +class TestCasting(base.BaseCastingTests): + pass class TestComparisonOps(base.BaseComparisonOpsTests): From b68f4fbd6ab1630773aba915443c41ab200788b4 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Fri, 3 Jul 2020 12:53:03 -0700 Subject: [PATCH 10/29] Fix formatting to pass 'black pandas' --- pandas/tests/extension/base/casting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 14e9b6f8247eb..415722890dea4 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -58,7 +58,7 @@ def test_astype_empty_dataframe(self, dtype): result = df.astype(dtype) self.assert_frame_equal(result, df) - @pytest.mark.parametrize('copy', [True, False]) + @pytest.mark.parametrize("copy", [True, False]) def test_astype_own_type(self, data, copy): result = data.astype(data.dtype, copy=copy) assert (result is data) is (not copy) From 462704dfc377ed05f7709c01550b4838aac71291 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Fri, 3 Jul 2020 16:06:57 -0700 Subject: [PATCH 11/29] Add changes to whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9bd4ddbb624d9..d7c5897bdc729 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1134,6 +1134,7 @@ ExtensionArray - Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`) - Fixed bug where :meth:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`) - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`) +- Fixed bug where `ExtensionArray.astype(dtype=?????, copy=False)` would return a new object (:issue:`284881) Other ^^^^^ 
From 72cb49e2a45f74a7dd8b5149022ff3290793f81c Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Sat, 5 Sep 2020 17:09:16 -0700 Subject: [PATCH 12/29] Update v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8818727766942..a49b29d691692 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1191,8 +1191,6 @@ ExtensionArray - Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) - Fixed bug in :meth:`IntegerArray.astype` to correctly copy the mask as well (:issue:`34931`). - - Other ^^^^^ From ced29edc31489ec77489ff7d2f89ec887a53918c Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Sat, 5 Sep 2020 17:23:26 -0700 Subject: [PATCH 13/29] Temporarily disable TestCasting for strings --- pandas/tests/extension/test_string.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 27a157d2127f6..af66449a86752 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -94,9 +94,9 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) - -class TestCasting(base.BaseCastingTests): - pass +# +# class TestCasting(base.BaseCastingTests): +# pass class TestComparisonOps(base.BaseComparisonOpsTests): From 4ff47fbc700c62facd3ff64da2cb716aec38a16a Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Tue, 8 Sep 2020 16:05:29 -0700 Subject: [PATCH 14/29] Revert "Temporarily disable TestCasting for strings" This reverts commit ced29edc31489ec77489ff7d2f89ec887a53918c. 
--- pandas/tests/extension/test_string.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index af66449a86752..27a157d2127f6 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -94,9 +94,9 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) -# -# class TestCasting(base.BaseCastingTests): -# pass + +class TestCasting(base.BaseCastingTests): + pass class TestComparisonOps(base.BaseComparisonOpsTests): From d5095d4be1a8b7a124bb8b595716de1c442d47cb Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Tue, 8 Sep 2020 16:09:50 -0700 Subject: [PATCH 15/29] Fix style in `sparse/array.py` to match other changes --- pandas/core/arrays/sparse/array.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index cb048fe94a34a..c960ac8be13ee 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1063,10 +1063,11 @@ def astype(self, dtype=None, copy=True): IntIndex Indices: array([2, 3], dtype=int32) """ - if dtype == self.dtype and copy: - return self.copy() - elif dtype == self.dtype and not copy: - return self + if dtype == self._dtype: + if not copy: + return self + elif copy: + return self.copy() dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str # TODO copy=False is broken for astype_nansafe with int -> float, so cannot From 5f464a69f1d841bf030ff0a1b66899fb1998725b Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Wed, 9 Sep 2020 11:54:24 -0700 Subject: [PATCH 16/29] Remove redundant copy check from `string_.py` --- pandas/core/arrays/string_.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 
dd77c9780421e..381968f9724b6 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -263,11 +263,6 @@ def fillna(self, value=None, method=None, limit=None): return super().fillna(value, method, limit) def astype(self, dtype, copy=True): - if dtype == self.dtype: - if not copy: - return self - elif copy: - return self.copy() dtype = pandas_dtype(dtype) if isinstance(dtype, StringDtype): if copy: From 7f4537011c7431a7e98f86463be8a817220230a2 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Wed, 9 Sep 2020 12:18:52 -0700 Subject: [PATCH 17/29] Add 'if copy' check to base ExtensionArray astype class - Remove corresponding check from numpy_.py - Note that all tests now pass --- pandas/core/arrays/base.py | 5 +++++ pandas/core/arrays/numpy_.py | 10 +++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8193d65b3b30c..906c54540c2b8 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -459,6 +459,11 @@ def astype(self, dtype, copy=True): from pandas.core.arrays.string_ import StringDtype dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, self.dtype): + if not copy: + return self + elif copy: + return self.copy() if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 89e2858cbf7ed..80331a36973e1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -13,6 +13,7 @@ from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.common import is_dtype_equal from pandas import compat from pandas.core import nanops, ops @@ -25,6 +26,7 @@ from pandas.core.missing import backfill_1d, pad_1d + class PandasDtype(ExtensionDtype): """ A Pandas 
ExtensionDtype for NumPy dtypes. @@ -277,13 +279,7 @@ def __setitem__(self, key, value) -> None: self._ndarray[key] = value def astype(self, dtype, copy: bool = True) -> ArrayLike: - if dtype == self.dtype: - if not copy: - return self - elif copy: - return self.copy() - else: - return super().astype(dtype, copy) + return super().astype(dtype, copy) def isna(self) -> np.ndarray: return isna(self._ndarray) From f134cf42f587b323106b3a11e235955f607019d0 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Wed, 9 Sep 2020 13:13:11 -0700 Subject: [PATCH 18/29] Clean up small formatting/import issues in numpy_.py --- pandas/core/arrays/numpy_.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 80331a36973e1..bb3933ae1ee5b 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -13,7 +13,6 @@ from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.common import is_dtype_equal from pandas import compat from pandas.core import nanops, ops @@ -26,7 +25,6 @@ from pandas.core.missing import backfill_1d, pad_1d - class PandasDtype(ExtensionDtype): """ A Pandas ExtensionDtype for NumPy dtypes. 
From fbbe28aec3c7a4cfb3358376adcaf6b4a38b49f3 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Wed, 9 Sep 2020 14:06:18 -0700 Subject: [PATCH 19/29] Swap `dtype == self.dtype` checks for `is_dtype_equal` --- pandas/core/arrays/period.py | 3 ++- pandas/core/arrays/sparse/array.py | 2 +- pandas/tests/extension/decimal/array.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8edf6ea93e078..80fba9d6d323e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -33,6 +33,7 @@ TD64NS_DTYPE, ensure_object, is_datetime64_dtype, + is_dtype_equal, is_float_dtype, is_period_dtype, pandas_dtype, @@ -577,7 +578,7 @@ def astype(self, dtype, copy: bool = True): # We handle Period[T] -> Period[U] # Our parent handles everything else. dtype = pandas_dtype(dtype) - if dtype == self._dtype: + if is_dtype_equal(dtype, self._dtype): if not copy: return self elif copy: diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index c960ac8be13ee..62e9e22ab4d1a 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1063,7 +1063,7 @@ def astype(self, dtype=None, copy=True): IntIndex Indices: array([2, 3], dtype=int32) """ - if dtype == self._dtype: + if is_dtype_equal(dtype, self._dtype): if not copy: return self elif copy: diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 36cd323776e5e..ffd7b0a4ec958 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -7,7 +7,7 @@ import numpy as np from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.common import pandas_dtype +from pandas.core.dtypes.common import pandas_dtype, is_dtype_equal import pandas as pd from pandas.api.extensions import no_default, register_extension_dtype @@ -131,7 +131,7 @@ def copy(self): return type(self)(self._data.copy()) 
def astype(self, dtype, copy=True): - if dtype == self._dtype: + if is_dtype_equal(dtype, self._dtype): if not copy: return self elif copy: From dcd884deeeb1828ed00784439c2dfb137349b923 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 10 Sep 2020 12:13:15 -0700 Subject: [PATCH 20/29] Remove pointless astype method definition in `PandasArray` --- pandas/core/arrays/numpy_.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index bb3933ae1ee5b..d1047a1da236c 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -276,9 +276,6 @@ def __setitem__(self, key, value) -> None: self._ndarray[key] = value - def astype(self, dtype, copy: bool = True) -> ArrayLike: - return super().astype(dtype, copy) - def isna(self) -> np.ndarray: return isna(self._ndarray) From a4a32dbf5106fff2e9fcd7d3a2178233b2c6fd20 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 10 Sep 2020 12:23:18 -0700 Subject: [PATCH 21/29] Make DecimalArray use preexisting code for own-type copy --- pandas/tests/extension/decimal/array.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index ffd7b0a4ec958..85d59fbff31a5 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -134,11 +134,9 @@ def astype(self, dtype, copy=True): if is_dtype_equal(dtype, self._dtype): if not copy: return self - elif copy: - return self.copy() dtype = pandas_dtype(dtype) if isinstance(dtype, type(self.dtype)): - return type(self)(self._data, context=dtype.context) + return type(self)(self._data, copy=copy, context=dtype.context) return super().astype(dtype, copy=copy) From b5b1ccc8555c72abe002b6e2a78f4a2f6ed0145b Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Thu, 10 Sep 2020 12:48:39 -0700 Subject: [PATCH 22/29] Fix passing xfail tests in test_numpy.py --- 
pandas/tests/extension/test_numpy.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index bbfaacae1b444..cdcdb09b58e28 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -177,7 +177,7 @@ def test_take_series(self, data): def test_loc_iloc_frame_single_dtype(self, data, request): npdtype = data.dtype.numpy_dtype - if npdtype == object or npdtype == np.float64: + if npdtype == object: # GH#33125 mark = pytest.mark.xfail( reason="GH#33125 astype doesn't recognize data.dtype" @@ -191,14 +191,6 @@ class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): - # ValueError: Names should be list-like for a MultiIndex - a = "a" - is_identity = groupby_apply_op(a) is a - if data_for_grouping.dtype.numpy_dtype == np.float64 and is_identity: - mark = pytest.mark.xfail( - reason="GH#33125 astype doesn't recognize data.dtype" - ) - request.node.add_marker(mark) super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) @@ -306,9 +298,6 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): class TestPrinting(BaseNumPyTests, base.BasePrintingTests): - @pytest.mark.xfail( - reason="GH#33125 PandasArray.astype does not recognize PandasDtype" - ) def test_series_repr(self, data): super().test_series_repr(data) From 5720f678aeea92508e7b54992f23b5cdea8b905c Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Sat, 19 Sep 2020 10:31:29 -0700 Subject: [PATCH 23/29] Remove unnecessary import from numpy_ --- pandas/core/arrays/numpy_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index d1047a1da236c..23a4a70734c81 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -5,7 +5,7 @@ from numpy.lib.mixins import 
NDArrayOperatorsMixin from pandas._libs import lib -from pandas._typing import Scalar, ArrayLike +from pandas._typing import Scalar from pandas.compat.numpy import function as nv from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs From 047de22a3d866eae09b18bcc5febfff3d5f4b36b Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Sat, 19 Sep 2020 11:52:24 -0700 Subject: [PATCH 24/29] Fix import formatting using isort --- pandas/tests/extension/decimal/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 3997e3c465a5e..2895f33d5c887 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -7,7 +7,7 @@ import numpy as np from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.common import pandas_dtype, is_dtype_equal +from pandas.core.dtypes.common import is_dtype_equal, pandas_dtype import pandas as pd from pandas.api.extensions import no_default, register_extension_dtype From 3dcef566a1cd22903da4de1820796bce6580380c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 19 Sep 2020 21:53:46 +0200 Subject: [PATCH 25/29] add comment --- pandas/tests/extension/base/casting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 415722890dea4..039b42210224e 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -60,6 +60,8 @@ def test_astype_empty_dataframe(self, dtype): @pytest.mark.parametrize("copy", [True, False]) def test_astype_own_type(self, data, copy): + # ensure that astype returns the original object for equal dtype and copy=False + # https://github.com/pandas-dev/pandas/issues/28488 result = data.astype(data.dtype, copy=copy) assert (result is data) is (not copy) self.assert_extension_array_equal(result, 
data) From 854d2bb4b8cafe2313709061f0cdf8ee7c168b84 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Sat, 19 Sep 2020 13:18:10 -0700 Subject: [PATCH 26/29] Add changes to whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5882b74aa8b05..cf28ecbfefcfe 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -361,6 +361,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Fixed Bug where :class:`DataFrame` column set to scalar extension type via a dict instantion was considered an object type rather than the extension type (:issue:`35965`) +- Fixed bug where `ExtensionArray.astype(dtype=?????, copy=False)` would return a new object (:issue:`284881) - From 42cd9409b36a90052bb1c2300233652fbb3c58a8 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Sat, 19 Sep 2020 13:54:35 -0700 Subject: [PATCH 27/29] Fix .rst typo --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cf28ecbfefcfe..f66b0ae706c48 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -361,7 +361,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Fixed Bug where :class:`DataFrame` column set to scalar extension type via a dict instantion was considered an object type rather than the extension type (:issue:`35965`) -- Fixed bug where `ExtensionArray.astype(dtype=?????, copy=False)` would return a new object (:issue:`284881) +- Fixed bug where `ExtensionArray.astype(dtype=?????, copy=False)` would return a new object (:issue:`284881`) - From 10f0f6414c47ff0937d3183bf78ff6d0819d0016 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Mon, 21 Sep 2020 07:43:18 -0700 Subject: [PATCH 28/29] Update doc/source/whatsnew/v1.2.0.rst Co-authored-by: Joris Van den Bossche --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f66b0ae706c48..af63d49a24d7a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -361,7 +361,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Fixed Bug where :class:`DataFrame` column set to scalar extension type via a dict instantion was considered an object type rather than the extension type (:issue:`35965`) -- Fixed bug where `ExtensionArray.astype(dtype=?????, copy=False)` would return a new object (:issue:`284881`) +- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`28488`) - From 3f34d284a3b2ae048fdc14b82b95224289dce7e7 Mon Sep 17 00:00:00 2001 From: Tomasz Sakrejda Date: Mon, 21 Sep 2020 07:43:30 -0700 Subject: [PATCH 29/29] Update pandas/tests/extension/test_numpy.py Co-authored-by: Joris Van den Bossche --- pandas/tests/extension/test_numpy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index cdcdb09b58e28..c4afcd7a536df 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -298,8 +298,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): class TestPrinting(BaseNumPyTests, base.BasePrintingTests): - def test_series_repr(self, data): - super().test_series_repr(data) + pass @skip_nested