Skip to content

TST: base test for ExtensionArray.astype to its own type + copy keyword #35116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Sep 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
2b8f738
Add test for self dtype
Jul 2, 2020
6cc98c9
Fix boolean astype method copying behavior
Jul 2, 2020
ecbc762
Fix integer IntegerArray astype, same problem
Jul 2, 2020
4fa6d92
Hack in fix for PandasArray.astype()
Jul 2, 2020
6ff48ea
Same hacky fix for period
Jul 2, 2020
1470c6b
Same hacky fix for sparse
Jul 2, 2020
9e96547
Fix StringArray in same hacky way, does not fix __eq__ problem
Jul 2, 2020
2f287ca
Fix DecimalArray in same hacky way
Jul 2, 2020
c7baa62
Undo removal of string tests
Jul 3, 2020
b68f4fb
Fix formatting to pass 'black pandas'
Jul 3, 2020
462704d
Add changes to whatsnew
Jul 3, 2020
609b255
Merge branch 'master' into extension-base-test
Jul 19, 2020
b761926
Merge branch 'master' into extension-base-test
tomaszps Sep 6, 2020
72cb49e
Update v1.1.0.rst
tomaszps Sep 6, 2020
ced29ed
Temporarily disable TestCasting for strings
Sep 6, 2020
7762eb9
Merge branch 'extension-base-test' of https://github.com/tomaszps/pan…
Sep 6, 2020
4ff47fb
Revert "Temporarily disable TestCasting for strings"
Sep 8, 2020
d5095d4
Fix style in `sparse/array.py` to match other changes
Sep 8, 2020
5f464a6
Remove redundant copy check from `string_.py`
Sep 9, 2020
7f45370
Add 'if copy' check to base ExtensionArray astype class
Sep 9, 2020
f134cf4
Clean up small formatting/import issues in numpy_.py
Sep 9, 2020
fbbe28a
Swap `dtype == self.dtype` checks for `is_dtype_equal`
Sep 9, 2020
dcd884d
Remove pointless astype method definition in `PandasArray`
Sep 10, 2020
a4a32db
Make DecimalArray use preexisting code for own-type copy
Sep 10, 2020
b5b1ccc
Fix passing xfail tests in test_numpy.py
Sep 10, 2020
5720f67
Remove unnecessary import from numpy_
Sep 19, 2020
7155e0f
Merge branch 'master' into extension-base-test
Sep 19, 2020
047de22
Fix import formatting using isort
Sep 19, 2020
3dcef56
add comment
jorisvandenbossche Sep 19, 2020
854d2bb
Add changes to whatsnew
Sep 19, 2020
42cd940
Fix .rst typo
Sep 19, 2020
10f0f64
Update doc/source/whatsnew/v1.2.0.rst
tomaszps Sep 21, 2020
3f34d28
Update pandas/tests/extension/test_numpy.py
tomaszps Sep 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ ExtensionArray
^^^^^^^^^^^^^^

- Fixed bug where :class:`DataFrame` column set to scalar extension type via a dict instantiation was considered an object type rather than the extension type (:issue:`35965`)
- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`28488`)
-


Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,11 @@ def astype(self, dtype, copy=True):
from pandas.core.arrays.string_ import StringDtype

dtype = pandas_dtype(dtype)
if is_dtype_equal(dtype, self.dtype):
if not copy:
return self
elif copy:
return self.copy()
if isinstance(dtype, StringDtype): # allow conversion to StringArrays
return dtype.construct_array_type()._from_sequence(self, copy=False)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,10 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:

if isinstance(dtype, BooleanDtype):
values, mask = coerce_to_array(self, copy=copy)
return BooleanArray(values, mask, copy=False)
if not copy:
return self
else:
return BooleanArray(values, mask, copy=False)
elif isinstance(dtype, StringDtype):
return dtype.construct_array_type()._from_sequence(self, copy=False)

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
TD64NS_DTYPE,
ensure_object,
is_datetime64_dtype,
is_dtype_equal,
is_float_dtype,
is_period_dtype,
pandas_dtype,
Expand Down Expand Up @@ -582,7 +583,11 @@ def astype(self, dtype, copy: bool = True):
# We handle Period[T] -> Period[U]
# Our parent handles everything else.
dtype = pandas_dtype(dtype)

if is_dtype_equal(dtype, self._dtype):
if not copy:
return self
elif copy:
return self.copy()
if is_period_dtype(dtype):
return self.asfreq(dtype.freq)
return super().astype(dtype, copy=copy)
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,11 @@ def astype(self, dtype=None, copy=True):
IntIndex
Indices: array([2, 3], dtype=int32)
"""
if is_dtype_equal(dtype, self._dtype):
if not copy:
return self
elif copy:
return self.copy()
dtype = self.dtype.update_dtype(dtype)
subtype = dtype._subtype_with_str
# TODO copy=False is broken for astype_nansafe with int -> float, so cannot
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/extension/base/casting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

import pandas as pd
from pandas.core.internals import ObjectBlock
Expand Down Expand Up @@ -56,3 +57,11 @@ def test_astype_empty_dataframe(self, dtype):
df = pd.DataFrame()
result = df.astype(dtype)
self.assert_frame_equal(result, df)

@pytest.mark.parametrize("copy", [True, False])
def test_astype_own_type(self, data, copy):
# ensure that astype returns the original object for equal dtype and copy=False
# https://github.com/pandas-dev/pandas/issues/28488
result = data.astype(data.dtype, copy=copy)
assert (result is data) is (not copy)
self.assert_extension_array_equal(result, data)
7 changes: 5 additions & 2 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.common import is_dtype_equal, pandas_dtype

import pandas as pd
from pandas.api.extensions import no_default, register_extension_dtype
Expand Down Expand Up @@ -131,9 +131,12 @@ def copy(self):
return type(self)(self._data.copy())

def astype(self, dtype, copy=True):
if is_dtype_equal(dtype, self._dtype):
if not copy:
return self
dtype = pandas_dtype(dtype)
if isinstance(dtype, type(self.dtype)):
return type(self)(self._data, context=dtype.context)
return type(self)(self._data, copy=copy, context=dtype.context)

return super().astype(dtype, copy=copy)

Expand Down
16 changes: 2 additions & 14 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def test_take_series(self, data):

def test_loc_iloc_frame_single_dtype(self, data, request):
npdtype = data.dtype.numpy_dtype
if npdtype == object or npdtype == np.float64:
if npdtype == object:
# GH#33125
mark = pytest.mark.xfail(
reason="GH#33125 astype doesn't recognize data.dtype"
Expand All @@ -191,14 +191,6 @@ class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
def test_groupby_extension_apply(
self, data_for_grouping, groupby_apply_op, request
):
# ValueError: Names should be list-like for a MultiIndex
a = "a"
is_identity = groupby_apply_op(a) is a
if data_for_grouping.dtype.numpy_dtype == np.float64 and is_identity:
mark = pytest.mark.xfail(
reason="GH#33125 astype doesn't recognize data.dtype"
)
request.node.add_marker(mark)
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)


Expand Down Expand Up @@ -306,11 +298,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):


class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
@pytest.mark.xfail(
reason="GH#33125 PandasArray.astype does not recognize PandasDtype"
)
def test_series_repr(self, data):
super().test_series_repr(data)
pass


@skip_nested
Expand Down