-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: move Block.astype implementation to dtypes/cast.py #40141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
648e005
2f52197
aefc462
01941b7
76d1e1c
3133809
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
import inspect | ||
|
||
import numpy as np | ||
|
||
from pandas._typing import ( | ||
ArrayLike, | ||
DtypeObj, | ||
) | ||
|
||
from pandas.core.dtypes.cast import ( | ||
astype_dt64_to_dt64tz, | ||
astype_nansafe, | ||
) | ||
from pandas.core.dtypes.common import ( | ||
is_datetime64_dtype, | ||
is_datetime64tz_dtype, | ||
is_dtype_equal, | ||
pandas_dtype, | ||
) | ||
from pandas.core.dtypes.dtypes import ExtensionDtype | ||
|
||
from pandas.core.arrays import ExtensionArray | ||
|
||
|
||
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False):
    """
    Convert an array to ``dtype``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Array to convert.
    dtype : dtype object
        Target dtype, already normalized to a dtype object.
    copy : bool, default False
        Forwarded to the underlying conversion; when the dtype is already
        equal, a copy is returned only if this is True.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If a datetimelike array (dtype kind "m" or "M") is cast to a numpy
        integer dtype whose itemsize is not 8.
    """
    source_is_datetimelike = values.dtype.kind in ("m", "M")
    if source_is_datetimelike and dtype.kind in ("i", "u"):
        if isinstance(dtype, np.dtype) and dtype.itemsize != 8:
            # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
            raise TypeError(
                rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
            )

    # tz-naive datetime64 -> tz-aware goes through a dedicated helper
    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    # Nothing to convert: hand back the input (or a copy of it)
    if is_dtype_equal(values.dtype, dtype):
        return values.copy() if copy else values

    if isinstance(values, ExtensionArray):
        converted = values.astype(dtype, copy=copy)
    else:
        converted = astype_nansafe(values, dtype, copy=copy)

    # now in ObjectBlock._maybe_coerce_values(cls, values):
    # numpy str-typed results are stored as object dtype instead
    if isinstance(dtype, np.dtype) and issubclass(converted.dtype.type, str):
        converted = np.array(converted, dtype=object)

    return converted
|
||
|
||
def astype_array_safe(values, dtype, copy: bool = False, errors: str = "raise"):
    """
    Convert an array to ``dtype``, optionally swallowing conversion errors.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Array to convert.
    dtype : str, dtype convertible
        Target dtype; normalized with ``pandas_dtype`` before casting.
    copy : bool, default False
        Forwarded to ``astype_array``.
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is not one of the accepted values.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance.
    """
    # NOTE(review): ``values``/``dtype`` are left unannotated on purpose;
    # ``dtype`` is only normalized by ``pandas_dtype`` further down. Reviewers
    # suggested hoisting that call into the caller so it runs once rather
    # than per block/array.
    errors_legal_values = ("raise", "ignore")
    if errors not in errors_legal_values:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'"
        )

    # Catch the common mistake of passing e.g. ``SomeDtype`` instead of
    # ``SomeDtype()``
    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )

    dtype = pandas_dtype(dtype)

    try:
        return astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        #  trying to convert to float
        if errors != "ignore":
            raise
        return values
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
from __future__ import annotations | ||
|
||
import inspect | ||
import re | ||
from typing import ( | ||
TYPE_CHECKING, | ||
|
@@ -36,8 +35,6 @@ | |
from pandas.util._validators import validate_bool_kwarg | ||
|
||
from pandas.core.dtypes.cast import ( | ||
astype_dt64_to_dt64tz, | ||
astype_nansafe, | ||
can_hold_element, | ||
find_common_type, | ||
infer_dtype_from, | ||
|
@@ -49,7 +46,6 @@ | |
) | ||
from pandas.core.dtypes.common import ( | ||
is_categorical_dtype, | ||
is_datetime64_dtype, | ||
is_datetime64tz_dtype, | ||
is_dtype_equal, | ||
is_extension_array_dtype, | ||
|
@@ -76,6 +72,7 @@ | |
) | ||
|
||
import pandas.core.algorithms as algos | ||
from pandas.core.array_algos.cast import astype_array_safe | ||
from pandas.core.array_algos.putmask import ( | ||
extract_bool_array, | ||
putmask_inplace, | ||
|
@@ -652,33 +649,11 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): | |
------- | ||
Block | ||
""" | ||
errors_legal_values = ("raise", "ignore") | ||
|
||
if errors not in errors_legal_values: | ||
invalid_arg = ( | ||
"Expected value of kwarg 'errors' to be one of " | ||
f"{list(errors_legal_values)}. Supplied value is '{errors}'" | ||
) | ||
raise ValueError(invalid_arg) | ||
|
||
if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): | ||
msg = ( | ||
f"Expected an instance of {dtype.__name__}, " | ||
"but got the class instead. Try instantiating 'dtype'." | ||
) | ||
raise TypeError(msg) | ||
|
||
dtype = pandas_dtype(dtype) | ||
values = self.values | ||
if values.dtype.kind in ["m", "M"]: | ||
values = self.array_values() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could move this into astype_array_safe and use ensure_wrapped_if_datetimelike; that would make it robust to AM/BM (though I think both AM and BM now have PRs to make the arrays EAs to begin with). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since ArrayManager already stores it as EAs (after this array), I would prefer to leave it here (then your PR changing to store EAs in BlockManager as well can remove those two lines). |
||
|
||
try: | ||
new_values = self._astype(dtype, copy=copy) | ||
except (ValueError, TypeError): | ||
# e.g. astype_nansafe can fail on object-dtype of strings | ||
# trying to convert to float | ||
if errors == "ignore": | ||
new_values = self.values | ||
else: | ||
raise | ||
new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) | ||
|
||
newb = self.make_block(new_values) | ||
if newb.shape != self.shape: | ||
|
@@ -689,37 +664,6 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): | |
) | ||
return newb | ||
|
||
def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
    """
    Cast this block's values to ``dtype`` and return the resulting array.

    Parameters
    ----------
    dtype : dtype object
        Target dtype, already normalized to a dtype object.
    copy : bool
        Forwarded to the underlying conversion; when the dtype is already
        equal, a copy is returned only if this is True.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If datetimelike values (dtype kind "m" or "M") are cast to a numpy
        integer dtype whose itemsize is not 8.
    """
    source_is_datetimelike = self.values.dtype.kind in ("m", "M")
    # NOTE(review): array_values() presumably returns the DTA/TDA wrapper
    # for datetimelike data - confirm against the Block implementation.
    values = self.array_values() if source_is_datetimelike else self.values

    if values.dtype.kind in ("m", "M") and dtype.kind in ("i", "u"):
        if isinstance(dtype, np.dtype) and dtype.itemsize != 8:
            # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
            raise TypeError(
                rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
            )

    # tz-naive datetime64 -> tz-aware goes through a dedicated helper
    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    # Nothing to convert: hand back the values (or a copy of them)
    if is_dtype_equal(values.dtype, dtype):
        return values.copy() if copy else values

    if isinstance(values, ExtensionArray):
        return values.astype(dtype, copy=copy)
    return astype_nansafe(values, dtype, copy=copy)
|
||
def convert( | ||
self, | ||
copy: bool = True, | ||
|
Uh oh!
There was an error while loading. Please reload this page.