-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
implement astype portion of #24024 #24405
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
6a5c216
1a9f30b
1b109b8
f271005
5615b9f
d5cca5a
184f59f
df39bd7
e41068a
6f108dd
b123d08
207ffb9
04efd45
3fca810
5fa32e9
5d718e6
33b5434
e29d898
a3c42f0
eac662b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,8 +15,9 @@ | |
|
||
from pandas.core.dtypes.common import ( | ||
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, | ||
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype, | ||
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype) | ||
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, | ||
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, | ||
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
@@ -469,6 +470,35 @@ def __iter__(self): | |
for v in converted: | ||
yield v | ||
|
||
def astype(self, dtype, copy=True): | ||
# We handle | ||
# --> datetime | ||
# --> period | ||
# DatetimeLikeArrayMixin Super handles the rest. | ||
dtype = pandas_dtype(dtype) | ||
|
||
if (is_datetime64_ns_dtype(dtype) and | ||
not is_dtype_equal(dtype, self.dtype)): | ||
# GH#18951: datetime64_ns dtype but not equal means different tz | ||
new_tz = getattr(dtype, 'tz', None) | ||
if getattr(self.dtype, 'tz', None) is None: | ||
return self.tz_localize(new_tz) | ||
result = self.tz_convert(new_tz) | ||
if new_tz is None: | ||
# Do we want .astype('datetime64[ns]') to be an ndarray. | ||
# The astype in Block._astype expects this to return an | ||
# ndarray, but we could maybe work around it there. | ||
result = result._data | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return result | ||
elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype, | ||
dtype): | ||
if copy: | ||
return self.copy() | ||
return self | ||
elif is_period_dtype(dtype): | ||
return self.to_period(freq=dtype.freq) | ||
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noticed... it'd be nice to leave a bunch of Actually... I think Python2 will force us to make this changes when we switch inheritance to composition, since we won't be able to call the unbound method with a DatetimeIndex anymore (I think). |
||
|
||
# ---------------------------------------------------------------- | ||
# ExtensionArray Interface | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,10 +13,8 @@ | |
from pandas.util._decorators import Appender, cache_readonly | ||
|
||
from pandas.core.dtypes.common import ( | ||
ensure_int64, is_bool_dtype, is_categorical_dtype, | ||
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, | ||
is_integer, is_integer_dtype, is_list_like, is_object_dtype, | ||
is_period_dtype, is_scalar, is_string_dtype) | ||
ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer, | ||
is_integer_dtype, is_list_like, is_period_dtype, is_scalar, pandas_dtype) | ||
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries | ||
|
||
from pandas.core import algorithms, ops | ||
|
@@ -40,6 +38,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): | |
# override DatetimeLikeArrayMixin method | ||
copy = Index.copy | ||
unique = Index.unique | ||
view = Index.view | ||
|
||
# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are | ||
# properties there. They can be made into cache_readonly for Index | ||
|
@@ -527,24 +526,23 @@ def _maybe_box_as_values(self, values, **attribs): | |
# - sort_values | ||
return values | ||
|
||
@Appender(_index_shared_docs['astype']) | ||
def astype(self, dtype, copy=True): | ||
if is_object_dtype(dtype): | ||
return self._box_values_as_index() | ||
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): | ||
return Index(self.format(), name=self.name, dtype=object) | ||
elif is_integer_dtype(dtype): | ||
# TODO(DatetimeArray): use self._values here. | ||
# Can't use ._values currently, because that returns a | ||
# DatetimeIndex, which throws us in an infinite loop. | ||
return Index(self.values.astype('i8', copy=copy), name=self.name, | ||
dtype='i8') | ||
elif (is_datetime_or_timedelta_dtype(dtype) and | ||
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): | ||
# disallow conversion between datetime/timedelta, | ||
# and conversions for any datetimelike to float | ||
msg = 'Cannot cast {name} to dtype {dtype}' | ||
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) | ||
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) | ||
if is_dtype_equal(self.dtype, dtype) and copy is False: | ||
# Ensure that self.astype(self.dtype) is self | ||
return self | ||
|
||
new_values = self._eadata.astype(dtype, copy=copy) | ||
|
||
# we pass `dtype` to the Index constructor, for cases like | ||
# dtype=object to disable inference. But, DTA.astype ignores | ||
# integer sign and size, so we need to detect that case and | ||
# just choose int64. | ||
dtype = pandas_dtype(dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure this is necessary as it already coerces properly, doing it here is very weird.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you address this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not in the last 8 hours, no. May need to wait on Tom to clarify, since all of this was taken from 24024. (the fact that these things get closer attention in smaller doses reassures me that splitting is a good idea, even if it does cause rebasing hassles in the parent PR) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, sounds ok then. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What’s the question here? Why we do the integer check? Astype ignores the sign and size. I suppose the index constructor just ignores the size? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if you saw the part about uint vs. int. So I'm just going to decide that the expected behavior for @jbrockmendel do you want to do that here? It's not at all tested, and will need a release note. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok by me (of course its weird to do this, but hey) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I tried this, pretty much just deleting ten lines here, and ended up getting two failures in pandas/tests/indexes/interval/test_astype.py. I can fix this by changing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Attempt #2 at this also failed. Any other ideas? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I missed this note last night. I implemented this in 3fca810 if you could take a look. |
||
if is_integer_dtype(dtype): | ||
dtype = np.dtype("int64") | ||
|
||
return Index(new_values, dtype=dtype, name=self.name) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@Appender(DatetimeLikeArrayMixin._time_shift.__doc__) | ||
def _time_shift(self, periods, freq=None): | ||
|
Uh oh!
There was an error while loading. Please reload this page.