-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF/DEPR: DatetimeIndex constructor #23675
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
f8efbef
9d20bc9
aef3f4c
66ae42b
d0e8ee3
e1f4e17
d18e0df
a4c8c77
5dc5980
7e5587e
e94e826
7464d15
80b5dbe
3c822f1
ba7e5e8
3ba9da7
49c11e1
f1d3fd8
d44055e
1471a2b
11b5f6c
9f56d23
1c3a5aa
be4d472
145772d
7c99105
6b60da2
a7038bb
14d923b
c9dbf24
ce9914d
b3d5bb7
09c88fc
0367d6f
7cc8577
b3a096b
fd5af18
2cdd215
782ca81
03d5b35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,10 @@ | |
|
||
from pandas.core.dtypes.common import ( | ||
_NS_DTYPE, | ||
is_float_dtype, | ||
is_timedelta64_dtype, | ||
is_period_dtype, | ||
is_extension_type, | ||
is_object_dtype, | ||
is_int64_dtype, | ||
is_datetime64tz_dtype, | ||
|
@@ -186,7 +190,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): | |
_freq = None | ||
|
||
@classmethod | ||
def _simple_new(cls, values, freq=None, tz=None, **kwargs): | ||
def _simple_new(cls, values, freq=None, tz=None): | ||
""" | ||
we require the we have a dtype compat for the values | ||
if we are passed a non-dtype compat, then coerce using the constructor | ||
|
@@ -1408,6 +1412,154 @@ def to_julian_date(self): | |
DatetimeArrayMixin._add_datetimelike_methods() | ||
|
||
|
||
# ------------------------------------------------------------------- | ||
# Constructor Helpers | ||
|
||
def maybe_infer_tz(tz, inferred_tz):
    """
    Reconcile a user-provided timezone with one inferred from the data.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone explicitly requested by the caller, if any.
    inferred_tz : tzinfo or None
        Timezone inferred from the data, if any.

    Returns
    -------
    tz : tzinfo or None
        `inferred_tz` when `tz` is None, otherwise `tz`.

    Raises
    ------
    TypeError : if both timezones are present but do not match
    """
    if tz is None:
        # Nothing was requested, so whatever the data carries (possibly
        # None) is the answer.
        return inferred_tz

    # Only a *conflict* between two concrete timezones is an error; a
    # missing inferred timezone simply defers to the requested one.
    if inferred_tz is not None and not timezones.tz_compare(tz, inferred_tz):
        raise TypeError('data is already tz-aware {inferred_tz}, unable to '
                        'set specified tz: {tz}'
                        .format(inferred_tz=inferred_tz, tz=tz))
    return tz
|
||
|
||
def dtype_conversions(data, copy, has_format=False):
    """
    Convert data based on dtype conventions, issuing deprecation warnings
    or errors where appropriate.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    has_format : bool, default False
        Indicates if the data will be passed to a parsing function with a
        `format` kwarg.

    Returns
    -------
    data : np.ndarray or pd.Index
        The input, possibly cast according to its dtype.
    copy : bool
        Set to False in the branches that cast the data (float and
        non-datetimetz extension dtypes); otherwise returned unchanged.

    Raises
    ------
    TypeError : if PeriodDtype data is passed
    """

    if is_float_dtype(data) and not has_format:
        # Note: we must cast to datetime64[ns] here in order to treat these
        # as wall-times instead of UTC timestamps.
        data = data.astype(_NS_DTYPE)
        copy = False
        # TODO: Why do we treat this differently from integer dtypes?
        # Review consensus was that floats should arguably just be cast to
        # integers, but that is a (small) breaking change, so any
        # fix/deprecation is deferred to a follow-up.

    elif is_timedelta64_dtype(data):
        # This helper serves the datetime constructors, so the message must
        # name the Datetime classes the user actually called.
        warnings.warn("Passing timedelta64-dtype data to {cls} is "
                      "deprecated, will raise a TypeError in a future "
                      "version".format(cls="DatetimeIndex/Array"),
                      FutureWarning, stacklevel=3)
        data = data.view(_NS_DTYPE)

    elif is_period_dtype(data):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError("Passing PeriodDtype data to {cls} is invalid. "
                        "Use `data.to_timestamp()` instead"
                        .format(cls="DatetimeIndex/Array"))

    elif is_extension_type(data) and not is_datetime64tz_dtype(data):
        # Includes categorical
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy
|
||
|
||
def _objects_to_datetime64ns(data, dayfirst, yearfirst):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    TypeError : if the parsed result is neither datetime64 nor object dtype
    """
    # if str-dtype, convert; asarray only copies when the cast requires it
    data = np.asarray(data, dtype=np.object_)

    try:
        # No timezone is requested at this level, so parsing is never
        # forced to UTC; any timezone found in the data is reported back
        # through tz_parsed.
        result, tz_parsed = tslib.array_to_datetime(
            data,
            errors="raise",
            utc=False,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            require_iso8601=False
        )
    except ValueError as e:
        # The nested try/except is deliberate: if the fallback conversion
        # also fails, callers should see the *original* parsing error, not
        # the fallback's exception.
        try:
            values, tz = conversion.datetime_to_datetime64(data)
            # If tzaware, these values represent unix timestamps, so we
            # return them as i8 to distinguish from wall times
            return values.view('i8'), tz
        except (ValueError, TypeError):
            raise e

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        # Return i8 values to denote unix timestamps
        return result.view('i8'), tz_parsed
    elif is_datetime64_dtype(result):
        # returning M8[ns] denotes wall-times; since no timezone was
        # parsed the distinction is a thin one
        return result, None
    elif is_object_dtype(result):
        # e.g. an Index of datetime objects; raise and let the
        # calling function salvage the result if desired
        raise ValueError(result)
    else:  # pragma: no cover
        raise TypeError(result)
|
||
|
||
def _generate_regular_range(cls, start, end, periods, freq): | ||
""" | ||
Generate a range of dates with the spans between dates described by | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,16 +17,17 @@ | |
|
||
from pandas.core.dtypes.common import ( | ||
_INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, | ||
is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, | ||
is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, | ||
is_string_like, pandas_dtype) | ||
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_float, | ||
is_integer, is_list_like, is_object_dtype, is_period_dtype, is_scalar, | ||
is_string_dtype, is_string_like, pandas_dtype) | ||
import pandas.core.dtypes.concat as _concat | ||
from pandas.core.dtypes.generic import ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
||
from pandas.core.arrays import datetimelike as dtl | ||
from pandas.core.arrays.datetimes import ( | ||
DatetimeArrayMixin as DatetimeArray, _to_m8) | ||
DatetimeArrayMixin as DatetimeArray, _objects_to_datetime64ns, _to_m8, | ||
dtype_conversions, maybe_infer_tz) | ||
from pandas.core.base import _shared_docs | ||
import pandas.core.common as com | ||
from pandas.core.indexes.base import Index, _index_shared_docs | ||
|
@@ -248,50 +249,59 @@ def __new__(cls, data=None, | |
name = data.name | ||
|
||
freq, freq_infer = dtl.maybe_infer_freq(freq) | ||
if freq is None and hasattr(data, "freq"): | ||
# i.e. DatetimeArray/Index | ||
# TODO: Should this be the stronger condition of `freq_infer`? | ||
freq = data.freq | ||
verify_integrity = False | ||
|
||
# if dtype has an embedded tz, capture it | ||
tz = dtl.validate_tz_from_dtype(dtype, tz) | ||
|
||
if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): | ||
# other iterable of some kind | ||
if not isinstance(data, (list, tuple)): | ||
if not hasattr(data, "dtype"): | ||
# e.g. list, tuple | ||
if np.ndim(data) == 0: | ||
# i.e. generator | ||
data = list(data) | ||
data = np.asarray(data, dtype='O') | ||
data = np.asarray(data) | ||
copy = False | ||
elif isinstance(data, ABCSeries): | ||
data = data._values | ||
|
||
# data must be Index or np.ndarray here | ||
if not (is_datetime64_dtype(data) or is_datetimetz(data) or | ||
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): | ||
data = tools.to_datetime(data, dayfirst=dayfirst, | ||
yearfirst=yearfirst) | ||
|
||
if isinstance(data, DatetimeArray): | ||
if tz is None: | ||
tz = data.tz | ||
elif data.tz is None: | ||
data = data.tz_localize(tz, ambiguous=ambiguous) | ||
else: | ||
# the tz's must match | ||
if not timezones.tz_compare(tz, data.tz): | ||
msg = ('data is already tz-aware {0}, unable to ' | ||
'set specified tz: {1}') | ||
raise TypeError(msg.format(data.tz, tz)) | ||
# By this point we are assured to have either a numpy array or Index | ||
|
||
data, copy = dtype_conversions(data, copy) | ||
|
||
if is_object_dtype(data) or is_string_dtype(data): | ||
# TODO: We do not have tests specific to string-dtypes, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you could just write this as
might be more clear There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue with that is np.array(['20160405']) becomes np.array([20160405]) instead of 2016-04-05. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok sure |
||
# also complex or categorical or other extension | ||
copy = False | ||
if lib.infer_dtype(data) == 'integer': | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
data = data.astype(np.int64) | ||
else: | ||
# data comes back here as either i8 to denote UTC timestamps | ||
# or M8[ns] to denote wall times | ||
data, inferred_tz = _objects_to_datetime64ns( | ||
data, dayfirst=dayfirst, yearfirst=yearfirst) | ||
tz = maybe_infer_tz(tz, inferred_tz) | ||
|
||
if is_datetime64tz_dtype(data): | ||
tz = maybe_infer_tz(tz, data.tz) | ||
subarr = data._data | ||
|
||
if freq is None: | ||
freq = data.freq | ||
verify_integrity = False | ||
elif issubclass(data.dtype.type, np.datetime64): | ||
elif is_datetime64_dtype(data): | ||
# DatetimeIndex or ndarray[datetime64] | ||
data = getattr(data, "_data", data) | ||
if data.dtype != _NS_DTYPE: | ||
data = conversion.ensure_datetime64ns(data) | ||
|
||
if tz is not None: | ||
# Convert tz-naive to UTC | ||
tz = timezones.maybe_get_tz(tz) | ||
data = conversion.tz_localize_to_utc(data.view('i8'), tz, | ||
ambiguous=ambiguous) | ||
subarr = data.view(_NS_DTYPE) | ||
|
||
else: | ||
# must be integer dtype otherwise | ||
# assume this data are epoch timestamps | ||
|
@@ -319,17 +329,15 @@ def __new__(cls, data=None, | |
return subarr._deepcopy_if_needed(ref_to_data, copy) | ||
|
||
@classmethod | ||
def _simple_new(cls, values, name=None, freq=None, tz=None, | ||
dtype=None, **kwargs): | ||
def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): | ||
""" | ||
we require the we have a dtype compat for the values | ||
if we are passed a non-dtype compat, then coerce using the constructor | ||
""" | ||
# DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes | ||
assert isinstance(values, np.ndarray), type(values) | ||
|
||
result = super(DatetimeIndex, cls)._simple_new(values, freq, tz, | ||
**kwargs) | ||
result = super(DatetimeIndex, cls)._simple_new(values, freq, tz) | ||
result.name = name | ||
result._reset_identity() | ||
return result | ||
|
Uh oh!
There was an error while loading. Please reload this page.