-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: Assorted DatetimeIndex bugfixes #24157
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
092b3b9
fbcc04b
ac52857
121c373
ef66bb9
91738d3
bb0d065
ff3a5c0
e54159d
44e0126
12e0f4e
28bf2de
06d0a8e
81c7d0f
97f976f
9e55d97
c7f280f
af303c7
8ece686
30f01a0
9617b85
3731098
df05c88
8f39b23
e958ce6
97cb6b3
36d37e4
3fc1c19
88f7094
e178bb9
50f6b7e
b927925
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -327,6 +327,10 @@ def __getitem__(self, key): | |
"numpy.newaxis (`None`) and integer or boolean " | ||
"arrays are valid indices") | ||
|
||
if key is Ellipsis: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you make a more explicit comment. This is likely a more general fix (IOW this is likely broken for all There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why don't this just work as is? why is this not a view (and not a copy)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
as-is
Since the copy doesn't have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you be more explicit in your comment, this is not obvious There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved to a hopefully clearer spot a few lines down |
||
# GH#21282 | ||
return self.copy() | ||
|
||
getitem = self._data.__getitem__ | ||
if is_int: | ||
val = getitem(key) | ||
|
@@ -547,9 +551,17 @@ def _validate_frequency(cls, index, freq, **kwargs): | |
if index.size == 0 or inferred == freq.freqstr: | ||
return None | ||
|
||
on_freq = cls._generate_range(start=index[0], end=None, | ||
periods=len(index), freq=freq, **kwargs) | ||
if not np.array_equal(index.asi8, on_freq.asi8): | ||
try: | ||
on_freq = cls._generate_range(start=index[0], end=None, | ||
periods=len(index), freq=freq, | ||
**kwargs) | ||
assert np.array_equal(index.asi8, on_freq.asi8) | ||
except (ValueError, AssertionError) as e: | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if "non-fixed" in str(e): | ||
# non-fixed frequencies are not meaningful for timedelta64; | ||
# we retain that error message | ||
raise e | ||
# GH#11587 if index[0] is NaT _generate_range will raise ValueError | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise ValueError('Inferred frequency {infer} from passed values ' | ||
'does not conform to passed frequency {passed}' | ||
.format(infer=inferred, passed=freq.freqstr)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,9 @@ | |
from pandas.util._decorators import Appender | ||
|
||
from pandas.core.dtypes.common import ( | ||
_INT64_DTYPE, _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, | ||
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, | ||
is_period_dtype, is_string_dtype, is_timedelta64_dtype) | ||
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, | ||
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype, | ||
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype) | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
@@ -264,6 +264,8 @@ def _generate_range(cls, start, end, periods, freq, tz=None, | |
if closed is not None: | ||
raise ValueError("Closed has to be None if not both of start" | ||
"and end are defined") | ||
if start is NaT or end is NaT: | ||
raise ValueError("Neither `start` nor `end` can be NaT") | ||
|
||
left_closed, right_closed = dtl.validate_endpoints(closed) | ||
|
||
|
@@ -1647,6 +1649,19 @@ def maybe_convert_dtype(data, copy): | |
raise TypeError("Passing PeriodDtype data is invalid. " | ||
"Use `data.to_timestamp()` instead") | ||
|
||
elif is_categorical_dtype(data): | ||
# TODO: cases where we need to do another pass through this func, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is the TODO case here, what is an example? do you have an xfailed test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. example is Categorical(TimedeltaIndex) where we should do another pass through maybe_convert_dtype in order to issue the FutureWarning. I'm assuming there are other corner cases here that will need to be caught/tested. |
||
# e.g. the categories are timedelta64s | ||
if isna(data).any(): | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# slow-path | ||
data = list(data) | ||
data = np.array(data, dtype=np.object_) | ||
copy = False | ||
else: | ||
# TODO: does this always make a copy? If so, set copy=False | ||
# GH#18664 preserve tz in going DTI->Categorical->DTI | ||
data = data.categories[data.codes] | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
elif is_extension_type(data) and not is_datetime64tz_dtype(data): | ||
# Includes categorical | ||
# TODO: We have no tests for these | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,12 +14,51 @@ | |
from pandas import ( | ||
DatetimeIndex, Index, Timestamp, date_range, datetime, offsets, | ||
to_datetime) | ||
from pandas.core.arrays import period_array | ||
from pandas.core.arrays import ( | ||
DatetimeArrayMixin as DatetimeArray, period_array) | ||
import pandas.util.testing as tm | ||
|
||
|
||
class TestDatetimeIndex(object): | ||
|
||
@pytest.mark.parametrize('dt_cls', [DatetimeIndex, DatetimeArray]) | ||
def test_freq_validation_with_nat(self, dt_cls): | ||
# GH#11587 make sure we get a useful error message when generate_range | ||
# raises | ||
msg = ("Inferred frequency None from passed values does not conform " | ||
"to passed frequency D") | ||
with pytest.raises(ValueError, match=msg): | ||
dt_cls([pd.NaT, pd.Timestamp('2011-01-01')], freq='D') | ||
with pytest.raises(ValueError, match=msg): | ||
dt_cls([pd.NaT, pd.Timestamp('2011-01-01').value], | ||
freq='D') | ||
|
||
def test_categorical_preserves_tz(self): | ||
# GH#18664 retain tz when going DTI-->Categorical-->DTI | ||
# TODO: parametrize over DatetimeIndex/DatetimeArray | ||
# once CategoricalIndex(DTA) works | ||
|
||
dti = pd.DatetimeIndex( | ||
[pd.NaT, '2015-01-01', '1999-04-06 15:14:13', '2015-01-01'], | ||
tz='US/Eastern') | ||
ci = pd.CategoricalIndex(dti) | ||
carr = pd.Categorical(dti) | ||
cser = pd.Series(ci) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you parametrize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will take a look There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not without adding casting code that makes the verbosity a wash. Once Categorical(DatetimeArray) works (currently raises bc DatetimeArray doesn't have _constructor) then this test can be parametrized over DatetimeIndex/DatetimeArray |
||
for obj in [ci, carr, cser]: | ||
result = pd.DatetimeIndex(obj) | ||
tm.assert_index_equal(result, dti) | ||
|
||
# no-NaT case has a fastpath | ||
dti2 = dti[1:] | ||
ci2 = pd.CategoricalIndex(dti2) | ||
carr2 = pd.Categorical(dti2) | ||
cser2 = pd.Series(ci2) | ||
|
||
for obj in [ci2, carr2, cser2]: | ||
result = pd.DatetimeIndex(obj) | ||
tm.assert_index_equal(result, dti2) | ||
|
||
def test_dti_with_period_data_raises(self): | ||
# GH#23675 | ||
data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
from datetime import date, datetime, timedelta | ||
import functools | ||
import operator | ||
import warnings | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prob going to fail the build as this is not needed |
||
|
||
from dateutil.easter import easter | ||
import numpy as np | ||
|
@@ -2487,6 +2488,9 @@ def generate_range(start=None, end=None, periods=None, | |
|
||
""" | ||
if time_rule is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you need to change any existing tests to account for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, tests.indexes.datetimes.test_date_range test_generate and test_generate_cday There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this sufficiently internal that we don't need to do a deprecation cycle? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, let's just drop it entirely (fix the tests). I don't think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do. OK with moving this to arrays.datetimes while we're at it? |
||
warnings.warn("`time_rule` is deprecated and will be removed in a " | ||
"future version. Use `offset` instead.", | ||
FutureWarning, stacklevel=2) | ||
from pandas.tseries.frequencies import get_offset | ||
|
||
offset = get_offset(time_rule) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these don't jive with what's at the top of the PR. missing 2 notes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what about #11587 ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it merit a note? All that is changed is an error message. But yes, this does close 11587, I'll update this above.