diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fe4e461b0bd4f..aadfbdd4303c4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -634,6 +634,7 @@ class ExtensionOpsMixin(object): """ A base class for linking the operators to their dunder names """ + @classmethod def _add_arithmetic_ops(cls): cls.__add__ = cls._create_arithmetic_method(operator.add) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ec430e4bf17b1..7bb1c45998eb2 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -5,6 +5,7 @@ import numpy as np from pandas._libs import lib, iNaT, NaT +from pandas._libs.tslibs import timezones from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.period import ( DIFFERENT_FREQ_INDEX, IncompatibleFrequency) @@ -13,7 +14,7 @@ from pandas import compat from pandas.tseries import frequencies -from pandas.tseries.offsets import Tick +from pandas.tseries.offsets import Tick, DateOffset from pandas.core.dtypes.common import ( needs_i8_conversion, @@ -23,10 +24,13 @@ is_timedelta64_dtype, is_object_dtype) from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame, ABCIndexClass +from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr +from .base import ExtensionOpsMixin + def _make_comparison_op(op, cls): # TODO: share code with indexes.base version? Main difference is that @@ -87,7 +91,7 @@ def _shallow_copy(self, values=None, **kwargs): return self._simple_new(values, **attributes) -class DatetimeLikeArrayMixin(AttributesMixin): +class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray @@ -464,7 +468,10 @@ def _addsub_offset_array(self, other, op): "{cls} not vectorized" .format(cls=type(self).__name__), PerformanceWarning) - res_values = op(self.astype('O').values, np.array(other)) + # For EA self.astype('O') returns a numpy array, not an Index + left = lib.values_from_object(self.astype('O')) + + res_values = op(left, np.array(other)) kwargs = {} if not is_period_dtype(self): kwargs['freq'] = 'infer' @@ -551,3 +558,96 @@ def validate_periods(periods): raise TypeError('periods must be a number, got {periods}' .format(periods=periods)) return periods + + +def validate_endpoints(closed): + """ + Check that the `closed` argument is among [None, "left", "right"] + + Parameters + ---------- + closed : {None, "left", "right"} + + Returns + ------- + left_closed : bool + right_closed : bool + + Raises + ------ + ValueError : if argument is not among valid values + """ + left_closed = False + right_closed = False + + if closed is None: + left_closed = True + right_closed = True + elif closed == "left": + left_closed = True + elif closed == "right": + right_closed = True + else: + raise ValueError("Closed has to be either 'left', 'right' or None") + + return left_closed, right_closed + + +def maybe_infer_freq(freq): + """ + Comparing a DateOffset to the string "infer" raises, so we need to + be careful about comparisons. Make a dummy variable `freq_infer` to + signify the case where the given freq is "infer" and set freq to None + to avoid comparison trouble later on. + + Parameters + ---------- + freq : {DateOffset, None, str} + + Returns + ------- + freq : {DateOffset, None} + freq_infer : bool + """ + freq_infer = False + if not isinstance(freq, DateOffset): + # if a passed freq is None, don't infer automatically + if freq != 'infer': + freq = frequencies.to_offset(freq) + else: + freq_infer = True + freq = None + return freq, freq_infer + + +def validate_tz_from_dtype(dtype, tz): + """ + If the given dtype is a DatetimeTZDtype, extract the implied + tzinfo object from it and check that it does not conflict with the given + tz. + + Parameters + ---------- + dtype : dtype, str + tz : None, tzinfo + + Returns + ------- + tz : consensus tzinfo + + Raises + ------ + ValueError : on tzinfo mismatch + """ + if dtype is not None: + try: + dtype = DatetimeTZDtype.construct_from_string(dtype) + dtz = getattr(dtype, 'tz', None) + if dtz is not None: + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError("cannot supply both a tz and a dtype" + " with a tz") + tz = dtz + except TypeError: + pass + return tz diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c5e85cb5892f4..78e6d1f222160 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -29,7 +29,7 @@ import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr -from pandas.tseries.frequencies import to_offset, DateOffset +from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Tick from pandas.core.arrays import datetimelike as dtl @@ -84,10 +84,11 @@ def f(self): return property(f) -def _dt_array_cmp(opname, cls): +def _dt_array_cmp(cls, op): """ Wrap comparison operations to convert datetime-like to datetime64 """ + opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False def wrapper(self, other): @@ -181,12 +182,10 @@ def __new__(cls, values, freq=None, tz=None): # e.g. DatetimeIndex tz = values.tz - if (freq is not None and not isinstance(freq, DateOffset) and - freq != 'infer'): - freq = to_offset(freq) + freq, freq_infer = dtl.maybe_infer_freq(freq) result = cls._simple_new(values, freq=freq, tz=tz) - if freq == 'infer': + if freq_infer: inferred = result.inferred_freq if inferred: result.freq = to_offset(inferred) @@ -289,17 +288,7 @@ def __iter__(self): # ----------------------------------------------------------------- # Comparison Methods - @classmethod - def _add_comparison_methods(cls): - """add in comparison methods""" - cls.__eq__ = _dt_array_cmp('__eq__', cls) - cls.__ne__ = _dt_array_cmp('__ne__', cls) - cls.__lt__ = _dt_array_cmp('__lt__', cls) - cls.__gt__ = _dt_array_cmp('__gt__', cls) - cls.__le__ = _dt_array_cmp('__le__', cls) - cls.__ge__ = _dt_array_cmp('__ge__', cls) - # TODO: Some classes pass __eq__ while others pass operator.eq; - # standardize this. + _create_comparison_method = classmethod(_dt_array_cmp) def _has_same_tz(self, other): zzone = self._timezone @@ -441,14 +430,7 @@ def _local_timestamps(self): This is used to calculate time-of-day information as if the timestamps were timezone-naive. """ - values = self.asi8 - indexer = values.argsort() - result = conversion.tz_convert(values.take(indexer), utc, self.tz) - - n = len(indexer) - reverse = np.empty(n, dtype=np.int_) - reverse.put(indexer, np.arange(n)) - return result.take(reverse) + return conversion.tz_convert(self.asi8, utc, self.tz) def tz_convert(self, tz): """ @@ -1102,4 +1084,4 @@ def to_julian_date(self): ) / 24.0) -DatetimeArrayMixin._add_comparison_methods() +DatetimeArrayMixin._add_comparison_ops() diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 66b1fb8db25c0..cb5afa34add2a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -40,10 +40,11 @@ def f(self): return property(f) -def _period_array_cmp(opname, cls): +def _period_array_cmp(cls, op): """ Wrap comparison operations to convert Period-like to PeriodDtype """ + opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False def wrapper(self, other): @@ -268,6 +269,8 @@ def asfreq(self, freq=None, how='E'): # ------------------------------------------------------------------ # Arithmetic Methods + _create_comparison_method = classmethod(_period_array_cmp) + def _sub_datelike(self, other): assert other is not NaT return NotImplemented @@ -381,18 +384,8 @@ def _maybe_convert_timedelta(self, other): raise IncompatibleFrequency(msg.format(cls=type(self).__name__, freqstr=self.freqstr)) - @classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - cls.__eq__ = _period_array_cmp('__eq__', cls) - cls.__ne__ = _period_array_cmp('__ne__', cls) - cls.__lt__ = _period_array_cmp('__lt__', cls) - cls.__gt__ = _period_array_cmp('__gt__', cls) - cls.__le__ = _period_array_cmp('__le__', cls) - cls.__ge__ = _period_array_cmp('__ge__', cls) - -PeriodArrayMixin._add_comparison_methods() +PeriodArrayMixin._add_comparison_ops() # ------------------------------------------------------------------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a28f7fc9c32fa..efa7c0b0e44d4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -17,7 +17,7 @@ import pandas.core.common as com -from pandas.tseries.offsets import Tick, DateOffset +from pandas.tseries.offsets import Tick from pandas.tseries.frequencies import to_offset from . import datetimelike as dtl @@ -54,10 +54,11 @@ def f(self): return property(f) -def _td_array_cmp(opname, cls): +def _td_array_cmp(cls, op): """ Wrap comparison operations to convert timedelta-like to timedelta64 """ + opname = '__{name}__'.format(name=op.__name__) nat_result = True if opname == '__ne__' else False def wrapper(self, other): @@ -126,25 +127,23 @@ def _simple_new(cls, values, freq=None, **kwargs): def __new__(cls, values, freq=None, start=None, end=None, periods=None, closed=None): - if (freq is not None and not isinstance(freq, DateOffset) and - freq != 'infer'): - freq = to_offset(freq) - periods = dtl.validate_periods(periods) + freq, freq_infer = dtl.maybe_infer_freq(freq) if values is None: + # TODO: Remove this block and associated kwargs; GH#20535 if freq is None and com._any_none(periods, start, end): raise ValueError('Must provide freq argument if no data is ' 'supplied') - else: - return cls._generate_range(start, end, periods, freq, - closed=closed) + periods = dtl.validate_periods(periods) + return cls._generate_range(start, end, periods, freq, + closed=closed) result = cls._simple_new(values, freq=freq) - if freq == 'infer': + if freq_infer: inferred = result.inferred_freq if inferred: - result._freq = to_offset(inferred) + result.freq = to_offset(inferred) return result @@ -161,23 +160,12 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): if end is not None: end = Timedelta(end) - left_closed = False - right_closed = False - if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") - if closed is None: - left_closed = True - right_closed = True - elif closed == "left": - left_closed = True - elif closed == "right": - right_closed = True - else: - raise ValueError("Closed has to be either 'left', 'right' or None") + left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: index = _generate_regular_range(start, end, periods, freq) @@ -197,6 +185,8 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): # ---------------------------------------------------------------- # Arithmetic Methods + _create_comparison_method = classmethod(_td_array_cmp) + def _add_offset(self, other): assert not isinstance(other, Tick) raise TypeError("cannot add the type {typ} to a {cls}" @@ -266,19 +256,6 @@ def _evaluate_with_timedelta_like(self, other, op): return NotImplemented - # ---------------------------------------------------------------- - # Comparison Methods - - @classmethod - def _add_comparison_methods(cls): - """add in comparison methods""" - cls.__eq__ = _td_array_cmp('__eq__', cls) - cls.__ne__ = _td_array_cmp('__ne__', cls) - cls.__lt__ = _td_array_cmp('__lt__', cls) - cls.__gt__ = _td_array_cmp('__gt__', cls) - cls.__le__ = _td_array_cmp('__le__', cls) - cls.__ge__ = _td_array_cmp('__ge__', cls) - # ---------------------------------------------------------------- # Conversion Methods - Vectorized analogues of Timedelta methods @@ -392,7 +369,7 @@ def f(x): return result -TimedeltaArrayMixin._add_comparison_methods() +TimedeltaArrayMixin._add_comparison_ops() # --------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3ae5eb3a8dbf5..8f05a9a887830 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -789,9 +789,8 @@ def shift(self, n, freq=None): start = self[0] + n * self.freq end = self[-1] + n * self.freq attribs = self._get_attributes_dict() - attribs['start'] = start - attribs['end'] = end - return type(self)(**attribs) + return self._generate_range(start=start, end=end, periods=None, + **attribs) def repeat(self, repeats, *args, **kwargs): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7257be421c3e1..6ed752d3a213d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -27,7 +27,6 @@ pandas_dtype, ensure_int64) from pandas.core.dtypes.generic import ABCSeries -from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat @@ -41,7 +40,7 @@ from pandas.core.indexes.datetimelike import ( DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) from pandas.tseries.offsets import ( - DateOffset, generate_range, Tick, CDay, prefix_mapping) + generate_range, Tick, CDay, prefix_mapping) from pandas.core.tools.timedeltas import to_timedelta from pandas.util._decorators import ( @@ -84,10 +83,12 @@ def func(self, *args, **kwargs): return func -def _dt_index_cmp(opname, cls): +def _dt_index_cmp(cls, op): """ Wrap comparison operations to convert datetime-like to datetime64 """ + opname = '__{name}__'.format(name=op.__name__) + def wrapper(self, other): result = getattr(DatetimeArrayMixin, opname)(self, other) if is_bool_dtype(result): @@ -238,12 +239,12 @@ def _join_i8_wrapper(joinf, **kwargs): @classmethod def _add_comparison_methods(cls): """ add in comparison methods """ - cls.__eq__ = _dt_index_cmp('__eq__', cls) - cls.__ne__ = _dt_index_cmp('__ne__', cls) - cls.__lt__ = _dt_index_cmp('__lt__', cls) - cls.__gt__ = _dt_index_cmp('__gt__', cls) - cls.__le__ = _dt_index_cmp('__le__', cls) - cls.__ge__ = _dt_index_cmp('__ge__', cls) + cls.__eq__ = _dt_index_cmp(cls, operator.eq) + cls.__ne__ = _dt_index_cmp(cls, operator.ne) + cls.__lt__ = _dt_index_cmp(cls, operator.lt) + cls.__gt__ = _dt_index_cmp(cls, operator.gt) + cls.__le__ = _dt_index_cmp(cls, operator.le) + cls.__ge__ = _dt_index_cmp(cls, operator.ge) _engine_type = libindex.DatetimeEngine @@ -289,39 +290,20 @@ def __new__(cls, data=None, if name is None and hasattr(data, 'name'): name = data.name - freq_infer = False - if not isinstance(freq, DateOffset): - - # if a passed freq is None, don't infer automatically - if freq != 'infer': - freq = to_offset(freq) - else: - freq_infer = True - freq = None - - periods = dtl.validate_periods(periods) + freq, freq_infer = dtl.maybe_infer_freq(freq) # if dtype has an embedded tz, capture it - if dtype is not None: - try: - dtype = DatetimeTZDtype.construct_from_string(dtype) - dtz = getattr(dtype, 'tz', None) - if dtz is not None: - if tz is not None and str(tz) != str(dtz): - raise ValueError("cannot supply both a tz and a dtype" - " with a tz") - tz = dtz - except TypeError: - pass + tz = dtl.validate_tz_from_dtype(dtype, tz) if data is None: + # TODO: Remove this block and associated kwargs; GH#20535 if freq is None and com._any_none(periods, start, end): - msg = 'Must provide freq argument if no data is supplied' - raise ValueError(msg) - else: - return cls._generate_range(start, end, periods, name, freq, - tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) + raise ValueError('Must provide freq argument if no data is ' + 'supplied') + periods = dtl.validate_periods(periods) + return cls._generate_range(start, end, periods, name, freq, + tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): @@ -407,23 +389,12 @@ def _generate_range(cls, start, end, periods, name, freq, tz=None, if end is not None: end = Timestamp(end) - left_closed = False - right_closed = False - if start is None and end is None: if closed is not None: raise ValueError("Closed has to be None if not both of start" "and end are defined") - if closed is None: - left_closed = True - right_closed = True - elif closed == "left": - left_closed = True - elif closed == "right": - right_closed = True - else: - raise ValueError("Closed has to be either 'left', 'right' or None") + left_closed, right_closed = dtl.validate_endpoints(closed) try: inferred_tz = timezones.infer_tzinfo(start, end) @@ -540,12 +511,6 @@ def _convert_for_op(self, value): return _to_m8(value) raise ValueError('Passed item and index have different timezone') - def _local_timestamps(self): - if self.is_monotonic: - return conversion.tz_convert(self.asi8, utc, self.tz) - else: - return DatetimeArrayMixin._local_timestamps(self) - @classmethod def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None, **kwargs): @@ -1744,7 +1709,6 @@ def _generate_regular_range(cls, start, end, periods, freq): "if a 'period' is given.") data = np.arange(b, e, stride, dtype=np.int64) - # TODO: Do we need to use _simple_new here? just return data.view? data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: if isinstance(start, Timestamp): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 4d8e57820f29d..350c609acbb4f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -804,7 +804,7 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ -PeriodIndex._add_comparison_methods() +PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() PeriodIndex._add_logical_methods_disabled() PeriodIndex._add_datetimelike_methods() diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index dc26c9cc0c248..af34ec8b22824 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,4 +1,5 @@ """ implement the TimedeltaIndex """ +import operator import numpy as np from pandas.core.dtypes.common import ( @@ -34,7 +35,6 @@ TimelikeOps, DatetimeIndexOpsMixin) from pandas.core.tools.timedeltas import ( to_timedelta, _coerce_scalar_to_timedelta_type) -from pandas.tseries.offsets import DateOffset from pandas._libs import (lib, index as libindex, join as libjoin, Timedelta, NaT, iNaT) @@ -51,10 +51,12 @@ def f(self): return property(f) -def _td_index_cmp(opname, cls): +def _td_index_cmp(cls, op): """ Wrap comparison operations to convert timedelta-like to timedelta64 """ + opname = '__{name}__'.format(name=op.__name__) + def wrapper(self, other): result = getattr(TimedeltaArrayMixin, opname)(self, other) if is_bool_dtype(result): @@ -155,12 +157,12 @@ def _join_i8_wrapper(joinf, **kwargs): @classmethod def _add_comparison_methods(cls): """ add in comparison methods """ - cls.__eq__ = _td_index_cmp('__eq__', cls) - cls.__ne__ = _td_index_cmp('__ne__', cls) - cls.__lt__ = _td_index_cmp('__lt__', cls) - cls.__gt__ = _td_index_cmp('__gt__', cls) - cls.__le__ = _td_index_cmp('__le__', cls) - cls.__ge__ = _td_index_cmp('__ge__', cls) + cls.__eq__ = _td_index_cmp(cls, operator.eq) + cls.__ne__ = _td_index_cmp(cls, operator.ne) + cls.__lt__ = _td_index_cmp(cls, operator.lt) + cls.__gt__ = _td_index_cmp(cls, operator.gt) + cls.__le__ = _td_index_cmp(cls, operator.le) + cls.__ge__ = _td_index_cmp(cls, operator.ge) _engine_type = libindex.TimedeltaEngine @@ -181,25 +183,16 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, else: return data._shallow_copy() - freq_infer = False - if not isinstance(freq, DateOffset): - - # if a passed freq is None, don't infer automatically - if freq != 'infer': - freq = to_offset(freq) - else: - freq_infer = True - freq = None - - periods = dtl.validate_periods(periods) + freq, freq_infer = dtl.maybe_infer_freq(freq) if data is None: + # TODO: Remove this block and associated kwargs; GH#20535 if freq is None and com._any_none(periods, start, end): - msg = 'Must provide freq argument if no data is supplied' - raise ValueError(msg) - else: - return cls._generate_range(start, end, periods, name, freq, - closed=closed) + raise ValueError('Must provide freq argument if no data is ' + 'supplied') + periods = dtl.validate_periods(periods) + return cls._generate_range(start, end, periods, name, freq, + closed=closed) if unit is not None: data = to_timedelta(data, unit=unit, box=False) @@ -226,7 +219,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, inferred = subarr.inferred_freq if inferred: subarr.freq = to_offset(inferred) - return subarr return subarr diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 387a70fe37253..59cd4743f857b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -250,7 +250,8 @@ def infer_freq(index, warn=True): if is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") - elif isinstance(index, pd.TimedeltaIndex): + elif is_timedelta64_dtype(index): + # Allow TimedeltaIndex and TimedeltaArray inferer = _TimedeltaFrequencyInferer(index, warn=warn) return inferer.get_freq()