Skip to content

Docstrings, de-duplicate EAMixin/DatetimeLikeIndex __new__ code #21926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,7 @@ class ExtensionOpsMixin(object):
"""
A base class for linking the operators to their dunder names
"""

@classmethod
def _add_arithmetic_ops(cls):
cls.__add__ = cls._create_arithmetic_method(operator.add)
Expand Down
106 changes: 103 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np

from pandas._libs import lib, iNaT, NaT
from pandas._libs.tslibs import timezones
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ_INDEX, IncompatibleFrequency)
Expand All @@ -13,7 +14,7 @@
from pandas import compat

from pandas.tseries import frequencies
from pandas.tseries.offsets import Tick
from pandas.tseries.offsets import Tick, DateOffset

from pandas.core.dtypes.common import (
needs_i8_conversion,
Expand All @@ -23,10 +24,13 @@
is_timedelta64_dtype,
is_object_dtype)
from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame, ABCIndexClass
from pandas.core.dtypes.dtypes import DatetimeTZDtype

import pandas.core.common as com
from pandas.core.algorithms import checked_add_with_arr

from .base import ExtensionOpsMixin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you do absolute imports

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried it, causes import-time errors

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see: we import all of the extension arrays in `__init__`, and those in turn import this module. Hmm.



def _make_comparison_op(op, cls):
# TODO: share code with indexes.base version? Main difference is that
Expand Down Expand Up @@ -87,7 +91,7 @@ def _shallow_copy(self, values=None, **kwargs):
return self._simple_new(values, **attributes)


class DatetimeLikeArrayMixin(AttributesMixin):
class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

Expand Down Expand Up @@ -464,7 +468,10 @@ def _addsub_offset_array(self, other, op):
"{cls} not vectorized"
.format(cls=type(self).__name__), PerformanceWarning)

res_values = op(self.astype('O').values, np.array(other))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it worth a branch here in is_extension_dtype?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think values_from_object is a pretty clean solution to this.

# For EA self.astype('O') returns a numpy array, not an Index
left = lib.values_from_object(self.astype('O'))

res_values = op(left, np.array(other))
kwargs = {}
if not is_period_dtype(self):
kwargs['freq'] = 'infer'
Expand Down Expand Up @@ -551,3 +558,96 @@ def validate_periods(periods):
raise TypeError('periods must be a number, got {periods}'
.format(periods=periods))
return periods


def validate_endpoints(closed):
    """
    Check that the `closed` argument is among [None, "left", "right"].

    Parameters
    ----------
    closed : {None, "left", "right"}
        None marks both endpoints as closed; "left" or "right" marks only
        that side as closed.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    # Identity check first so that None is never compared against a string.
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")


def maybe_infer_freq(freq):
    """
    Split a user-supplied `freq` into an offset and an "infer" flag.

    Comparing a DateOffset to the string "infer" raises, so we need to
    be careful about comparisons. Make a dummy variable `freq_infer` to
    signify the case where the given freq is "infer" and set freq to None
    to avoid comparison trouble later on.

    Parameters
    ----------
    freq : {DateOffset, None, str}

    Returns
    -------
    freq : {DateOffset, None}
    freq_infer : bool
    """
    if isinstance(freq, DateOffset):
        # Already an offset: return it untouched and never compare it
        # against the string "infer".
        return freq, False

    if freq != 'infer':
        # if a passed freq is None, don't infer automatically; it is handed
        # to to_offset like any other non-"infer" value
        return frequencies.to_offset(freq), False

    # The caller asked for inference: signal it through the flag and clear
    # freq so later code never sees the "infer" sentinel string.
    return None, True


def validate_tz_from_dtype(dtype, tz):
"""
If the given dtype is a DatetimeTZDtype, extract the implied
tzinfo object from it and check that it does not conflict with the given
tz.

Parameters
----------
dtype : dtype, str
tz : None, tzinfo

Returns
-------
tz : consensus tzinfo

Raises
------
ValueError : on tzinfo mismatch
"""
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you do this ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already done

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, does `tz_compare` not take None as an argument? Maybe we should add the check there, as I don't think we check for None elsewhere (not really sure about this).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None-tz comparisons are a pretty special case that gets handled separately. I’m OK with `tz_compare` not taking None.

if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass
return tz
34 changes: 8 additions & 26 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import pandas.core.common as com
from pandas.core.algorithms import checked_add_with_arr

from pandas.tseries.frequencies import to_offset, DateOffset
from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import Tick

from pandas.core.arrays import datetimelike as dtl
Expand Down Expand Up @@ -84,10 +84,11 @@ def f(self):
return property(f)


def _dt_array_cmp(opname, cls):
def _dt_array_cmp(cls, op):
"""
Wrap comparison operations to convert datetime-like to datetime64
"""
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

def wrapper(self, other):
Expand Down Expand Up @@ -181,12 +182,10 @@ def __new__(cls, values, freq=None, tz=None):
# e.g. DatetimeIndex
tz = values.tz

if (freq is not None and not isinstance(freq, DateOffset) and
freq != 'infer'):
freq = to_offset(freq)
freq, freq_infer = dtl.maybe_infer_freq(freq)

result = cls._simple_new(values, freq=freq, tz=tz)
if freq == 'infer':
if freq_infer:
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)
Expand Down Expand Up @@ -289,17 +288,7 @@ def __iter__(self):
# -----------------------------------------------------------------
# Comparison Methods

@classmethod
def _add_comparison_methods(cls):
"""add in comparison methods"""
cls.__eq__ = _dt_array_cmp('__eq__', cls)
cls.__ne__ = _dt_array_cmp('__ne__', cls)
cls.__lt__ = _dt_array_cmp('__lt__', cls)
cls.__gt__ = _dt_array_cmp('__gt__', cls)
cls.__le__ = _dt_array_cmp('__le__', cls)
cls.__ge__ = _dt_array_cmp('__ge__', cls)
# TODO: Some classes pass __eq__ while others pass operator.eq;
# standardize this.
_create_comparison_method = classmethod(_dt_array_cmp)

def _has_same_tz(self, other):
zzone = self._timezone
Expand Down Expand Up @@ -441,14 +430,7 @@ def _local_timestamps(self):
This is used to calculate time-of-day information as if the timestamps
were timezone-naive.
"""
values = self.asi8
indexer = values.argsort()
result = conversion.tz_convert(values.take(indexer), utc, self.tz)

n = len(indexer)
reverse = np.empty(n, dtype=np.int_)
reverse.put(indexer, np.arange(n))
return result.take(reverse)
return conversion.tz_convert(self.asi8, utc, self.tz)

def tz_convert(self, tz):
"""
Expand Down Expand Up @@ -1102,4 +1084,4 @@ def to_julian_date(self):
) / 24.0)


DatetimeArrayMixin._add_comparison_methods()
DatetimeArrayMixin._add_comparison_ops()
17 changes: 5 additions & 12 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ def f(self):
return property(f)


def _period_array_cmp(opname, cls):
def _period_array_cmp(cls, op):
"""
Wrap comparison operations to convert Period-like to PeriodDtype
"""
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

def wrapper(self, other):
Expand Down Expand Up @@ -268,6 +269,8 @@ def asfreq(self, freq=None, how='E'):
# ------------------------------------------------------------------
# Arithmetic Methods

_create_comparison_method = classmethod(_period_array_cmp)

def _sub_datelike(self, other):
assert other is not NaT
return NotImplemented
Expand Down Expand Up @@ -381,18 +384,8 @@ def _maybe_convert_timedelta(self, other):
raise IncompatibleFrequency(msg.format(cls=type(self).__name__,
freqstr=self.freqstr))

@classmethod
def _add_comparison_methods(cls):
""" add in comparison methods """
cls.__eq__ = _period_array_cmp('__eq__', cls)
cls.__ne__ = _period_array_cmp('__ne__', cls)
cls.__lt__ = _period_array_cmp('__lt__', cls)
cls.__gt__ = _period_array_cmp('__gt__', cls)
cls.__le__ = _period_array_cmp('__le__', cls)
cls.__ge__ = _period_array_cmp('__ge__', cls)


PeriodArrayMixin._add_comparison_methods()
PeriodArrayMixin._add_comparison_ops()


# -------------------------------------------------------------------
Expand Down
51 changes: 14 additions & 37 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import pandas.core.common as com

from pandas.tseries.offsets import Tick, DateOffset
from pandas.tseries.offsets import Tick
from pandas.tseries.frequencies import to_offset

from . import datetimelike as dtl
Expand Down Expand Up @@ -54,10 +54,11 @@ def f(self):
return property(f)


def _td_array_cmp(opname, cls):
def _td_array_cmp(cls, op):
"""
Wrap comparison operations to convert timedelta-like to timedelta64
"""
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

def wrapper(self, other):
Expand Down Expand Up @@ -126,25 +127,23 @@ def _simple_new(cls, values, freq=None, **kwargs):

def __new__(cls, values, freq=None, start=None, end=None, periods=None,
closed=None):
if (freq is not None and not isinstance(freq, DateOffset) and
freq != 'infer'):
freq = to_offset(freq)

periods = dtl.validate_periods(periods)
freq, freq_infer = dtl.maybe_infer_freq(freq)

if values is None:
# TODO: Remove this block and associated kwargs; GH#20535
if freq is None and com._any_none(periods, start, end):
raise ValueError('Must provide freq argument if no data is '
'supplied')
else:
return cls._generate_range(start, end, periods, freq,
closed=closed)
periods = dtl.validate_periods(periods)
return cls._generate_range(start, end, periods, freq,
closed=closed)

result = cls._simple_new(values, freq=freq)
if freq == 'infer':
if freq_infer:
inferred = result.inferred_freq
if inferred:
result._freq = to_offset(inferred)
result.freq = to_offset(inferred)

return result

Expand All @@ -161,23 +160,12 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
if end is not None:
end = Timedelta(end)

left_closed = False
right_closed = False

if start is None and end is None:
if closed is not None:
raise ValueError("Closed has to be None if not both of start"
"and end are defined")

if closed is None:
left_closed = True
right_closed = True
elif closed == "left":
left_closed = True
elif closed == "right":
right_closed = True
else:
raise ValueError("Closed has to be either 'left', 'right' or None")
left_closed, right_closed = dtl.validate_endpoints(closed)

if freq is not None:
index = _generate_regular_range(start, end, periods, freq)
Expand All @@ -197,6 +185,8 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
# ----------------------------------------------------------------
# Arithmetic Methods

_create_comparison_method = classmethod(_td_array_cmp)

def _add_offset(self, other):
assert not isinstance(other, Tick)
raise TypeError("cannot add the type {typ} to a {cls}"
Expand Down Expand Up @@ -266,19 +256,6 @@ def _evaluate_with_timedelta_like(self, other, op):

return NotImplemented

# ----------------------------------------------------------------
# Comparison Methods

@classmethod
def _add_comparison_methods(cls):
"""add in comparison methods"""
cls.__eq__ = _td_array_cmp('__eq__', cls)
cls.__ne__ = _td_array_cmp('__ne__', cls)
cls.__lt__ = _td_array_cmp('__lt__', cls)
cls.__gt__ = _td_array_cmp('__gt__', cls)
cls.__le__ = _td_array_cmp('__le__', cls)
cls.__ge__ = _td_array_cmp('__ge__', cls)

# ----------------------------------------------------------------
# Conversion Methods - Vectorized analogues of Timedelta methods

Expand Down Expand Up @@ -392,7 +369,7 @@ def f(x):
return result


TimedeltaArrayMixin._add_comparison_methods()
TimedeltaArrayMixin._add_comparison_ops()


# ---------------------------------------------------------------------
Expand Down
Loading