Skip to content

CLN: de-duplicate generate_range #23218

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 23, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ def _validate_frequency(cls, index, freq, **kwargs):
# Frequency validation is not meaningful for Period Array/Index
return None

# DatetimeArray may pass `ambiguous`, nothing else will be accepted
# by cls._generate_range below
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really like this, can we just directly pass ambiguous as required?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, but it is much more verbose (BTW the letter "v" on my keyboard is super-sticky, so we really need to stop talking about erbosity).

This would end up looking something like:

if is_timedelta64_dtype(index):
    assert ambiguous is None
    on_freq = cls._generate_range(start=index[0], end=None,
                                                        periods=len(index), freq=freq)
else:
    on_freq = cls._generate_range(start=index[0], end=None,
                                                        periods=len(index), freq=freq, ambiguous=ambiguous)

Or do you have something else in mind? I like this less than the way the PR currently does it, but by a small enough margin that I'm fine changing it to get this done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no what I would do is add this to DatetimeIndex. It doesn't totally de-duplicate but is more correct and less hacky here

@classmethod
    def _generate_range(cls, start, end, periods, name=None, freq=None,
                        tz=None, normalize=False, ambiguous='raise',
                        closed=None):
        return super(DatetimeIndex, cls)._generate_range(
            start, end, periods, freq,
            tz=tz, normalize=normalize, ambiguous=ambiguous, closed=closed)

allowed = {'ambiguous'} if index.dtype.kind == 'M' else {}
assert all(key in allowed for key in kwargs)

inferred = index.inferred_freq
if index.size == 0 or inferred == freq.freqstr:
return None
Expand Down Expand Up @@ -595,9 +600,12 @@ def _time_shift(self, periods, freq=None):

start = self[0] + periods * self.freq
end = self[-1] + periods * self.freq
attribs = self._get_attributes_dict()

# Note: in the DatetimeTZ case, _generate_range will infer the
# appropriate timezone from `start` and `end`, so tz does not need
# to be passed explicitly.
return self._generate_range(start=start, end=end, periods=None,
**attribs)
freq=self.freq)

@classmethod
def _add_datetimelike_methods(cls):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,13 @@ def _from_ordinals(cls, values, freq=None, **kwargs):

@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
periods = dtl.validate_periods(periods)

if freq is not None:
freq = Period._maybe_convert_freq(freq)

field_count = len(fields)
if com.count_not_none(start, end) > 0:
if start is not None or end is not None:
if field_count > 0:
raise ValueError('Can either instantiate from fields '
'or endpoints, but not both')
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ def _simple_new(cls, values, freq=None, **kwargs):
result._freq = freq
return result

def __new__(cls, values, freq=None, start=None, end=None, periods=None,
closed=None):
def __new__(cls, values, freq=None):

freq, freq_infer = dtl.maybe_infer_freq(freq)

Expand All @@ -140,8 +139,7 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None,
return result

@classmethod
def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
# **kwargs are for compat with TimedeltaIndex, which includes `name`
def _generate_range(cls, start, end, periods, freq, closed=None):

periods = dtl.validate_periods(periods)
if freq is None and any(x is None for x in [periods, start, end]):
Expand All @@ -167,10 +165,9 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):

if freq is not None:
index = _generate_regular_range(start, end, periods, freq)
index = cls._simple_new(index, freq=freq, **kwargs)
index = cls._simple_new(index, freq=freq)
else:
index = np.linspace(start.value, end.value, periods).astype('i8')
# TODO: shouldn't we pass `name` here? (via **kwargs)
index = cls._simple_new(index, freq=freq)

if not left_closed:
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,12 @@ def astype(self, dtype, copy=True):
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)

@Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
def _time_shift(self, periods, freq=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this related to the rest of the PR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR changes DatetimeArrayMixin._time_shift such that it no longer pins self.name for Index subclasses.

result = DatetimeLikeArrayMixin._time_shift(self, periods, freq=freq)
result.name = self.name
return result


def _ensure_datetimelike_to_i8(other, to_utc=False):
"""
Expand Down
19 changes: 5 additions & 14 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,11 @@ def __new__(cls, data=None,

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
return cls._generate_range(start, end, periods, name, freq,
tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

if not isinstance(data, (np.ndarray, Index, ABCSeries,
DatetimeArrayMixin)):
Expand Down Expand Up @@ -315,17 +317,6 @@ def __new__(cls, data=None,

return subarr._deepcopy_if_needed(ref_to_data, copy)

@classmethod
@Appender(DatetimeArrayMixin._generate_range.__doc__)
def _generate_range(cls, start, end, periods, name=None, freq=None,
tz=None, normalize=False, ambiguous='raise',
closed=None):
out = super(DatetimeIndex, cls)._generate_range(
start, end, periods, freq,
tz=tz, normalize=normalize, ambiguous=ambiguous, closed=closed)
out.name = name
return out

def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
if self._has_same_tz(value):
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,6 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
raise TypeError('__new__() got an unexpected keyword argument {}'.
format(list(set(fields) - valid_field_set)[0]))

periods = dtl.validate_periods(periods)

if name is None and hasattr(data, 'name'):
name = data.name

Expand Down
20 changes: 4 additions & 16 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,

if data is None:
# TODO: Remove this block and associated kwargs; GH#20535
if freq is None and com._any_none(periods, start, end):
raise ValueError('Must provide freq argument if no data is '
'supplied')
periods = dtl.validate_periods(periods)
return cls._generate_range(start, end, periods, name, freq,
closed=closed)
result = cls._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
return result

if unit is not None:
data = to_timedelta(data, unit=unit, box=False)
Expand Down Expand Up @@ -181,16 +179,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,

return subarr

@classmethod
def _generate_range(cls, start, end, periods,
name=None, freq=None, closed=None):
# TimedeltaArray gets `name` via **kwargs, so we need to explicitly
# override it if name is passed as a positional argument
return super(TimedeltaIndex, cls)._generate_range(start, end,
periods, freq,
name=name,
closed=closed)

@classmethod
def _simple_new(cls, values, name=None, freq=None, **kwargs):
result = super(TimedeltaIndex, cls)._simple_new(values, freq, **kwargs)
Expand Down