diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 39b26c61172ed..822df1ce2b968 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -14,30 +14,46 @@ import numbers _VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) -cdef class IntervalMixin: - property closed_left: - def __get__(self): - return self.closed == 'left' or self.closed == 'both' - - property closed_right: - def __get__(self): - return self.closed == 'right' or self.closed == 'both' - - property open_left: - def __get__(self): - return not self.closed_left - - property open_right: - def __get__(self): - return not self.closed_right - - property mid: - def __get__(self): - try: - return 0.5 * (self.left + self.right) - except TypeError: - # datetime safe version - return self.left + 0.5 * (self.right - self.left) +cdef class IntervalMixin(object): + + @property + def closed_left(self): + """ + Return True if the Interval is closed on the left-side, else False + """ + return self.closed in ('left', 'both') + + @property + def closed_right(self): + """ + Return True if the Interval is closed on the right-side, else False + """ + return self.closed in ('right', 'both') + + @property + def open_left(self): + """ + Return True if the Interval is open on the left-side, else False + """ + return not self.closed_left + + @property + def open_right(self): + """ + Return True if the Interval is open on the right-side, else False + """ + return not self.closed_right + + @property + def mid(self): + """ + Return the midpoint of the Interval + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * (self.right - self.left) cdef _interval_like(other): @@ -55,12 +71,12 @@ cdef class Interval(IntervalMixin): Parameters ---------- left : value - Left bound for interval. + Left bound for the interval right : value - Right bound for interval. 
- closed : {'left', 'right', 'both', 'neither'} + Right bound for the interval + closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the interval is closed on the left-side, right-side, both or - neither. Defaults to 'right'. + neither Examples -------- @@ -77,20 +93,30 @@ cdef class Interval(IntervalMixin): See Also -------- - IntervalIndex : an Index of ``interval`` s that are all closed on the same - side. - cut, qcut : convert arrays of continuous data into categoricals/series of - ``Interval``. + IntervalIndex : An Index of Interval objects that are all closed on the + same side. + cut, qcut : Convert arrays of continuous data into Categoricals/Series of + Interval. """ - cdef readonly object left, right + cdef readonly object left + """Left bound for the interval""" + + cdef readonly object right + """Right bound for the interval""" + cdef readonly str closed + """ + Whether the interval is closed on the left-side, right-side, both or + neither + """ def __init__(self, left, right, str closed='right'): # note: it is faster to just do these checks than to use a special # constructor (__cinit__/__new__) to avoid them if closed not in _VALID_CLOSED: - raise ValueError("invalid option for 'closed': %s" % closed) + msg = "invalid option for 'closed': {closed}".format(closed=closed) + raise ValueError(msg) if not left <= right: raise ValueError('left side of interval must be <= right side') self.left = left @@ -122,10 +148,11 @@ cdef class Interval(IntervalMixin): if op == Py_EQ or op == Py_NE: return NotImplemented else: + name = type(self).__name__ + other = type(other).__name__ op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op] - raise TypeError( - 'unorderable types: %s() %s %s()' % - (type(self).__name__, op_str, type(other).__name__)) + raise TypeError('unorderable types: {name}() {op} {other}()' + .format(name=name, op=op_str, other=other)) def __reduce__(self): args = (self.left, self.right, self.closed) @@ -145,15 +172,18 @@ 
cdef class Interval(IntervalMixin): def __repr__(self): left, right = self._repr_base() - return ('%s(%r, %r, closed=%r)' % - (type(self).__name__, left, right, self.closed)) + name = type(self).__name__ + repr_str = '{name}({left!r}, {right!r}, closed={closed!r})'.format( + name=name, left=left, right=right, closed=self.closed) + return repr_str def __str__(self): left, right = self._repr_base() start_symbol = '[' if self.closed_left else '(' end_symbol = ']' if self.closed_right else ')' - return '%s%s, %s%s' % (start_symbol, left, right, end_symbol) + return '{start}{left}, {right}{end}'.format( + start=start_symbol, left=left, right=right, end=end_symbol) def __add__(self, y): if isinstance(y, numbers.Number): @@ -222,8 +252,8 @@ cpdef intervals_to_interval_bounds(ndarray intervals): continue if not isinstance(interval, Interval): - raise TypeError("type {} with value {} is not an interval".format( - type(interval), interval)) + raise TypeError("type {typ} with value {iv} is not an interval" + .format(typ=type(interval), iv=interval)) left[i] = interval.left right[i] = interval.right diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3f74694880533..02ac74e619fa4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -61,8 +61,8 @@ def _get_next_label(label): elif is_float_dtype(dtype): return np.nextafter(label, np.infty) else: - raise TypeError('cannot determine next label for type %r' - % type(label)) + raise TypeError('cannot determine next label for type {typ!r}' + .format(typ=type(label))) def _get_prev_label(label): @@ -76,8 +76,8 @@ def _get_prev_label(label): elif is_float_dtype(dtype): return np.nextafter(label, -np.infty) else: - raise TypeError('cannot determine next label for type %r' - % type(label)) + raise TypeError('cannot determine next label for type {typ!r}' + .format(typ=type(label))) def _get_interval_closed_bounds(interval): @@ -94,17 +94,18 @@ def 
_get_interval_closed_bounds(interval): def _new_IntervalIndex(cls, d): - """ This is called upon unpickling, - rather than the default which doesn't - have arguments and breaks __new__ """ - + """ + This is called upon unpickling, rather than the default which doesn't have + arguments and breaks __new__ + """ return cls.from_arrays(**d) class IntervalIndex(IntervalMixin, Index): """ Immutable Index implementing an ordered, sliceable set. IntervalIndex - represents an Index of intervals that are all closed on the same side. + represents an Index of Interval objects that are all closed on the same + side. .. versionadded:: 0.20.0 @@ -117,9 +118,9 @@ class IntervalIndex(IntervalMixin, Index): ---------- left, right : array-like (1-dimensional) Left and right bounds for each interval. - closed : {'left', 'right', 'both', 'neither'}, optional + closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or - neither. Defaults to 'right'. + neither. name : object, optional Name to be stored in the index. copy : boolean, default False @@ -146,7 +147,7 @@ class IntervalIndex(IntervalMixin, Index): closed='right', dtype='interval[int64]') It may also be constructed using one of the constructor - methods :meth:`IntervalIndex.from_arrays`, + methods: :meth:`IntervalIndex.from_arrays`, :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_intervals` and :meth:`IntervalIndex.from_tuples`. @@ -162,12 +163,10 @@ class IntervalIndex(IntervalMixin, Index): See Also -------- Index : The base pandas Index type - Interval : A bounded slice-like interval - interval_range : Function to create a fixed frequency - IntervalIndex, IntervalIndex.from_arrays, IntervalIndex.from_breaks, - IntervalIndex.from_intervals, IntervalIndex.from_tuples - cut, qcut : convert arrays of continuous data into categoricals/series of - ``Interval``. 
+ Interval : A bounded slice-like interval; the elements of an IntervalIndex + interval_range : Function to create a fixed frequency IntervalIndex + cut, qcut : Convert arrays of continuous data into Categoricals/Series of + Intervals """ _typ = 'intervalindex' _comparables = ['name'] @@ -232,9 +231,9 @@ def _simple_new(cls, left, right, closed=None, name=None, left = left.astype(right.dtype) if type(left) != type(right): - raise ValueError("must not have differing left [{}] " - "and right [{}] types".format( - type(left), type(right))) + raise ValueError("must not have differing left [{left}] " + "and right [{right}] types" + .format(left=type(left), right=type(right))) if isinstance(left, ABCPeriodIndex): raise ValueError("Period dtypes are not supported, " @@ -279,7 +278,8 @@ def _validate(self): Verify that the IntervalIndex is valid. """ if self.closed not in _VALID_CLOSED: - raise ValueError("invalid options for 'closed': %s" % self.closed) + raise ValueError("invalid options for 'closed': {closed}" + .format(closed=self.closed)) if len(self.left) != len(self.right): raise ValueError('left and right must have the same length') left_mask = notna(self.left) @@ -293,12 +293,15 @@ def _validate(self): @cache_readonly def hasnans(self): - """ return if I have any nans; enables various perf speedups """ + """ + Return if the IntervalIndex has any nans; enables various performance + speedups + """ return self._isnan.any() @cache_readonly def _isnan(self): - """ return if each value is nan""" + """Return a mask indicating if each value is NA""" if self._mask is None: self._mask = isna(self.left) return self._mask @@ -335,7 +338,7 @@ def __contains__(self, key): def contains(self, key): """ - return a boolean if this key is IN the index + Return a boolean indicating if the key is IN the index We accept / allow keys to be not *just* actual objects. 
@@ -363,9 +366,9 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): ---------- breaks : array-like (1-dimensional) Left and right bounds for each interval. - closed : {'left', 'right', 'both', 'neither'}, optional + closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both - or neither. Defaults to 'right'. + or neither. name : object, optional Name to be stored in the index. copy : boolean, default False @@ -404,9 +407,9 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): Left bounds for each interval. right : array-like (1-dimensional) Right bounds for each interval. - closed : {'left', 'right', 'both', 'neither'}, optional + closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both - or neither. Defaults to 'right'. + or neither. name : object, optional Name to be stored in the index. copy : boolean, default False @@ -491,9 +494,9 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): ---------- data : array-like (1-dimensional) Array of tuples - closed : {'left', 'right', 'both', 'neither'}, optional + closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both - or neither. Defaults to 'right'. + or neither. name : object, optional Name to be stored in the index. 
copy : boolean, default False @@ -521,15 +524,12 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): left = right = data for d in data: - if isna(d): - left.append(np.nan) - right.append(np.nan) - continue - - l, r = d - left.append(l) - right.append(r) + lhs = rhs = np.nan + else: + lhs, rhs = d + left.append(lhs) + right.append(rhs) # TODO # if we have nulls and we previous had *only* @@ -538,6 +538,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): return cls.from_arrays(left, right, closed, name=name, copy=False) def to_tuples(self): + """Return an Index of tuples of the form (left, right)""" return Index(_asarray_tuplesafe(zip(self.left, self.right))) @cache_readonly @@ -547,14 +548,26 @@ def _multiindex(self): @property def left(self): + """ + Return the left endpoints of each Interval in the IntervalIndex as + an Index + """ return self._left @property def right(self): + """ + Return the right endpoints of each Interval in the IntervalIndex as + an Index + """ return self._right @property def closed(self): + """ + Whether the intervals are closed on the left-side, right-side, both or + neither + """ return self._closed def __len__(self): @@ -563,7 +576,7 @@ def __len__(self): @cache_readonly def values(self): """ - Returns the IntervalIndex's data as a numpy array of Interval + Return the IntervalIndex's data as a numpy array of Interval objects (with dtype='object') """ left = self.left @@ -615,14 +628,17 @@ def astype(self, dtype, copy=True): elif is_categorical_dtype(dtype): from pandas import Categorical return Categorical(self, ordered=True) - raise ValueError('Cannot cast IntervalIndex to dtype %s' % dtype) + raise ValueError('Cannot cast IntervalIndex to dtype {dtype}' + .format(dtype=dtype)) @cache_readonly def dtype(self): + """Return the dtype object of the underlying data""" return IntervalDtype.construct_from_string(str(self.left.dtype)) @property def inferred_type(self): + """Return a string of the type 
inferred from the values""" return 'interval' @Appender(Index.memory_usage.__doc__) @@ -634,7 +650,8 @@ def memory_usage(self, deep=False): @cache_readonly def mid(self): - """Returns the mid-point of each interval in the index as an array + """ + Return the midpoint of each Interval in the IntervalIndex as an Index """ try: return Index(0.5 * (self.left.values + self.right.values)) @@ -645,22 +662,42 @@ def mid(self): @cache_readonly def is_monotonic(self): + """ + Return True if the IntervalIndex is monotonic increasing (only equal or + increasing values), else False + """ return self._multiindex.is_monotonic @cache_readonly def is_monotonic_increasing(self): + """ + Return True if the IntervalIndex is monotonic increasing (only equal or + increasing values), else False + """ return self._multiindex.is_monotonic_increasing @cache_readonly def is_monotonic_decreasing(self): + """ + Return True if the IntervalIndex is monotonic decreasing (only equal or + decreasing values), else False + """ return self._multiindex.is_monotonic_decreasing @cache_readonly def is_unique(self): + """ + Return True if the IntervalIndex contains unique elements, else False + """ return self._multiindex.is_unique @cache_readonly def is_non_overlapping_monotonic(self): + """ + Return True if the IntervalIndex is non-overlapping (no Intervals share + points) and is either monotonic increasing or monotonic decreasing, + else False + """ # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) 
# we already require left <= right @@ -725,9 +762,8 @@ def _check_method(self, method): return if method in ['bfill', 'backfill', 'pad', 'ffill', 'nearest']: - raise NotImplementedError( - 'method {} not yet implemented for ' - 'IntervalIndex'.format(method)) + msg = 'method {method} not yet implemented for IntervalIndex' + raise NotImplementedError(msg.format(method=method)) raise ValueError("Invalid fill method") @@ -866,17 +902,14 @@ def get_value(self, series, key): elif isinstance(key, slice): if not (key.step is None or key.step == 1): - raise ValueError("cannot support not-default " - "step in a slice") + raise ValueError("cannot support not-default step in a slice") try: loc = self.get_loc(key) except TypeError: - - # we didn't find exact intervals - # or are non-unique - raise ValueError("unable to slice with " - "this key: {}".format(key)) + # we didn't find exact intervals or are non-unique + msg = "unable to slice with this key: {key}".format(key=key) + raise ValueError(msg) else: loc = self.get_loc(key) @@ -929,31 +962,31 @@ def _get_reindexer(self, target): indexer = [] n = len(self) - for i, (l, r) in enumerate(zip(lindexer, rindexer)): + for i, (lhs, rhs) in enumerate(zip(lindexer, rindexer)): target_value = target[i] # matching on the lhs bound - if (l != -1 and + if (lhs != -1 and self.closed == 'right' and - target_value.left == self[l].right): - l += 1 + target_value.left == self[lhs].right): + lhs += 1 # matching on the lhs bound - if (r != -1 and + if (rhs != -1 and self.closed == 'left' and - target_value.right == self[r].left): - r -= 1 + target_value.right == self[rhs].left): + rhs -= 1 # not found - if l == -1 and r == -1: + if lhs == -1 and rhs == -1: indexer.append(np.array([-1])) - elif r == -1: + elif rhs == -1: - indexer.append(np.arange(l, n)) + indexer.append(np.arange(lhs, n)) - elif l == -1: + elif lhs == -1: # care about left/right closed here value = self[i] @@ -976,10 +1009,10 @@ def _get_reindexer(self, target): 
indexer.append(np.array([-1])) continue - indexer.append(np.arange(0, r + 1)) + indexer.append(np.arange(0, rhs + 1)) else: - indexer.append(np.arange(l, r + 1)) + indexer.append(np.arange(lhs, rhs + 1)) return np.concatenate(indexer) @@ -996,11 +1029,32 @@ def where(self, cond, other=None): return self._shallow_copy(values) def delete(self, loc): + """ + Return a new IntervalIndex with passed location(-s) deleted + + Returns + ------- + new_index : IntervalIndex + """ new_left = self.left.delete(loc) new_right = self.right.delete(loc) return self._shallow_copy(new_left, new_right) def insert(self, loc, item): + """ + Return a new IntervalIndex inserting new item at location. Follows + Python list.insert semantics for negative values. Only Interval + objects and NA can be inserted into an IntervalIndex + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + new_index : IntervalIndex + """ if isinstance(item, Interval): if item.closed != self.closed: raise ValueError('inserted item must be closed on the same ' @@ -1108,23 +1162,23 @@ def _format_data(self, name=None): summary = '[]' elif n == 1: first = formatter(self[0]) - summary = '[{}]'.format(first) + summary = '[{first}]'.format(first=first) elif n == 2: first = formatter(self[0]) last = formatter(self[-1]) - summary = '[{}, {}]'.format(first, last) + summary = '[{first}, {last}]'.format(first=first, last=last) else: if n > max_seq_items: n = min(max_seq_items // 2, 10) head = [formatter(x) for x in self[:n]] tail = [formatter(x) for x in self[-n:]] - summary = '[{} ... {}]'.format(', '.join(head), - ', '.join(tail)) + summary = '[{head} ... 
{tail}]'.format( + head=', '.join(head), tail=', '.join(tail)) else: head = [] tail = [formatter(x) for x in self] - summary = '[{}]'.format(', '.join(tail)) + summary = '[{tail}]'.format(tail=', '.join(tail)) return summary + self._format_space() @@ -1132,17 +1186,20 @@ def _format_attrs(self): attrs = [('closed', repr(self.closed))] if self.name is not None: attrs.append(('name', default_pprint(self.name))) - attrs.append(('dtype', "'%s'" % self.dtype)) + attrs.append(('dtype', "'{dtype}'".format(dtype=self.dtype))) return attrs def _format_space(self): - return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + space = ' ' * (len(self.__class__.__name__) + 1) + return "\n{space}".format(space=space) def argsort(self, *args, **kwargs): return np.lexsort((self.right, self.left)) def equals(self, other): - + """ + Determines if two IntervalIndex objects contain the same elements + """ if self.is_(other): return True @@ -1216,8 +1273,9 @@ def interval_range(start=None, end=None, periods=None, freq=None, for numeric and 'D' (calendar daily) for datetime-like. name : string, default None Name of the resulting IntervalIndex - closed : string, default 'right' - options are: 'left', 'right', 'both', 'neither' + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. Notes -----