Skip to content

Commit f4cefa4

Browse files
committed
CLN/COMPAT: IntervalIndex
1 parent d4a3c5d commit f4cefa4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+3212
-3069
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,26 @@ def time_is_monotonic(self):
226226
self.miint.is_monotonic
227227

228228

229+
class IntervalIndexing(object):
230+
goal_time = 0.2
231+
232+
def setup(self):
233+
self.monotonic = Series(np.arange(1000000),
234+
index=IntervalIndex.from_breaks(np.arange(1000001)))
235+
236+
def time_getitem_scalar(self):
237+
self.monotonic[80000]
238+
239+
def time_loc_scalar(self):
240+
self.monotonic.loc[80000]
241+
242+
def time_getitem_list(self):
243+
self.monotonic[80000:]
244+
245+
def time_loc_list(self):
246+
self.monotonic.loc[80000:]
247+
248+
229249
class PanelIndexing(object):
230250
goal_time = 0.2
231251

doc/source/api.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,6 +1404,27 @@ Categorical Components
14041404
CategoricalIndex.as_ordered
14051405
CategoricalIndex.as_unordered
14061406

1407+
.. _api.intervalindex:
1408+
1409+
IntervalIndex
1410+
-------------
1411+
1412+
.. autosummary::
1413+
:toctree: generated/
1414+
1415+
IntervalIndex
1416+
1417+
IntervalIndex Components
1418+
~~~~~~~~~~~~~~~~~~~~~~~~
1419+
1420+
.. autosummary::
1421+
:toctree: generated/
1422+
1423+
IntervalIndex.from_arrays
1424+
IntervalIndex.from_tuples
1425+
IntervalIndex.from_breaks
1426+
IntervalIndex.from_intervals
1427+
14071428
.. _api.multiindex:
14081429

14091430
MultiIndex

doc/source/whatsnew/v0.20.0.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Highlights include:
1212
- The ``.ix`` indexer has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_ix>`
1313
- Improved user API when accessing levels in ``.groupby()``, see :ref:`here <whatsnew_0200.enhancements.groupby_access>`
1414
- Improved support for UInt64 dtypes, see :ref:`here <whatsnew_0200.enhancements.uint64_support>`
15+
- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here <whatsnew_0200.enhancements.intervalindex>`
1516
- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here <whatsnew_0200.enhancements.table_schema>`
1617
- Support for S3 handling now uses ``s3fs``, see :ref:`here <whatsnew_0200.api_breaking.s3>`
1718
- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here <whatsnew_0200.api_breaking.gbq>`
@@ -311,6 +312,36 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you
311312

312313
sdf.to_coo()
313314

315+
.. _whatsnew_0200.enhancements.intervalindex:
316+
317+
IntervalIndex
318+
^^^^^^^^^^^^^
319+
320+
pandas has gained an ``IntervalIndex`` with its own dtype, ``interval``, as well as the ``Interval`` scalar type. These allow first-class support for interval
321+
notation, specifically as a return type for ``pd.cut`` and ``pd.qcut``. (:issue:`7640`, :issue:`8625`)
322+
323+
**Previous behavior**:
324+
325+
.. code-block:: ipython
326+
327+
In [2]: pd.cut(range(3), 2)
328+
Out[2]:
329+
[(-0.002, 1], (-0.002, 1], (1, 2]]
330+
Categories (2, object): [(-0.002, 1] < (1, 2]]
331+
332+
# the returned categories are strings, representing Intervals
333+
In [3]: pd.cut(range(3), 2).categories
334+
Out[3]: Index(['(-0.002, 1]', '(1, 2]'], dtype='object')
335+
336+
**New behavior**:
337+
338+
.. ipython:: python
339+
340+
c = pd.cut(range(3), 2)
341+
c
342+
c.categories
343+
pd.api.types.is_interval_dtype(c.categories)
344+
314345
.. _whatsnew_0200.enhancements.other:
315346

316347
Other Enhancements

pandas/_libs/hashtable.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ PyDateTime_IMPORT
3939
cdef extern from "Python.h":
4040
int PySlice_Check(object)
4141

42+
cdef size_t _INIT_VEC_CAP = 128
43+
4244
include "hashtable_class_helper.pxi"
4345
include "hashtable_func_helper.pxi"
4446

pandas/src/interval.pyx renamed to pandas/_libs/interval.pyx

Lines changed: 74 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ cimport numpy as np
22
import numpy as np
33
import pandas as pd
44

5+
cimport util
56
cimport cython
67
import cython
8+
from numpy cimport *
9+
from tslib import Timestamp
710

811
from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
912
PyObject_RichCompare)
@@ -44,6 +47,20 @@ cdef _interval_like(other):
4447

4548

4649
cdef class Interval(IntervalMixin):
50+
"""
51+
Immutable object implementing an Interval, a bounded slice-like interval.
52+
53+
.. versionadded:: 0.20.0
54+
55+
Properties
56+
----------
57+
left, right : values
58+
Left and right bounds for each interval.
59+
closed : {'left', 'right', 'both', 'neither'}
60+
Whether the interval is closed on the left-side, right-side, both or
61+
neither. Defaults to 'right'.
62+
"""
63+
4764
cdef readonly object left, right
4865
cdef readonly str closed
4966

@@ -84,88 +101,115 @@ cdef class Interval(IntervalMixin):
84101
return NotImplemented
85102
else:
86103
op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op]
87-
raise TypeError('unorderable types: %s() %s %s()' %
88-
(type(self).__name__, op_str, type(other).__name__))
104+
raise TypeError(
105+
'unorderable types: %s() %s %s()' %
106+
(type(self).__name__, op_str, type(other).__name__))
89107

90108
def __reduce__(self):
91109
args = (self.left, self.right, self.closed)
92110
return (type(self), args)
93111

112+
def _repr_base(self):
113+
left = self.left
114+
right = self.right
115+
116+
# TODO: need more general formatting methodology here
117+
if isinstance(left, Timestamp) and isinstance(right, Timestamp):
118+
left = left._short_repr
119+
right = right._short_repr
120+
121+
return left, right
122+
94123
def __repr__(self):
124+
125+
left, right = self._repr_base()
95126
return ('%s(%r, %r, closed=%r)' %
96-
(type(self).__name__, self.left, self.right, self.closed))
127+
(type(self).__name__, left, right, self.closed))
97128

98129
def __str__(self):
130+
131+
left, right = self._repr_base()
99132
start_symbol = '[' if self.closed_left else '('
100133
end_symbol = ']' if self.closed_right else ')'
101-
return '%s%s, %s%s' % (start_symbol, self.left, self.right, end_symbol)
134+
return '%s%s, %s%s' % (start_symbol, left, right, end_symbol)
102135

103136
def __add__(self, y):
104137
if isinstance(y, numbers.Number):
105138
return Interval(self.left + y, self.right + y)
106139
elif isinstance(y, Interval) and isinstance(self, numbers.Number):
107140
return Interval(y.left + self, y.right + self)
108-
else:
109-
raise NotImplemented
141+
return NotImplemented
110142

111143
def __sub__(self, y):
112144
if isinstance(y, numbers.Number):
113145
return Interval(self.left - y, self.right - y)
114-
else:
115-
raise NotImplemented
146+
return NotImplemented
116147

117148
def __mul__(self, y):
118149
if isinstance(y, numbers.Number):
119150
return Interval(self.left * y, self.right * y)
120151
elif isinstance(y, Interval) and isinstance(self, numbers.Number):
121152
return Interval(y.left * self, y.right * self)
122-
else:
123-
return NotImplemented
153+
return NotImplemented
124154

125155
def __div__(self, y):
126156
if isinstance(y, numbers.Number):
127157
return Interval(self.left / y, self.right / y)
128-
else:
129-
return NotImplemented
158+
return NotImplemented
130159

131160
def __truediv__(self, y):
132161
if isinstance(y, numbers.Number):
133162
return Interval(self.left / y, self.right / y)
134-
else:
135-
return NotImplemented
163+
return NotImplemented
136164

137165
def __floordiv__(self, y):
138166
if isinstance(y, numbers.Number):
139167
return Interval(self.left // y, self.right // y)
140-
else:
141-
return NotImplemented
168+
return NotImplemented
142169

143170

144171
@cython.wraparound(False)
145172
@cython.boundscheck(False)
146-
cpdef interval_bounds_to_intervals(np.ndarray left, np.ndarray right,
147-
str closed):
148-
result = np.empty(len(left), dtype=object)
149-
nulls = pd.isnull(left) | pd.isnull(right)
150-
result[nulls] = np.nan
151-
for i in np.flatnonzero(~nulls):
152-
result[i] = Interval(left[i], right[i], closed)
153-
return result
173+
cpdef intervals_to_interval_bounds(ndarray intervals):
174+
"""
175+
Parameters
176+
----------
177+
intervals: ndarray object array of Intervals / nulls
154178
179+
Returns
180+
-------
181+
tuples (left: ndarray object array,
182+
right: ndarray object array,
183+
closed: str)
184+
185+
"""
186+
187+
cdef:
188+
object closed = None, interval
189+
int64_t n = len(intervals)
190+
ndarray left, right
191+
192+
left = np.empty(n, dtype=object)
193+
right = np.empty(n, dtype=object)
155194

156-
@cython.wraparound(False)
157-
@cython.boundscheck(False)
158-
cpdef intervals_to_interval_bounds(np.ndarray intervals):
159-
left = np.empty(len(intervals), dtype=object)
160-
right = np.empty(len(intervals), dtype=object)
161-
cdef str closed = None
162195
for i in range(len(intervals)):
163196
interval = intervals[i]
197+
if util._checknull(interval):
198+
left[i] = np.nan
199+
right[i] = np.nan
200+
continue
201+
202+
if not isinstance(interval, Interval):
203+
raise TypeError("type {} with value {} is not an interval".format(
204+
type(interval), interval))
205+
164206
left[i] = interval.left
165207
right[i] = interval.right
166208
if closed is None:
167209
closed = interval.closed
168210
elif closed != interval.closed:
169211
raise ValueError('intervals must all be closed on the same side')
212+
170213
return left, right, closed
171214

215+
include "intervaltree.pxi"

0 commit comments

Comments
 (0)