Skip to content

Commit 372e77f

Browse files
y-p
authored and jreback
committed
ENH: support per-axis, per-level indexing with loc[]
CLN: add comments in indexing code CLN: comment out possibly stale kludge fix and wait for explosion CLN: Mark if clause for handling of per-axis tuple indexing with loc PERF: vectorize _spec_to_array_indices, for 3-4x speedup PERF: remove no longer needed list conversion. 1.4x speedup
1 parent 3e54611 commit 372e77f

File tree

1 file changed

+200
-10
lines changed

1 file changed

+200
-10
lines changed

pandas/core/indexing.py

Lines changed: 200 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ def _get_label(self, label, axis=0):
6161
return self.obj[label]
6262
elif (isinstance(label, tuple) and
6363
isinstance(label[axis], slice)):
64-
65-
raise IndexingError('no slices here')
64+
raise IndexingError('no slices here, handle elsewhere')
6665

6766
try:
6867
return self.obj._xs(label, axis=axis, copy=False)
@@ -700,24 +699,32 @@ def _getitem_lowerdim(self, tup):
700699
# a bit kludgy
701700
if isinstance(ax0, MultiIndex):
702701
try:
702+
# fast path for series or for tup devoid of slices
703703
return self._get_label(tup, axis=0)
704704
except TypeError:
705705
# slices are unhashable
706706
pass
707707
except Exception as e1:
708708
if isinstance(tup[0], (slice, Index)):
709-
raise IndexingError
709+
raise IndexingError("Handle elsewhere")
710710

711711
# raise the error if we are not sorted
712712
if not ax0.is_lexsorted_for_tuple(tup):
713713
raise e1
714-
try:
715-
loc = ax0.get_loc(tup[0])
716-
except KeyError:
717-
raise e1
714+
715+
# GH911 introduced this clause, but the regression test
716+
# added for it now passes even without it. Let's rock the boat.
717+
# 2014/01/27
718+
719+
# # should we abort, or keep going?
720+
# try:
721+
# loc = ax0.get_loc(tup[0])
722+
# except KeyError:
723+
# raise e1
724+
718725

719726
if len(tup) > self.obj.ndim:
720-
raise IndexingError
727+
raise IndexingError("Too many indexers. handle elsewhere")
721728

722729
# to avoid wasted computation
723730
# df.ix[d1:d2, 0] -> columns first (True)
@@ -730,9 +737,9 @@ def _getitem_lowerdim(self, tup):
730737
if not _is_list_like(section):
731738
return section
732739

733-
# might have been a MultiIndex
734740
elif section.ndim == self.ndim:
735-
741+
# we're in the middle of slicing through a MultiIndex
742+
# revise the key wrt to `section` by inserting an _NS
736743
new_key = tup[:i] + (_NS,) + tup[i + 1:]
737744

738745
else:
@@ -748,6 +755,7 @@ def _getitem_lowerdim(self, tup):
748755
if len(new_key) == 1:
749756
new_key, = new_key
750757

758+
# This is an elided recursive call to iloc/loc/etc'
751759
return getattr(section, self.name)[new_key]
752760

753761
raise IndexingError('not applicable')
@@ -1171,6 +1179,14 @@ def _getitem_axis(self, key, axis=0):
11711179
raise ValueError('Cannot index with multidimensional key')
11721180

11731181
return self._getitem_iterable(key, axis=axis)
1182+
elif isinstance(key, tuple) and isinstance(labels, MultiIndex) and \
1183+
any([isinstance(x,slice) for x in key]):
1184+
# handle per-axis tuple containting label criteria for
1185+
# each level (or a prefix of levels), may contain
1186+
# (None) slices, list of labels or labels
1187+
specs = _tuple_to_mi_locs(labels,key)
1188+
g = _spec_to_array_indices(labels, specs)
1189+
return self.obj.iloc[g]
11741190
else:
11751191
self._has_valid_type(key, axis)
11761192
return self._get_label(key, axis=axis)
@@ -1554,3 +1570,177 @@ def _maybe_droplevels(index, key):
15541570
pass
15551571

15561572
return index
1573+
1574+
def _tuple_to_mi_locs(ix, tup):
    """Convert a tuple of slices/label lists/labels to a level-wise spec.

    Parameters
    ----------
    ix : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    tup : a tuple of slices, labels or lists of labels.
        slice(None) is acceptable, and the case of len(tup) < ix.nlevels
        will have labels from trailing levels included.

    Returns
    -------
    a list containing ix.nlevels elements of either:
    - a 2-tuple representing a half-open (start, stop) range of label
      positions, or
    - a list of label positions.

    The positions are relative to the labels of the corresponding level, not
    to the entire unrolled index.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
    >>> _tuple_to_mi_locs(mi,(slice('A0','A2'),['B0', 'B1']))
    [(0, 2), [0, 1]]

    read as:
    - All labels in positions [0, 2) in the first level
    - for each of those, all labels at positions 0 or 1.

    The same effective result can be achieved by specifying the None Slice,
    or omitting the trailing level completely:

    >>> _tuple_to_mi_locs(mi,(slice('A0','A2'),slice(None)))
    [(0, 2), (0, 2)]

    >>> _tuple_to_mi_locs(mi,(slice('A0','A2'),))
    [(0, 2), (0, 2)]
    """

    # ix must be lexsorted to at least as many levels
    # as there are elements in `tup`, and must be free of duplicates
    assert ix.is_lexsorted_for_tuple(tup)
    assert ix.is_unique
    assert isinstance(ix, MultiIndex)

    ranges = []

    for i, k in enumerate(tup):
        level = ix.levels[i]

        if _is_list_like(k):
            # a collection of labels to include from this level:
            # record each label's position within the level
            ranges.append([level.get_loc(x) for x in k])
        elif isinstance(k, slice):
            # a label slice; a missing endpoint means "from the start" /
            # "to the end" of the level (covers slice(None) as well).
            if k.start is None:
                start = 0
            else:
                start = level.get_loc(k.start)
            # `is not None` (not truthiness) so falsy stop labels such as
            # 0 or '' are still resolved rather than silently ignored
            if k.stop is not None:
                stop = level.get_loc(k.stop)
            else:
                stop = len(level)
            ranges.append((start, stop))
        else:
            # a single label: emit a half-open range covering exactly it.
            # Consumers (_spec_to_array_indices) expand 2-tuples with
            # range(start, stop), so the former (start, start) form
            # produced an empty selection for scalar labels.
            start = level.get_loc(k)
            ranges.append((start, start + 1))

    # omitting trailing dims means include all their values;
    # range over len(tup) (not the loop variable, which is unbound
    # when tup is empty) so an empty tuple selects everything
    for i in range(len(tup), len(ix.levels)):
        level = ix.levels[i]
        ranges.append((0, len(level)))

    return ranges
1662+
1663+
def _spec_to_array_indices(ix, specs):
    """Convert a level-wise spec into row positions within a MultiIndex.

    Parameters
    ----------
    ix: a sufficiently lexsorted, unique/non-dupe MultiIndex.
    specs: a list of 2-tuples/lists of label positions. Specifically, the
    output of _tuple_to_mi_locs.
    len(specs) must match ix.nlevels.

    Returns
    -------
    a flat ndarray of row positions relative to ix, corresponding to specs.
    Suitable for usage with `iloc`.
    (An equivalent generator-based implementation, _iter_generator, is
    defined below but currently unused.)

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')

    >>> specs = _tuple_to_mi_locs(mi,(slice('A0','A2'),['B0', 'B1']))
    >>> list(_spec_to_array_indices(mi, specs))
    [0, 1, 2, 3]

    Which are all the rows having 'A0' to 'A2' (non-inclusive) at level=0
    and 'B0' or 'B1' at level=1.
    """
    # NOTE(review): is_lexsorted_for_tuple is given `specs` (positions),
    # not labels; presumably only its length is significant here — confirm
    assert ix.is_lexsorted_for_tuple(specs)
    assert len(specs) == ix.nlevels
    assert ix.is_unique
    assert isinstance(ix,MultiIndex)

    # step size/increment for iteration at each level: the number of
    # unrolled rows spanned by a unit step in that level's label position.
    # NOTE(review): this arithmetic assumes ix enumerates the full
    # cartesian product of its levels in sorted (row-major) order —
    # confirm callers guarantee this before relying on the positions
    giant_steps = np.cumprod(ix.levshape[::-1])[::-1]
    giant_steps[:-1] = giant_steps[1:]
    giant_steps[-1] = 1

    def _iter_vectorize(specs, i=0):
        # Recursively build row positions for levels i..nlevels-1 using
        # numpy tiling/broadcasting instead of a Python-level loop.
        step_size = giant_steps[i]
        spec=specs[i]
        if isinstance(spec,tuple):
            # tuples are 2-tuples of (start,stop) label indices to include
            valrange = compat.range(*spec)
        elif isinstance(spec,list):
            # lists are discrete label indicies to include
            valrange = spec
        # NOTE(review): any other spec type leaves `valrange` unbound and
        # raises NameError below — relies on _tuple_to_mi_locs's output shape

        if len(specs)-1 == i:
            # base case: positions within the last level are the offsets
            return np.array(valrange)
        else:
            # offsets contributed by the deeper levels...
            tmpl = np.array([v for v in _iter_vectorize(specs,i+1)])
            # ...replicated once per label chosen at this level...
            res=np.tile(tmpl,(len(valrange),1))
            # ...shifted by this level's base offsets (broadcast over rows)
            steps=(np.array(valrange)*step_size).reshape((len(valrange),1))
            return (res+steps).flatten()


    def _iter_generator(specs, i=0):
        # Generator twin of _iter_vectorize, yielding one row position at
        # a time; kept as a reference implementation but not called.
        step_size = giant_steps[i]
        spec=specs[i]
        if isinstance(spec,tuple):
            # tuples are 2-tuples of (start,stop) label indices to include
            valrange = compat.range(*spec)
        elif isinstance(spec,list):
            # lists are discrete label indicies to include
            valrange = spec

        if len(specs)-1 == i:
            # base case
            for v in valrange:
                yield v
        else:
            for base in valrange:
                base *= step_size
                for v in _iter_generator(specs,i+1):
                    yield base + v
    # validate

    return _iter_vectorize(specs)

0 commit comments

Comments
 (0)