Skip to content

Commit 30eb6db

Browse files
committed
CLN: move indexing loc changes to index.py
1 parent 372e77f commit 30eb6db

File tree

2 files changed

+175
-178
lines changed

2 files changed

+175
-178
lines changed

pandas/core/index.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3249,6 +3249,179 @@ def _get_level_indexer(self, key, level=0):
32493249
j = labels.searchsorted(loc, side='right')
32503250
return slice(i, j)
32513251

3252+
def get_locs(self, tup):
    """Convert a tuple of slices/label lists/labels to a level-wise spec.

    Parameters
    ----------
    self : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    tup : a tuple of slices, labels or lists of labels, one entry per
        leading level.  ``slice(None)`` is acceptable, a slice may leave
        either end open, and when ``len(tup) < self.nlevels`` every label
        of the omitted trailing levels is included.  Slice steps are
        ignored.

    Returns
    -------
    a list containing ``self.nlevels`` elements of either:
        - a 2-tuple ``(start, stop)`` of label positions, with ``stop``
          excluded (it is expanded with ``range(start, stop)`` by
          ``locs_to_indexer``)
        or
        - a list of label positions.

    The positions are relative to the labels of the corresponding level,
    not to the entire unrolled index.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')
    >>> mi.get_locs((slice('A0','A2'),['B0', 'B1']))
    [(0, 2), [0, 1]]

    read as:
        - All labels in position [0,2) in first level
        - for each of those, all labels at positions 0 or 1.

    The same effective result can be achieved by specifying the None
    slice, or omitting it completely.  Note the tuple (0, 2) has replaced
    the list [0, 1], but the outcome is the same.

    >>> mi.get_locs((slice('A0','A2'),slice(None)))
    [(0, 2), (0, 2)]

    >>> mi.get_locs((slice('A0','A2'),))
    [(0, 2), (0, 2)]

    """
    # self must be lexsorted to at least as many levels
    # as there are elements in `tup`
    assert self.is_lexsorted_for_tuple(tup)
    assert self.is_unique
    assert isinstance(self, MultiIndex)

    ranges = []

    for i, k in enumerate(tup):
        level = self.levels[i]

        if com.is_list_like(k):
            # a collection of labels to include from this level
            ranges.append([level.get_loc(x) for x in k])
        elif isinstance(k, slice):
            # Compare the bounds against None explicitly: a falsy label
            # such as 0 or '' is still a real bound, and an open start
            # must not be looked up as the label `None`.
            start = 0 if k.start is None else level.get_loc(k.start)
            stop = len(level) if k.stop is None else level.get_loc(k.stop)
            ranges.append((start, stop))
        else:
            # a single label: the stop is exclusive, so the range has to
            # extend one past the label's position to contain it
            start = level.get_loc(k)
            ranges.append((start, start + 1))

    # omitting trailing dims means include all values; slicing by
    # len(tup) (rather than reusing the loop variable) also covers an
    # empty `tup`
    for level in self.levels[len(tup):]:
        ranges.append((0, len(level)))

    return ranges
3339+
3340+
def locs_to_indexer(self, specs):
    """Take a location specification to an indexer.

    Parameters
    ----------
    self : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    specs : a list of 2-tuples/lists of label positions, one per level.
        Specifically, the output of ``get_locs``.
        ``len(specs)`` must match ``self.nlevels``.

    Returns
    -------
    a 1-d integer numpy array of row positions relative to self,
    corresponding to specs.  Suitable for usage with `iloc`.

    Raises
    ------
    TypeError
        If an element of `specs` is neither a tuple nor a list.

    Notes
    -----
    Row positions are computed arithmetically from ``self.levshape``
    (position = sum of label position * step of its level), so the result
    presumably is only meaningful when the index holds every combination
    of its level values in lexsorted order -- confirm against callers.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')

    >>> locs = mi.get_locs((slice('A0','A2'),['B0', 'B1']))
    >>> list(mi.locs_to_indexer(locs))
    [0, 1, 2, 3]

    Which are all the labels having 'A0' to 'A2' (non-inclusive) at
    level=0 and 'B0' or 'B1' at level=1.

    """
    assert self.is_lexsorted_for_tuple(specs)
    assert len(specs) == self.nlevels
    assert self.is_unique

    # step size/increment for iteration at each level: the product of
    # the sizes of all levels to its right (1 for the innermost level)
    shape = np.asarray(self.levshape, dtype=np.intp)
    giant_steps = np.ones(len(shape), dtype=np.intp)
    giant_steps[:-1] = np.cumprod(shape[:0:-1])[::-1]

    def _level_positions(spec):
        # Expand one level's spec into an integer array of label
        # positions; the explicit dtype keeps empty selections integral.
        if isinstance(spec, tuple):
            # tuples are 2-tuples of (start,stop) label indices to include
            return np.arange(*spec, dtype=np.intp)
        if isinstance(spec, list):
            # lists are discrete label indices to include
            return np.asarray(spec, dtype=np.intp)
        raise TypeError("each spec must be a (start, stop) tuple or a list "
                        "of label positions, got %r" % (spec,))

    # Build from the innermost level outwards: every position selected
    # at level i offsets the whole block of positions already computed
    # for the levels to its right.
    positions = _level_positions(specs[-1])
    for i in range(len(specs) - 2, -1, -1):
        offsets = _level_positions(specs[i]) * giant_steps[i]
        positions = (offsets[:, None] + positions[None, :]).ravel()

    return positions
3424+
32523425
def truncate(self, before=None, after=None):
32533426
"""
32543427
Slice index between two labels / tuples, return new MultiIndex

pandas/core/indexing.py

Lines changed: 2 additions & 178 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,11 +1181,8 @@ def _getitem_axis(self, key, axis=0):
11811181
return self._getitem_iterable(key, axis=axis)
11821182
elif isinstance(key, tuple) and isinstance(labels, MultiIndex) and \
11831183
any([isinstance(x,slice) for x in key]):
1184-
# handle per-axis tuple containting label criteria for
1185-
# each level (or a prefix of levels), may contain
1186-
# (None) slices, list of labels or labels
1187-
specs = _tuple_to_mi_locs(labels,key)
1188-
g = _spec_to_array_indices(labels, specs)
1184+
locs = labels.get_locs(key)
1185+
g = labels.locs_to_indexer(locs)
11891186
return self.obj.iloc[g]
11901187
else:
11911188
self._has_valid_type(key, axis)
@@ -1571,176 +1568,3 @@ def _maybe_droplevels(index, key):
15711568

15721569
return index
15731570

1574-
def _tuple_to_mi_locs(ix,tup):
1575-
"""Convert a tuple of slices/label lists/labels to a level-wise spec
1576-
1577-
Parameters
1578-
----------
1579-
ix: a sufficiently lexsorted, unique/non-dupe MultIindex.
1580-
tup: a tuple of slices, labels or lists of labels.
1581-
slice(None) is acceptable, and the case of len(tup)<ix.nlevels
1582-
will have labels from trailing levels included.
1583-
1584-
Returns
1585-
-------
1586-
a list containing ix.nlevels elements of either:
1587-
- 2-tuple representing a (start,stop) slice
1588-
or
1589-
- a list of label positions.
1590-
1591-
The positions are relative to the labels of the corresponding level, not to
1592-
the entire unrolled index.
1593-
1594-
Example (This is *not* a doctest):
1595-
>>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
1596-
>>> for x in mi.get_values(): print(x)
1597-
('A0', 'B0')
1598-
('A0', 'B1')
1599-
('A1', 'B0')
1600-
('A1', 'B1')
1601-
('A2', 'B0')
1602-
('A2', 'B1')
1603-
>>> _tuple_to_mi_locs(mi,(slice('A0','A2'),['B0', 'B1']))
1604-
[(0, 2), [0, 1]]
1605-
1606-
read as:
1607-
- All labels in position [0,1) in first level
1608-
- for each of those, all labels at positions 0 or 1.
1609-
1610-
The same effective result can be achieved by specifying the None Slice,
1611-
or omitting it completely. Note the tuple (0,2) has replaced the list [0 1],
1612-
but the outcome is the same.
1613-
1614-
>>> _tuple_to_mi_locs(mi,(slice('A0','A2'),slice(None)))
1615-
[(0, 2), (0,2)]
1616-
1617-
>>> _tuple_to_mi_locs(mi,(slice('A0','A2'),))
1618-
[(0, 2), (0,2)]
1619-
1620-
"""
1621-
1622-
1623-
ranges = []
1624-
1625-
# ix must be lexsorted to at least as many levels
1626-
# as there are elements in `tup`
1627-
assert ix.is_lexsorted_for_tuple(tup)
1628-
assert ix.is_unique
1629-
assert isinstance(ix,MultiIndex)
1630-
1631-
for i,k in enumerate(tup):
1632-
level = ix.levels[i]
1633-
1634-
if _is_list_like(k):
1635-
# a collection of labels to include from this level
1636-
ranges.append([level.get_loc(x) for x in k])
1637-
continue
1638-
if k == slice(None):
1639-
start = 0
1640-
stop = len(level)
1641-
elif isinstance(k,slice):
1642-
start = level.get_loc(k.start)
1643-
stop = len(level)
1644-
if k.stop:
1645-
stop = level.get_loc(k.stop)
1646-
else:
1647-
# a single label
1648-
start = level.get_loc(k)
1649-
stop = start
1650-
1651-
ranges.append((start,stop))
1652-
1653-
for i in range(i+1,len(ix.levels)):
1654-
# omitting trailing dims
1655-
# means include all values
1656-
level = ix.levels[i]
1657-
start = 0
1658-
stop = len(level)
1659-
ranges.append((start,stop))
1660-
1661-
return ranges
1662-
1663-
def _spec_to_array_indices(ix, specs):
1664-
"""Convert a tuple of slices/label lists/labels to a level-wise spec
1665-
1666-
Parameters
1667-
----------
1668-
ix: a sufficiently lexsorted, unique/non-dupe MultIindex.
1669-
specs: a list of 2-tuples/list of label positions. Specifically, The
1670-
output of _tuple_to_mi_locs.
1671-
len(specs) must matc ix.nlevels.
1672-
1673-
Returns
1674-
-------
1675-
a generator of row positions relative to ix, corresponding to specs.
1676-
Suitable for usage with `iloc`.
1677-
1678-
Example (This is *not* a doctest):
1679-
>>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
1680-
>>> for x in mi.get_values(): print(x)
1681-
('A0', 'B0')
1682-
('A0', 'B1')
1683-
('A1', 'B0')
1684-
('A1', 'B1')
1685-
('A2', 'B0')
1686-
('A2', 'B1')
1687-
1688-
>>> specs = _tuple_to_mi_locs(mi,(slice('A0','A2'),['B0', 'B1']))
1689-
>>> list(_spec_to_array_indices(mi, specs))
1690-
[0, 1, 2, 3]
1691-
1692-
Which are all the labels having 'A0' to 'A2' (non-inclusive) at level=0
1693-
and 'B0' or 'B1' at level = 0
1694-
1695-
"""
1696-
assert ix.is_lexsorted_for_tuple(specs)
1697-
assert len(specs) == ix.nlevels
1698-
assert ix.is_unique
1699-
assert isinstance(ix,MultiIndex)
1700-
1701-
# step size/increment for iteration at each level
1702-
giant_steps = np.cumprod(ix.levshape[::-1])[::-1]
1703-
giant_steps[:-1] = giant_steps[1:]
1704-
giant_steps[-1] = 1
1705-
1706-
def _iter_vectorize(specs, i=0):
1707-
step_size = giant_steps[i]
1708-
spec=specs[i]
1709-
if isinstance(spec,tuple):
1710-
# tuples are 2-tuples of (start,stop) label indices to include
1711-
valrange = compat.range(*spec)
1712-
elif isinstance(spec,list):
1713-
# lists are discrete label indicies to include
1714-
valrange = spec
1715-
1716-
if len(specs)-1 == i:
1717-
return np.array(valrange)
1718-
else:
1719-
tmpl = np.array([v for v in _iter_vectorize(specs,i+1)])
1720-
res=np.tile(tmpl,(len(valrange),1))
1721-
steps=(np.array(valrange)*step_size).reshape((len(valrange),1))
1722-
return (res+steps).flatten()
1723-
1724-
1725-
def _iter_generator(specs, i=0):
1726-
step_size = giant_steps[i]
1727-
spec=specs[i]
1728-
if isinstance(spec,tuple):
1729-
# tuples are 2-tuples of (start,stop) label indices to include
1730-
valrange = compat.range(*spec)
1731-
elif isinstance(spec,list):
1732-
# lists are discrete label indicies to include
1733-
valrange = spec
1734-
1735-
if len(specs)-1 == i:
1736-
# base case
1737-
for v in valrange:
1738-
yield v
1739-
else:
1740-
for base in valrange:
1741-
base *= step_size
1742-
for v in _iter_generator(specs,i+1):
1743-
yield base + v
1744-
# validate
1745-
1746-
return _iter_vectorize(specs)

0 commit comments

Comments
 (0)