Skip to content

Commit 7320263

Browse files
committed
ENH: allow core/index/_get_loc_level to deal with a slice indexer for a particular level
ENH: remove get_specs/specs_to_indexer -> replace with get_locs, to directly compute an indexer for a multi-level specification
1 parent bd2e2a1 commit 7320263

File tree

3 files changed

+64
-140
lines changed

3 files changed

+64
-140
lines changed

pandas/core/index.py

Lines changed: 43 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# pylint: disable=E1101,E1103,W0232
22
import datetime
33
from functools import partial
4-
from pandas.compat import range, zip, lrange, lzip, u
4+
from pandas.compat import range, zip, lrange, lzip, u, reduce
55
from pandas import compat
66
import numpy as np
77

@@ -3231,6 +3231,13 @@ def partial_selection(key):
32313231
if key[i] != slice(None, None)]
32323232
return indexer, _maybe_drop_levels(indexer, ilevels,
32333233
drop_level)
3234+
elif isinstance(key, slice):
3235+
# handle a passed slice for this level
3236+
start = self._get_level_indexer(key.start,level=level)
3237+
stop = self._get_level_indexer(key.stop,level=level)
3238+
step = key.step
3239+
indexer = slice(start.start,stop.start,step)
3240+
return indexer, _maybe_drop_levels(indexer, [level], drop_level)
32343241
else:
32353242
indexer = self._get_level_indexer(key, level=level)
32363243
new_index = _maybe_drop_levels(indexer, [level], drop_level)
@@ -3249,157 +3256,61 @@ def _get_level_indexer(self, key, level=0):
32493256
j = labels.searchsorted(loc, side='right')
32503257
return slice(i, j)
32513258

3252-
def get_specs(self, tup):
3253-
"""Convert a tuple of slices/label lists/labels to a level-wise spec
3259+
def get_locs(self, tup):
3260+
"""
3261+
Given a tuple of slices/lists/labels (a level-wise spec),
3262+
produce an indexer to extract those locations
32543263
32553264
Parameters
32563265
----------
3257-
self: a sufficiently lexsorted, unique/non-dupe MultIindex.
3258-
tup: a tuple of slices, labels or lists of labels.
3259-
slice(None) is acceptable, and the case of len(tup)<ix.nlevels
3260-
will have labels from trailing levels included.
3266+
tup : tuple of (slices/list/labels)
32613267
32623268
Returns
32633269
-------
3264-
a list containing ix.nlevels elements of either:
3265-
- 2-tuple representing a (start,stop) slice
3266-
or
3267-
- a list of label positions.
3268-
3269-
The positions are relative to the labels of the corresponding level, not to
3270-
the entire unrolled index.
3271-
3272-
Example (This is *not* a doctest):
3273-
>>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
3274-
>>> for x in mi.get_values(): print(x)
3275-
('A0', 'B0')
3276-
('A0', 'B1')
3277-
('A1', 'B0')
3278-
('A1', 'B1')
3279-
('A2', 'B0')
3280-
('A2', 'B1')
3281-
>>> mi.get_specs((slice('A0','A2'),['B0', 'B1']))
3282-
[(0, 2), [0, 1]]
3283-
3284-
read as:
3285-
- All labels in position [0,1) in first level
3286-
- for each of those, all labels at positions 0 or 1.
3287-
3288-
The same effective result can be achieved by specifying the None Slice,
3289-
or omitting it completely. Note the tuple (0,2) has replaced the list [0 1],
3290-
but the outcome is the same.
3291-
3292-
>>> mi.get_locs((slice('A0','A2'),slice(None)))
3293-
[(0, 2), (0,2)]
3294-
3295-
>>> mi.get_locs((slice('A0','A2'),))
3296-
[(0, 2), (0,2)]
3297-
3270+
locs : slice, integer list of locations or boolean indexer suitable
3271+
for passing to iloc
32983272
"""
32993273

3300-
ranges = []
3301-
3302-
# self must be lexsorted to at least as many levels
3303-
# as there are elements in `tup`
3274+
# must be lexsorted to at least as many levels as there are elements in `tup`
33043275
assert self.is_lexsorted_for_tuple(tup)
33053276
assert self.is_unique
3306-
assert isinstance(self,MultiIndex)
33073277

3278+
def _convert_indexer(r):
3279+
if isinstance(r, slice):
3280+
m = np.zeros(len(self),dtype=bool)
3281+
m[r] = True
3282+
return m
3283+
return r
3284+
3285+
ranges = []
33083286
for i,k in enumerate(tup):
3309-
level = self.levels[i]
33103287

33113288
if com.is_list_like(k):
3312-
# a collection of labels to include from this level
3313-
ranges.append([level.get_loc(x) for x in k])
3314-
continue
3315-
if k == slice(None):
3316-
start = 0
3317-
stop = len(level)
3289+
# a collection of labels to include from this level (these are or'd)
3290+
ranges.append(reduce(
3291+
np.logical_or,[ _convert_indexer(self._get_level_indexer(x, level=i)
3292+
) for x in k ]))
3293+
elif k == slice(None):
3294+
# include all from this level
3295+
pass
33183296
elif isinstance(k,slice):
3319-
start = level.get_loc(k.start)
3320-
stop = len(level)
3321-
if k.stop:
3322-
stop = level.get_loc(k.stop)
3297+
start = self._get_level_indexer(k.start,level=i)
3298+
stop = self._get_level_indexer(k.stop,level=i)
3299+
step = k.step
3300+
ranges.append(slice(start.start,stop.start,step))
33233301
else:
33243302
# a single label
3325-
# make this into a list of a tuple
3326-
ranges.append([level.get_loc(k)])
3327-
continue
3328-
3329-
ranges.append((start,stop))
3303+
ranges.append(self.get_loc_level(k,level=i,drop_level=False)[0])
33303304

3331-
for i in range(i+1,len(self.levels)):
3332-
# omitting trailing dims
3333-
# means include all values
3334-
level = self.levels[i]
3335-
start = 0
3336-
stop = len(level)
3337-
ranges.append((start,stop))
3305+
# identity
3306+
if len(ranges) == 0:
3307+
return slice(0,len(self))
33383308

3339-
return ranges
3340-
3341-
def specs_to_indexer(self, specs):
3342-
""" Take a location specification to an indexer
3343-
3344-
Parameters
3345-
----------
3346-
self: a sufficiently lexsorted, unique/non-dupe MultIindex.
3347-
specs: a list of 2-tuples/list of label positions. Specifically, The
3348-
output of get_specs
3349-
len(specs) must matc ix.nlevels.
3350-
3351-
Returns
3352-
-------
3353-
a generator of row positions relative to ix, corresponding to specs.
3354-
Suitable for usage with `iloc`.
3355-
3356-
Example (This is *not* a doctest):
3357-
>>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'],['B0', 'B1']])
3358-
>>> for x in mi.get_values(): print(x)
3359-
('A0', 'B0')
3360-
('A0', 'B1')
3361-
('A1', 'B0')
3362-
('A1', 'B1')
3363-
('A2', 'B0')
3364-
('A2', 'B1')
3365-
3366-
>>> locs = mi.get_specs((slice('A0','A2'),['B0', 'B1']))
3367-
>>> list(mi.specs_to_indexer(locs))
3368-
[0, 1, 2, 3]
3369-
3370-
Which are all the labels having 'A0' to 'A2' (non-inclusive) at level=0
3371-
and 'B0' or 'B1' at level = 0
3372-
3373-
"""
3374-
assert self.is_lexsorted_for_tuple(specs)
3375-
assert len(specs) == self.nlevels
3376-
assert self.is_unique
3377-
assert isinstance(self,MultiIndex)
3378-
3379-
# step size/increment for iteration at each level
3380-
giant_steps = np.cumprod(self.levshape[::-1])[::-1]
3381-
giant_steps[:-1] = giant_steps[1:]
3382-
giant_steps[-1] = 1
3383-
3384-
def _iter_vectorize(specs, i=0):
3385-
step_size = giant_steps[i]
3386-
spec=specs[i]
3387-
if isinstance(spec,tuple):
3388-
# tuples are 2-tuples of (start,stop) label indices to include
3389-
valrange = compat.range(*spec)
3390-
elif isinstance(spec,list):
3391-
# lists are discrete label indicies to include
3392-
valrange = spec
3393-
3394-
if len(specs)-1 == i:
3395-
return np.array(valrange)
3396-
else:
3397-
tmpl=np.array([v for v in _iter_vectorize(specs,i+1)])
3398-
res=np.tile(tmpl,(len(valrange),1))
3399-
steps=(np.array(valrange)*step_size).reshape((len(valrange),1))
3400-
return (res+steps).flatten()
3309+
elif len(ranges) == 1:
3310+
return ranges[0]
34013311

3402-
return _iter_vectorize(specs)
3312+
# construct a boolean indexer if we have a slice or boolean indexer
3313+
return reduce(np.logical_and,[ _convert_indexer(r) for r in ranges ])
34033314

34043315
def truncate(self, before=None, after=None):
34053316
"""

pandas/core/indexing.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,10 @@ def _getitem_nested_tuple(self, tup):
780780
axis = 0
781781
for key in tup:
782782

783+
if _is_null_slice(key):
784+
axis += 1
785+
continue
786+
783787
obj = getattr(obj, self.name)._getitem_axis(key, axis=axis, validate_iterable=True)
784788
axis += 1
785789

@@ -1211,10 +1215,9 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
12111215
self._has_valid_type(key, axis)
12121216
return self._getitem_iterable(key, axis=axis)
12131217
elif _is_nested_tuple(key, labels):
1214-
specs = labels.get_specs(key)
1215-
g = labels.specs_to_indexer(specs)
1218+
locs = labels.get_locs(key)
12161219
indexer = [ slice(None) ] * self.ndim
1217-
indexer[axis] = g
1220+
indexer[axis] = locs
12181221
return self.obj.iloc[tuple(indexer)]
12191222
else:
12201223
self._has_valid_type(key, axis)

pandas/tests/test_indexing.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,7 +1078,7 @@ def mklbl(prefix,n):
10781078
# test multi-index slicing with per axis and per index controls
10791079
index = MultiIndex.from_tuples([('A',1),('A',2),('A',3),('B',1)],
10801080
names=['one','two'])
1081-
columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),('b','hello'),('b','world')],
1081+
columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),('b','foo'),('b','bah')],
10821082
names=['lvl0', 'lvl1'])
10831083

10841084
df = DataFrame(np.arange(16).reshape(4, 4), index=index, columns=columns)
@@ -1102,13 +1102,23 @@ def mklbl(prefix,n):
11021102
assert_frame_equal(result, expected)
11031103

11041104
# columns
1105-
result = df.loc[:,(slice(None),['world'])]
1106-
expected = df.iloc[:,[3]]
1105+
result = df.loc[:,(slice(None),['foo'])]
1106+
expected = df.iloc[:,[1,3]]
11071107
assert_frame_equal(result, expected)
11081108

11091109
# both
1110-
result = df.loc[(slice(None),1),(slice(None),['world'])]
1111-
expected = df.iloc[[0,3],[3]]
1110+
result = df.loc[(slice(None),1),(slice(None),['foo'])]
1111+
expected = df.iloc[[0,3],[1,3]]
1112+
assert_frame_equal(result, expected)
1113+
1114+
result = df.loc['A','a']
1115+
expected = DataFrame(dict(bar = [1,5,9], foo = [0,4,8]),
1116+
index=Index([1,2,3],name='two'),
1117+
columns=Index(['bar','foo'],name='lvl1'))
1118+
assert_frame_equal(result, expected)
1119+
1120+
result = df.loc[(slice(None),[1,2]),:]
1121+
expected = df.iloc[[0,1,3]]
11121122
assert_frame_equal(result, expected)
11131123

11141124
# ambiguous cases

0 commit comments

Comments
 (0)