Skip to content

Commit bd2e2a1

Browse files
committed
TST: tests for per_axis_per_level_getitem
ENH: add core/indexing.py/_getitem_nested_tuple to handle the nested_tuple cases for partial multi-indexing
1 parent 30eb6db commit bd2e2a1

File tree

3 files changed

+148
-63
lines changed

3 files changed

+148
-63
lines changed

pandas/core/index.py

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3249,7 +3249,7 @@ def _get_level_indexer(self, key, level=0):
32493249
j = labels.searchsorted(loc, side='right')
32503250
return slice(i, j)
32513251

3252-
def get_locs(self, tup):
3252+
def get_specs(self, tup):
32533253
"""Convert a tuple of slices/label lists/labels to a level-wise spec
32543254
32553255
Parameters
@@ -3322,8 +3322,9 @@ def get_locs(self, tup):
33223322
stop = level.get_loc(k.stop)
33233323
else:
33243324
# a single label
3325-
start = level.get_loc(k)
3326-
stop = start
3325+
# make this into a list of a tuple
3326+
ranges.append([level.get_loc(k)])
3327+
continue
33273328

33283329
ranges.append((start,stop))
33293330

@@ -3337,14 +3338,14 @@ def get_locs(self, tup):
33373338

33383339
return ranges
33393340

3340-
def locs_to_indexer(self, specs):
3341+
def specs_to_indexer(self, specs):
33413342
""" Take a location specification to an indexer
33423343
33433344
Parameters
33443345
----------
33453346
self: a sufficiently lexsorted, unique/non-dupe MultiIndex.
33463347
specs: a list of 2-tuples/list of label positions. Specifically, the
3347-
output of _tuple_to_mi_locs.
3348+
output of get_specs
33483349
len(specs) must match ix.nlevels.
33493350
33503351
Returns
@@ -3362,8 +3363,8 @@ def locs_to_indexer(self, specs):
33623363
('A2', 'B0')
33633364
('A2', 'B1')
33643365
3365-
>>> locs = mi.get_locs((slice('A0','A2'),['B0', 'B1']))
3366-
>>> list(mi.locs_to_indexer(locs))
3366+
>>> locs = mi.get_specs((slice('A0','A2'),['B0', 'B1']))
3367+
>>> list(mi.specs_to_indexer(locs))
33673368
[0, 1, 2, 3]
33683369
33693370
Which are all the labels having 'A0' to 'A2' (non-inclusive) at level=0
@@ -3393,33 +3394,11 @@ def _iter_vectorize(specs, i=0):
33933394
if len(specs)-1 == i:
33943395
return np.array(valrange)
33953396
else:
3396-
tmpl = np.array([v for v in _iter_vectorize(specs,i+1)])
3397+
tmpl=np.array([v for v in _iter_vectorize(specs,i+1)])
33973398
res=np.tile(tmpl,(len(valrange),1))
33983399
steps=(np.array(valrange)*step_size).reshape((len(valrange),1))
33993400
return (res+steps).flatten()
34003401

3401-
3402-
def _iter_generator(specs, i=0):
3403-
step_size = giant_steps[i]
3404-
spec=specs[i]
3405-
if isinstance(spec,tuple):
3406-
# tuples are 2-tuples of (start,stop) label indices to include
3407-
valrange = compat.range(*spec)
3408-
elif isinstance(spec,list):
3409-
# lists are discrete label indicies to include
3410-
valrange = spec
3411-
3412-
if len(specs)-1 == i:
3413-
# base case
3414-
for v in valrange:
3415-
yield v
3416-
else:
3417-
for base in valrange:
3418-
base *= step_size
3419-
for v in _iter_generator(specs,i+1):
3420-
yield base + v
3421-
# validate
3422-
34233402
return _iter_vectorize(specs)
34243403

34253404
def truncate(self, before=None, after=None):

pandas/core/indexing.py

Lines changed: 83 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -693,35 +693,39 @@ def _convert_for_reindex(self, key, axis=0):
693693

694694
return keyarr
695695

696-
def _getitem_lowerdim(self, tup):
696+
def _handle_lowerdim_multi_index_axis0(self, tup):
697+
# we have an axis0 multi-index, handle or raise
697698

698-
ax0 = self.obj._get_axis(0)
699-
# a bit kludgy
700-
if isinstance(ax0, MultiIndex):
701-
try:
702-
# fast path for series or for tup devoid of slices
703-
return self._get_label(tup, axis=0)
704-
except TypeError:
705-
# slices are unhashable
706-
pass
707-
except Exception as e1:
708-
if isinstance(tup[0], (slice, Index)):
709-
raise IndexingError("Handle elsewhere")
699+
try:
700+
# fast path for series or for tup devoid of slices
701+
return self._get_label(tup, axis=0)
702+
except TypeError:
703+
# slices are unhashable
704+
pass
705+
except Exception as e1:
706+
if isinstance(tup[0], (slice, Index)):
707+
raise IndexingError("Handle elsewhere")
710708

711-
# raise the error if we are not sorted
712-
if not ax0.is_lexsorted_for_tuple(tup):
713-
raise e1
709+
# raise the error if we are not sorted
710+
ax0 = self.obj._get_axis(0)
711+
if not ax0.is_lexsorted_for_tuple(tup):
712+
raise e1
714713

715-
# GH911 introduced this clause, but the regression test
716-
# added for it now passes even without it. Let's rock the boat.
717-
# 2014/01/27
714+
return None
718715

719-
# # should we abort, or keep going?
720-
# try:
721-
# loc = ax0.get_loc(tup[0])
722-
# except KeyError:
723-
# raise e1
716+
def _getitem_lowerdim(self, tup):
724717

718+
# we may have a nested tuples indexer here
719+
if any([ isinstance(ax, MultiIndex) for ax in self.obj.axes ]):
720+
if any([ _is_nested_tuple(tup,ax) for ax in self.obj.axes ]):
721+
return self._getitem_nested_tuple(tup)
722+
723+
# we may be using a tuple to represent multiple dimensions here
724+
ax0 = self.obj._get_axis(0)
725+
if isinstance(ax0, MultiIndex):
726+
result = self._handle_lowerdim_multi_index_axis0(tup)
727+
if result is not None:
728+
return result
725729

726730
if len(tup) > self.obj.ndim:
727731
raise IndexingError("Too many indexers. handle elsewhere")
@@ -760,7 +764,31 @@ def _getitem_lowerdim(self, tup):
760764

761765
raise IndexingError('not applicable')
762766

763-
def _getitem_axis(self, key, axis=0):
767+
def _getitem_nested_tuple(self, tup):
768+
# we have a nested tuple so have at least 1 multi-index level
769+
# we should be able to match up the dimensionality here
770+
771+
# we have too many indexers for our dim, but have at least 1
772+
# multi-index dimension, try to see if we have something like
773+
# a tuple passed to a series with a multi-index
774+
if len(tup) > self.ndim:
775+
return self._handle_lowerdim_multi_index_axis0(tup)
776+
777+
# handle the multi-axis by taking sections and reducing
778+
# this is iterative
779+
obj = self.obj
780+
axis = 0
781+
for key in tup:
782+
783+
obj = getattr(obj, self.name)._getitem_axis(key, axis=axis, validate_iterable=True)
784+
axis += 1
785+
786+
if obj.ndim < self.ndim:
787+
axis -= 1
788+
789+
return obj
790+
791+
def _getitem_axis(self, key, axis=0, validate_iterable=False):
764792

765793
self._has_valid_type(key, axis)
766794
labels = self.obj._get_axis(axis)
@@ -1058,7 +1086,7 @@ def __getitem__(self, key):
10581086
else:
10591087
return self._getitem_axis(key, axis=0)
10601088

1061-
def _getitem_axis(self, key, axis=0):
1089+
def _getitem_axis(self, key, axis=0, validate_iterable=False):
10621090
raise NotImplementedError()
10631091

10641092
def _getbool_axis(self, key, axis=0):
@@ -1135,6 +1163,7 @@ def _has_valid_type(self, key, axis):
11351163
# require all elements in the index
11361164
idx = _ensure_index(key)
11371165
if not idx.isin(ax).all():
1166+
11381167
raise KeyError("[%s] are not in ALL in the [%s]" %
11391168
(key, self.obj._get_axis_name(axis)))
11401169

@@ -1164,7 +1193,7 @@ def error():
11641193

11651194
return True
11661195

1167-
def _getitem_axis(self, key, axis=0):
1196+
def _getitem_axis(self, key, axis=0, validate_iterable=False):
11681197
labels = self.obj._get_axis(axis)
11691198

11701199
if isinstance(key, slice):
@@ -1178,12 +1207,15 @@ def _getitem_axis(self, key, axis=0):
11781207
if hasattr(key, 'ndim') and key.ndim > 1:
11791208
raise ValueError('Cannot index with multidimensional key')
11801209

1210+
if validate_iterable:
1211+
self._has_valid_type(key, axis)
11811212
return self._getitem_iterable(key, axis=axis)
1182-
elif isinstance(key, tuple) and isinstance(labels, MultiIndex) and \
1183-
any([isinstance(x,slice) for x in key]):
1184-
locs = labels.get_locs(key)
1185-
g = labels.locs_to_indexer(locs)
1186-
return self.obj.iloc[g]
1213+
elif _is_nested_tuple(key, labels):
1214+
specs = labels.get_specs(key)
1215+
g = labels.specs_to_indexer(specs)
1216+
indexer = [ slice(None) ] * self.ndim
1217+
indexer[axis] = g
1218+
return self.obj.iloc[tuple(indexer)]
11871219
else:
11881220
self._has_valid_type(key, axis)
11891221
return self._get_label(key, axis=axis)
@@ -1256,7 +1288,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
12561288
else:
12571289
return self.obj.take(slice_obj, axis=axis, convert=False)
12581290

1259-
def _getitem_axis(self, key, axis=0):
1291+
def _getitem_axis(self, key, axis=0, validate_iterable=False):
12601292

12611293
if isinstance(key, slice):
12621294
self._has_valid_type(key, axis)
@@ -1515,6 +1547,24 @@ def _maybe_convert_ix(*args):
15151547
return args
15161548

15171549

1550+
def _is_nested_tuple(tup, labels):
1551+
# check for a compatible nested tuple and multiindexes among the axes
1552+
1553+
if not isinstance(tup, tuple):
1554+
return False
1555+
1556+
# are we nested tuple of: tuple,list,slice
1557+
for i, k in enumerate(tup):
1558+
1559+
#if i > len(axes):
1560+
# raise IndexingError("invalid indxing tuple passed, has too many indexers for this object")
1561+
#ax = axes[i]
1562+
if isinstance(k, (tuple, list, slice)):
1563+
return isinstance(labels, MultiIndex)
1564+
1565+
return False
1566+
1567+
15181568
def _is_null_slice(obj):
15191569
return (isinstance(obj, slice) and obj.start is None and
15201570
obj.stop is None and obj.step is None)

pandas/tests/test_indexing.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,62 @@ def test_xs_multiindex(self):
10621062
expected.columns = expected.columns.droplevel('lvl1')
10631063
assert_frame_equal(result, expected)
10641064

1065+
def test_per_axis_per_level_getitem(self):
1066+
1067+
# GH6134
1068+
# example test case
1069+
def mklbl(prefix,n):
1070+
return ["%s%s" % (prefix,i) for i in range(n)]
1071+
1072+
ix = MultiIndex.from_product([mklbl('A',5),mklbl('B',7),mklbl('C',4),mklbl('D',2)])
1073+
df = DataFrame(np.arange(len(ix.get_values())),index=ix)
1074+
result = df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:]
1075+
expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if (a == 'A1' or a == 'A2') and (c == 'C1' or c == 'C3')]]
1076+
assert_frame_equal(result, expected)
1077+
1078+
# test multi-index slicing with per axis and per index controls
1079+
index = MultiIndex.from_tuples([('A',1),('A',2),('A',3),('B',1)],
1080+
names=['one','two'])
1081+
columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),('b','hello'),('b','world')],
1082+
names=['lvl0', 'lvl1'])
1083+
1084+
df = DataFrame(np.arange(16).reshape(4, 4), index=index, columns=columns)
1085+
df = df.sortlevel(axis=0).sortlevel(axis=1)
1086+
1087+
# identity
1088+
result = df.loc[(slice(None),slice(None)),:]
1089+
assert_frame_equal(result, df)
1090+
result = df.loc[(slice(None),slice(None)),(slice(None),slice(None))]
1091+
assert_frame_equal(result, df)
1092+
result = df.loc[:,(slice(None),slice(None))]
1093+
assert_frame_equal(result, df)
1094+
1095+
# index
1096+
result = df.loc[(slice(None),[1]),:]
1097+
expected = df.iloc[[0,3]]
1098+
assert_frame_equal(result, expected)
1099+
1100+
result = df.loc[(slice(None),1),:]
1101+
expected = df.iloc[[0,3]]
1102+
assert_frame_equal(result, expected)
1103+
1104+
# columns
1105+
result = df.loc[:,(slice(None),['world'])]
1106+
expected = df.iloc[:,[3]]
1107+
assert_frame_equal(result, expected)
1108+
1109+
# both
1110+
result = df.loc[(slice(None),1),(slice(None),['world'])]
1111+
expected = df.iloc[[0,3],[3]]
1112+
assert_frame_equal(result, expected)
1113+
1114+
# ambiguous cases
1115+
# these can be multiply interpreted
1116+
# but we can catch this in some cases
1117+
def f():
1118+
df.loc[(slice(None),[1])]
1119+
self.assertRaises(KeyError, f)
1120+
10651121
def test_getitem_multiindex(self):
10661122

10671123
# GH 5725

0 commit comments

Comments
 (0)