Skip to content

BUG: in multi-indexing with a partial string selection (GH4758) #4761

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 6, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices (:issue:`4726`)
- Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. (:issue:`4328`)
- Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`)
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)

pandas 0.12
===========
Expand Down
42 changes: 32 additions & 10 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2596,10 +2596,15 @@ def _maybe_drop_levels(indexer, levels, drop_level):
if not drop_level:
return self[indexer]
# kludgearound
new_index = self[indexer]
orig_index = new_index = self[indexer]
levels = [self._get_level_number(i) for i in levels]
for i in sorted(levels, reverse=True):
new_index = new_index.droplevel(i)
try:
new_index = new_index.droplevel(i)
except:

# no dropping here
return orig_index
return new_index

if isinstance(level, (tuple, list)):
Expand Down Expand Up @@ -2635,20 +2640,37 @@ def _maybe_drop_levels(indexer, levels, drop_level):
pass

if not any(isinstance(k, slice) for k in key):
if len(key) == self.nlevels:
if self.is_unique:
return self._engine.get_loc(_values_from_object(key)), None
else:
indexer = slice(*self.slice_locs(key, key))
return indexer, self[indexer]
else:
# partial selection

# partial selection
# Helper closure: turn `key` into a slice of this index and return that slice
# together with the result of `_maybe_drop_levels` for it. Relies on `self`,
# `drop_level` and `_maybe_drop_levels` from the enclosing scope.
# Raises KeyError(key) when the computed slice is empty.
def partial_selection(key):
# `key` is used as both bounds of slice_locs; presumably this yields the
# contiguous span of entries matching `key` -- confirm against slice_locs.
indexer = slice(*self.slice_locs(key, key))
# start == stop means the slice selects nothing, so treat the key as missing
if indexer.start == indexer.stop:
raise KeyError(key)
# positions within `key` whose entry is anything other than the full
# slice `slice(None, None)`; these are passed on as the levels argument
ilevels = [i for i in range(len(key))
if key[i] != slice(None, None)]
return indexer, _maybe_drop_levels(indexer, ilevels, drop_level)

if len(key) == self.nlevels:

if self.is_unique:

# here we have a completely specified key, but are using some partial string matching here
# GH4758
can_index_exactly = any([ l.is_all_dates and not isinstance(k,compat.string_types) for k, l in zip(key, self.levels) ])
if any([ l.is_all_dates for k, l in zip(key, self.levels) ]) and not can_index_exactly:
indexer = slice(*self.slice_locs(key, key))

# we have a multiple selection here
if not indexer.stop-indexer.start == 1:
return partial_selection(key)

key = tuple(self[indexer].tolist()[0])

return self._engine.get_loc(_values_from_object(key)), None
else:
return partial_selection(key)
else:
return partial_selection(key)
else:
indexer = None
for i, k in enumerate(key):
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,9 +952,15 @@ def _has_valid_type(self, key, axis):
if not len(ax):
raise KeyError("The [%s] axis is empty" % self.obj._get_axis_name(axis))

if not key in ax:
try:
if not key in ax:
raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis)))
except (TypeError):

# if we have a weird type of key/ax
raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis)))


return True

def _getitem_axis(self, key, axis=0):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1842,9 +1842,9 @@ def test_duplicate_mi(self):
columns=list('ABCD'))
df = df.set_index(['A','B'])
df = df.sortlevel(0)
result = df.loc[('foo','bar')]
expected = DataFrame([['foo','bar',1.0,1],['foo','bar',2.0,2],['foo','bar',5.0,5]],
columns=list('ABCD')).set_index(['A','B'])
result = df.loc[('foo','bar')]
assert_frame_equal(result,expected)

def test_multiindex_set_index(self):
Expand Down
1 change: 1 addition & 0 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']):
exp = self.data.groupby(rows)[col].mean()
tm.assert_series_equal(cmarg, exp)

res.sortlevel(inplace=True)
rmarg = res.xs(('All', ''))[:-1]
exp = self.data.groupby(cols)[col].mean()
tm.assert_series_equal(rmarg, exp)
Expand Down
Loading