Skip to content

Commit e5490ef

Browse files
committed
BUG: enable .ix to better handle indexes with duplicates, close #1201
1 parent 19e0d01 commit e5490ef

File tree

6 files changed

+55
-6
lines changed

6 files changed

+55
-6
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ pandas 0.8.0
3535
(#864)
3636
- Add limit argument for forward/backward filling to reindex, fillna,
3737
etc. (#825 and others)
38+
- Add support for indexes (dates or otherwise) with duplicates and common
39+
sense indexing/selection functionality
3840

3941
**Improvements to existing features**
4042

pandas/core/frame.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,8 +1469,11 @@ def _getitem_array(self, key):
14691469
raise ValueError('Item wrong length %d instead of %d!' %
14701470
(len(key), len(self.index)))
14711471

1472-
new_index = self.index[key]
1473-
return self.reindex(new_index)
1472+
inds, = key.nonzero()
1473+
return self.take(inds)
1474+
1475+
# new_index = self.index[key]
1476+
# return self.reindex(new_index)
14741477
else:
14751478
indexer = self.columns.get_indexer(key)
14761479
mask = indexer == -1
@@ -1698,12 +1701,21 @@ def xs(self, key, axis=0, level=None, copy=True):
16981701
index = self.index
16991702
if isinstance(index, MultiIndex):
17001703
loc, new_index = self.index.get_loc_level(key)
1701-
elif isinstance(index, DatetimeIndex):
1704+
else:
17021705
loc = self.index.get_loc(key)
1706+
1707+
if isinstance(loc, np.ndarray):
1708+
if loc.dtype == np.bool_:
1709+
inds, = loc.nonzero()
1710+
if len(inds) == 1:
1711+
loc = inds[0]
1712+
else:
1713+
return self.take(inds, axis=axis)
1714+
else:
1715+
return self.take(loc, axis=axis)
1716+
17031717
if not np.isscalar(loc):
17041718
new_index = self.index[loc]
1705-
else:
1706-
loc = self.index.get_loc(key)
17071719

17081720
if np.isscalar(loc):
17091721
new_values = self._data.fast_2d_xs(loc, copy=copy)

pandas/core/indexing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,9 @@ def _reindex(keys, level=None):
212212
else:
213213
# asarray can be unsafe, NumPy strings are weird
214214
keyarr = _asarray_tuplesafe(key)
215+
215216
if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
216-
keyarr = labels.take(keyarr)
217+
return self.obj.take(keyarr, axis=axis)
217218

218219
# this is not the most robust, but...
219220
if (isinstance(labels, MultiIndex) and

pandas/tests/test_frame.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,23 @@ def test_getitem_boolean_missing(self):
877877
def test_setitem_boolean_missing(self):
878878
pass
879879

880+
def test_getitem_setitem_ix_duplicates(self):
881+
# #1201
882+
df = DataFrame(np.random.randn(5, 3),
883+
index=['foo', 'foo', 'bar', 'baz', 'bar'])
884+
885+
result = df.ix['foo']
886+
expected = df[:2]
887+
assert_frame_equal(result, expected)
888+
889+
result = df.ix['bar']
890+
expected = df.ix[[2, 4]]
891+
assert_frame_equal(result, expected)
892+
893+
result = df.ix['baz']
894+
expected = df.ix[3]
895+
assert_series_equal(result, expected)
896+
880897
def test_get_value(self):
881898
for idx in self.frame.index:
882899
for col in self.frame.columns:

pandas/tseries/tests/test_timeseries.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,11 @@ def test_at_time(self):
565565
assert_series_equal(result, expected)
566566
tm.assert_frame_equal(result_df, exp_df)
567567

568+
chunk = df.ix['1/4/2000':]
569+
result = chunk.ix[time(9, 30)]
570+
expected = result_df[-1:]
571+
tm.assert_frame_equal(result, expected)
572+
568573
def test_dti_constructor_preserve_dti_freq(self):
569574
rng = date_range('1/1/2000', '1/2/2000', freq='5min')
570575

vb_suite/frame_methods.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,15 @@
5050

5151
frame_multiaxis_reindex = Benchmark('df.reindex(index=idx, columns=cols)',
5252
setup, start_date=datetime(2012, 5, 6))
53+
54+
#----------------------------------------------------------------------
55+
# boolean indexing
56+
57+
setup = common_setup + """
58+
df = DataFrame(randn(10000, 100))
59+
bool_arr = np.zeros(10000, dtype=bool)
60+
bool_arr[:1000] = True
61+
"""
62+
63+
frame_boolean_row_select = Benchmark('df[bool_arr]', setup,
64+
start_date=datetime(2011, 1, 1))

0 commit comments

Comments
 (0)