Skip to content

BUG: Index.take may add unnecessary freq attribute #10791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 12, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ Performance Improvements
- 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`)
- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`)
- Significantly improved performance of indexing ``MultiIndex`` with slicers (:issue:`10287`)
- 8x improvement in ``iloc`` using list-like input (:issue:`10791`)
- Improved performance of ``Series.isin`` for datetimelike/integer Series (:issue:`10287`)
- 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`)
- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`)
Expand Down Expand Up @@ -624,7 +625,7 @@ Bug Fixes
- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)
- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`)
- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)

- Bug in ``Index.take`` where an unnecessary ``freq`` attribute could be added to the result (:issue:`10791`)

- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`)
- Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead of the original dtype (:issue:`9431`)
Expand Down
10 changes: 4 additions & 6 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,7 @@ def _ensure_compat_concat(indexes):

return indexes

def take(self, indexer, axis=0):
def take(self, indices, axis=0):
"""
return a new Index of the values selected by the indices

Expand All @@ -1203,11 +1203,9 @@ def take(self, indexer, axis=0):
numpy.ndarray.take
"""

indexer = com._ensure_platform_int(indexer)
taken = np.array(self).take(indexer)

# by definition cannot propagate freq
return self._shallow_copy(taken, freq=None)
indices = com._ensure_platform_int(indices)
taken = self.values.take(indices)
return self._shallow_copy(taken)

def putmask(self, mask, value):
"""
Expand Down
15 changes: 10 additions & 5 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ def test_take(self):
expected = ind[indexer]
self.assertTrue(result.equals(expected))

if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
# GH 10791
with tm.assertRaises(AttributeError):
ind.freq

def test_setops_errorcases(self):
for name, idx in compat.iteritems(self.indices):
# # non-iterable input
Expand Down Expand Up @@ -4775,7 +4780,7 @@ def test_repr_roundtrip(self):

mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second'])
str(mi)

if compat.PY3:
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
else:
Expand All @@ -4784,11 +4789,11 @@ def test_repr_roundtrip(self):
tm.assert_index_equal(result, mi, exact=False)
self.assertEqual(mi.get_level_values('first').inferred_type, 'string')
self.assertEqual(result.get_level_values('first').inferred_type, 'unicode')

mi_u = MultiIndex.from_product([list(u'ab'),range(3)],names=['first','second'])
result = eval(repr(mi_u))
tm.assert_index_equal(result, mi_u, exact=True)
tm.assert_index_equal(result, mi_u, exact=True)

# formatting
if compat.PY3:
str(mi)
Expand All @@ -4810,7 +4815,7 @@ def test_repr_roundtrip(self):

mi = MultiIndex.from_product([list(u'abcdefg'),range(10)],names=['first','second'])
result = eval(repr(mi_u))
tm.assert_index_equal(result, mi_u, exact=True)
tm.assert_index_equal(result, mi_u, exact=True)

def test_str(self):
# tested elsewhere
Expand Down
6 changes: 4 additions & 2 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,12 @@ def take(self, indices, axis=0):
"""
Analogous to ndarray.take
"""
maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices), len(self))
indices = com._ensure_int64(indices)
maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
if isinstance(maybe_slice, slice):
return self[maybe_slice]
return super(DatetimeIndexOpsMixin, self).take(indices, axis)
taken = self.asi8.take(indices)
return self._shallow_copy(taken, freq=None)

def get_duplicates(self):
values = Index.get_duplicates(self)
Expand Down
13 changes: 13 additions & 0 deletions vb_suite/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,16 @@

multiindex_slicers = Benchmark('mdt2.loc[idx[test_A-eps_A:test_A+eps_A,test_B-eps_B:test_B+eps_B,test_C-eps_C:test_C+eps_C,test_D-eps_D:test_D+eps_D],:]', setup,
start_date=datetime(2015, 1, 1))

#----------------------------------------------------------------------
# take

# Setup code shared by the two ``take`` benchmarks below.  It is appended to
# ``common_setup`` and builds:
#   * ``s``  -- a 100000-element Series of random floats, constructed without
#     an explicit index;
#   * ``ts`` -- a 100000-element Series of random floats indexed by a
#     ``date_range`` starting at '2011-01-01' with ``freq='S'``;
#   * ``indexer`` -- a 100000-element list (a 5-item pattern repeated 20000
#     times) that is passed to ``take``.
# NOTE(review): ``indexer`` holds booleans rather than integer positions --
# confirm this is the input ``take`` is meant to be timed with.
setup = common_setup + """
s = Series(np.random.rand(100000))
ts = Series(np.random.rand(100000),
index=date_range('2011-01-01', freq='S', periods=100000))
indexer = [True, False, True, True, False] * 20000
"""

# Time ``take`` on the Series built without an explicit index (``s``) ...
series_take_intindex = Benchmark("s.take(indexer)", setup)
# ... and on the Series indexed by the ``date_range`` (``ts``).
series_take_dtindex = Benchmark("ts.take(indexer)", setup)