From 83fd0b47afb204aa1fd6b1c9054f45705282c880 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 20 Dec 2014 10:32:14 +0900 Subject: [PATCH] API: Index.duplicated should return np.array --- doc/source/whatsnew/v0.16.0.txt | 2 ++ pandas/core/base.py | 12 ++++++------ pandas/core/index.py | 3 ++- pandas/core/series.py | 3 ++- pandas/tests/test_base.py | 18 ++++++++++++------ pandas/tests/test_multilevel.py | 12 ++++++++---- 6 files changed, 32 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 8122b60b736c9..244f15fdb1fbb 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -27,6 +27,8 @@ Backwards incompatible API changes .. _whatsnew_0160.api_breaking: +- ``Index.duplicated`` now returns ``np.array(dtype=bool)`` rather than ``Index(dtype=object)`` containing ``bool`` values. (:issue:`8875`) + Deprecations ~~~~~~~~~~~~ diff --git a/pandas/core/base.py b/pandas/core/base.py index f648af85b68c5..04b431ae8cf67 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,8 @@ _shared_docs = dict() -_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='') +_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', + duplicated='IndexOpsMixin') class StringMixin(object): @@ -486,14 +487,14 @@ def searchsorted(self, key, side='left'): @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, take_last=False, inplace=False): duplicated = self.duplicated(take_last=take_last) - result = self[~(duplicated.values).astype(bool)] + result = self[np.logical_not(duplicated)] if inplace: return self._update_inplace(result) else: return result _shared_docs['duplicated'] = ( - """Return boolean %(klass)s denoting duplicate values + """Return boolean %(duplicated)s denoting duplicate values Parameters ---------- @@ -502,7 +503,7 @@ def drop_duplicates(self, take_last=False, inplace=False): Returns ------- - duplicated : %(klass)s + duplicated 
: %(duplicated)s """) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) @@ -513,8 +514,7 @@ def duplicated(self, take_last=False): return self._constructor(duplicated, index=self.index).__finalize__(self) except AttributeError: - from pandas.core.index import Index - return Index(duplicated) + return np.array(duplicated, dtype=bool) #---------------------------------------------------------------------- # abstracts diff --git a/pandas/core/index.py b/pandas/core/index.py index d0253efb180f6..d2a3093e686a7 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -33,7 +33,8 @@ _unsortable_types = frozenset(('mixed', 'mixed-integer')) -_index_doc_kwargs = dict(klass='Index', inplace='') +_index_doc_kwargs = dict(klass='Index', inplace='', + duplicated='np.array') def _try_get_item(x): diff --git a/pandas/core/series.py b/pandas/core/series.py index 081e5c50946bc..60b601a462520 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -59,7 +59,8 @@ klass='Series', axes_single_arg="{0,'index'}", inplace="""inplace : boolean, default False - If True, performs operation inplace and returns None.""" + If True, performs operation inplace and returns None.""", + duplicated='Series' ) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index be5e102691fa0..61bfeb6631d68 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -614,8 +614,10 @@ def test_duplicated_drop_duplicates(self): continue # original doesn't have duplicates - expected = Index([False] * len(original)) - tm.assert_index_equal(original.duplicated(), expected) + expected = np.array([False] * len(original), dtype=bool) + duplicated = original.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) result = original.drop_duplicates() tm.assert_index_equal(result, original) self.assertFalse(result is original) @@ -625,15 +627,19 @@ def test_duplicated_drop_duplicates(self): # create repeated values, 
3rd and 5th values are duplicated idx = original[list(range(len(original))) + [5, 3]] - expected = Index([False] * len(original) + [True, True]) - tm.assert_index_equal(idx.duplicated(), expected) + expected = np.array([False] * len(original) + [True, True], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) tm.assert_index_equal(idx.drop_duplicates(), original) last_base = [False] * len(idx) last_base[3] = True last_base[5] = True - expected = Index(last_base) - tm.assert_index_equal(idx.duplicated(take_last=True), expected) + expected = np.array(last_base) + duplicated = idx.duplicated(take_last=True) + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) tm.assert_index_equal(idx.drop_duplicates(take_last=True), idx[~np.array(last_base)]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a10467cf7ab4a..f70d652b5b1eb 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2075,13 +2075,17 @@ def test_duplicated_drop_duplicates(self): # GH 4060 idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2 ,3], [1, 1, 1, 1, 2, 2])) - expected = Index([False, False, False, True, False, False]) - tm.assert_index_equal(idx.duplicated(), expected) + expected = np.array([False, False, False, True, False, False], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == bool) expected = MultiIndex.from_arrays(([1, 2, 3, 2 ,3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(), expected) - expected = Index([True, False, False, False, False, False]) - tm.assert_index_equal(idx.duplicated(take_last=True), expected) + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(take_last=True) + tm.assert_numpy_array_equal(duplicated, expected) + self.assertTrue(duplicated.dtype == 
bool) expected = MultiIndex.from_arrays(([2, 3, 1, 2 ,3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(take_last=True), expected)