From 3d916ee5d1bd2eb9b2a6e9348256ae0b9db023b3 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 12:25:50 +0100 Subject: [PATCH 01/16] Create separate docstring for index duplicated --- pandas/core/indexes/base.py | 49 ++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7e6ae88a26e7c..eb7f1d553acb9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4021,8 +4021,55 @@ def unique(self, level=None): def drop_duplicates(self, keep='first'): return super(Index, self).drop_duplicates(keep=keep) - @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): + """ + Indicate duplicate index values + + Duplicated values are indicated as ``True`` values in the resulting + array. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. + - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. 
+ + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) + >>> idx.duplicated() + array([False, False, True, False, True, False]) + + which is equivalent to + + >>> idx.duplicated(keep='first') + array([False, False, True, False, True, False]) + + By using 'last', the last occurrence of each set of duplicated values is + set on False and all others on True: + + >>> idx.duplicated(keep='last') + array([ True, False, True, False, False, False]) + + By setting keep on ``False``, all duplicates are True: + + >>> idx.duplicated(keep=False) + array([ True, False, True, False, True, False]) + + Returns + ------- + numpy.ndarray + + See Also + -------- + pandas.Series.duplicated : equivalent method on pandas.Series + """ return super(Index, self).duplicated(keep=keep) _index_shared_docs['fillna'] = """ From 9f4cfc7d7a6d7d008e22f1ee7e677640b9a56256 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 12:30:51 +0100 Subject: [PATCH 02/16] --amend --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index eb7f1d553acb9..583cad69ec604 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4026,7 +4026,8 @@ def duplicated(self, keep='first'): Indicate duplicate index values Duplicated values are indicated as ``True`` values in the resulting - array. + array. Either all duplicates, all except the first or all except the + last occurrence of duplicates can be indicated. 
Parameters ---------- From faa87dac3c4a1ddc2916bca720ec9b60cbff2615 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 12:32:42 +0100 Subject: [PATCH 03/16] Remove duplicat element from shared dict _index_doc_kwargs --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 583cad69ec604..cbf14c9f3a538 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -73,7 +73,7 @@ _index_doc_kwargs = dict(klass='Index', inplace='', target_klass='Index', - unique='Index', duplicated='np.ndarray') + unique='Index') _index_shared_docs = dict() From a35c25bd5ecb94d031b873dfa8637daf0d86bb27 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 12:49:33 +0100 Subject: [PATCH 04/16] Add docstring of Series duplicated --- pandas/core/base.py | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/pandas/core/base.py b/pandas/core/base.py index 280b8849792e3..368082f7fb451 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1234,6 +1234,79 @@ def drop_duplicates(self, keep='first', inplace=False): @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): + """ + Indicate duplicate Series values + + Duplicated values are indicated as ``True`` values in the resulting + Series. Either all duplicates, all except the first or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. + - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. 
+ + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> animals = pd.Series(['lama', 'cow', 'lama', + ... 'beetle', 'lama', 'hippo']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + 5 False + dtype: bool + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + 5 False + dtype: bool + + By using 'last', the last occurrence of each set of duplicated values is + set on False and all others on True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + 5 False + + By setting keep on ``False``, all duplicates are True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + dtype: bool + + Returns + ------- + pandas.core.series.Series + + See Also + -------- + pandas.Index.duplicated : equivalent method on pandas.Index + """ from pandas.core.algorithms import duplicated if isinstance(self, ABCIndexClass): if self.is_unique: From 688a9d3e36d54e701fe34627ad74dc7ef5ca277e Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 12:53:23 +0100 Subject: [PATCH 05/16] Clean old docstring referencegs --- pandas/core/base.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 368082f7fb451..92598065386f9 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -30,7 +30,7 @@ _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', - unique='IndexOpsMixin', duplicated='IndexOpsMixin') + unique='IndexOpsMixin') class StringMixin(object): @@ -1215,24 +1215,6 @@ def drop_duplicates(self, keep='first', inplace=False): else: return result - _shared_docs['duplicated'] = ( - """Return boolean %(duplicated)s denoting duplicate values - - Parameters - ---------- - keep : {'first', 'last', False}, default 'first' 
- - ``first`` : Mark duplicates as ``True`` except for the first - occurrence. - - ``last`` : Mark duplicates as ``True`` except for the last - occurrence. - - False : Mark all duplicates as ``True``. - - Returns - ------- - duplicated : %(duplicated)s - """) - - @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): """ Indicate duplicate Series values From 1daaed330d9a145d08a21a47a9b09bc88764f264 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:00:46 +0100 Subject: [PATCH 06/16] Reset dict entries for existing docstrings --- pandas/core/base.py | 2 +- pandas/core/indexes/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 92598065386f9..8ec343c6605ba 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -30,7 +30,7 @@ _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', - unique='IndexOpsMixin') + unique='IndexOpsMixin', duplicated='IndexOpsMixin') class StringMixin(object): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cbf14c9f3a538..583cad69ec604 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -73,7 +73,7 @@ _index_doc_kwargs = dict(klass='Index', inplace='', target_klass='Index', - unique='Index') + unique='Index', duplicated='np.ndarray') _index_shared_docs = dict() From 28c9bf6ec2a48098b78f901b36950ae84ac9e6b8 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:16:08 +0100 Subject: [PATCH 07/16] Update docstring reference to Index version --- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/multi.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 218851b1713f2..acf058e1b031d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -399,7 +399,7 @@ def unique(self, level=None): 
return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) - @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) + @Appender(Index.duplicated.__doc__) def duplicated(self, keep='first'): from pandas._libs.hashtable import duplicated_int64 codes = self.codes.astype('i8') diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 73f4aee1c4880..22cf5ec11adb6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -916,7 +916,7 @@ def f(k, stringify): for k, stringify in zip(key, self._have_mixed_levels)]) return hash_tuple(key) - @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) + @Appender(Index.duplicated.__doc__) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index from pandas._libs.hashtable import duplicated_int64 From 4aad42ab3ed917236f1897b35f3dc85ad6b5cc3a Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:16:27 +0100 Subject: [PATCH 08/16] Move docstring to series implementation level --- pandas/core/base.py | 73 ------------------------------------------ pandas/core/series.py | 74 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 74 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 8ec343c6605ba..909236cc7abd7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1216,79 +1216,6 @@ def drop_duplicates(self, keep='first', inplace=False): return result def duplicated(self, keep='first'): - """ - Indicate duplicate Series values - - Duplicated values are indicated as ``True`` values in the resulting - Series. Either all duplicates, all except the first or all except the - last occurrence of duplicates can be indicated. - - Parameters - ---------- - keep : {'first', 'last', False}, default 'first' - - 'first' : Mark duplicates as ``True`` except for the first - occurrence. - - 'last' : Mark duplicates as ``True`` except for the last - occurrence. 
- - ``False`` : Mark all duplicates as ``True``. - - Examples - -------- - By default, for each set of duplicated values, the first occurrence is - set on False and all others on True: - - >>> animals = pd.Series(['lama', 'cow', 'lama', - ... 'beetle', 'lama', 'hippo']) - >>> animals.duplicated() - 0 False - 1 False - 2 True - 3 False - 4 True - 5 False - dtype: bool - - which is equivalent to - - >>> animals.duplicated(keep='first') - 0 False - 1 False - 2 True - 3 False - 4 True - 5 False - dtype: bool - - By using 'last', the last occurrence of each set of duplicated values is - set on False and all others on True: - - >>> animals.duplicated(keep='last') - 0 True - 1 False - 2 True - 3 False - 4 False - 5 False - - By setting keep on ``False``, all duplicates are True: - - >>> animals.duplicated(keep=False) - 0 True - 1 False - 2 True - 3 False - 4 True - 5 False - dtype: bool - - Returns - ------- - pandas.core.series.Series - - See Also - -------- - pandas.Index.duplicated : equivalent method on pandas.Index - """ from pandas.core.algorithms import duplicated if isinstance(self, ABCIndexClass): if self.is_unique: diff --git a/pandas/core/series.py b/pandas/core/series.py index 069f0372ab6e1..9a6eb557149e3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1320,8 +1320,80 @@ def unique(self): def drop_duplicates(self, keep='first', inplace=False): return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) - @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs) def duplicated(self, keep='first'): + """ + Indicate duplicate Series values + + Duplicated values are indicated as ``True`` values in the resulting + Series. Either all duplicates, all except the first or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. 
+ - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. + + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> animals = pd.Series(['lama', 'cow', 'lama', + ... 'beetle', 'lama', 'hippo']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + 5 False + dtype: bool + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + 5 False + dtype: bool + + By using 'last', the last occurrence of each set of duplicated values is + set on False and all others on True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + 5 False + + By setting keep on ``False``, all duplicates are True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + dtype: bool + + Returns + ------- + pandas.core.series.Series + + See Also + -------- + pandas.Index.duplicated : equivalent method on pandas.Index + """ return super(Series, self).duplicated(keep=keep) def idxmin(self, axis=None, skipna=True, *args, **kwargs): From b4cd28bead822885b327a5ffa9fb0dd9ac18aba3 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:18:51 +0100 Subject: [PATCH 09/16] Fix docstring guide errors --- pandas/core/indexes/base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 583cad69ec604..0fcb6b0cfa580 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4023,7 +4023,7 @@ def drop_duplicates(self, keep='first'): def duplicated(self, keep='first'): """ - Indicate duplicate index values + Indicate duplicate index values. Duplicated values are indicated as ``True`` values in the resulting array. 
Either all duplicates, all except the first or all except the @@ -4045,23 +4045,23 @@ def duplicated(self, keep='first'): >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) >>> idx.duplicated() - array([False, False, True, False, True, False]) + array([False, False, True, False, True, False], dtype=bool) which is equivalent to >>> idx.duplicated(keep='first') - array([False, False, True, False, True, False]) + array([False, False, True, False, True, False], dtype=bool) By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: >>> idx.duplicated(keep='last') - array([ True, False, True, False, False, False]) + array([ True, False, True, False, False, False], dtype=bool) By setting keep on ``False``, all duplicates are True: >>> idx.duplicated(keep=False) - array([ True, False, True, False, True, False]) + array([ True, False, True, False, True, False], dtype=bool) Returns ------- From 7f6ca4eb647840c6429f22c37b0f699522c259d5 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:20:48 +0100 Subject: [PATCH 10/16] --amend --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9a6eb557149e3..641caa779ec6e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1322,7 +1322,7 @@ def drop_duplicates(self, keep='first', inplace=False): def duplicated(self, keep='first'): """ - Indicate duplicate Series values + Indicate duplicate Series values. Duplicated values are indicated as ``True`` values in the resulting Series. 
Either all duplicates, all except the first or all except the @@ -1374,6 +1374,7 @@ def duplicated(self, keep='first'): 3 False 4 False 5 False + dtype: bool By setting keep on ``False``, all duplicates are True: From b48e13f5a1798d5737663d8139955b04c93cd01c Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:25:40 +0100 Subject: [PATCH 11/16] Remove trailing whitespaces --- pandas/core/indexes/base.py | 20 ++++++++++---------- pandas/core/series.py | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0fcb6b0cfa580..bafbe178b53c2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1156,7 +1156,7 @@ def to_frame(self, index=True): >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') >>> idx.to_frame() animal - animal + animal Ant Ant Bear Bear Cow Cow @@ -4026,7 +4026,7 @@ def duplicated(self, keep='first'): Indicate duplicate index values. Duplicated values are indicated as ``True`` values in the resulting - array. Either all duplicates, all except the first or all except the + array. Either all duplicates, all except the first or all except the last occurrence of duplicates can be indicated. Parameters @@ -4034,39 +4034,39 @@ def duplicated(self, keep='first'): keep : {'first', 'last', False}, default 'first' - 'first' : Mark duplicates as ``True`` except for the first occurrence. - - 'last' : Mark duplicates as ``True`` except for the last + - 'last' : Mark duplicates as ``True`` except for the last occurrence. - ``False`` : Mark all duplicates as ``True``. 
- + Examples -------- - By default, for each set of duplicated values, the first occurrence is + By default, for each set of duplicated values, the first occurrence is set on False and all others on True: >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) >>> idx.duplicated() array([False, False, True, False, True, False], dtype=bool) - which is equivalent to + which is equivalent to >>> idx.duplicated(keep='first') array([False, False, True, False, True, False], dtype=bool) - By using 'last', the last occurrence of each set of duplicated values is + By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: >>> idx.duplicated(keep='last') array([ True, False, True, False, False, False], dtype=bool) By setting keep on ``False``, all duplicates are True: - + >>> idx.duplicated(keep=False) array([ True, False, True, False, True, False], dtype=bool) - + Returns ------- numpy.ndarray - + See Also -------- pandas.Series.duplicated : equivalent method on pandas.Series diff --git a/pandas/core/series.py b/pandas/core/series.py index 641caa779ec6e..213e2af565f6a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1325,7 +1325,7 @@ def duplicated(self, keep='first'): Indicate duplicate Series values. Duplicated values are indicated as ``True`` values in the resulting - Series. Either all duplicates, all except the first or all except the + Series. Either all duplicates, all except the first or all except the last occurrence of duplicates can be indicated. Parameters @@ -1333,16 +1333,16 @@ def duplicated(self, keep='first'): keep : {'first', 'last', False}, default 'first' - 'first' : Mark duplicates as ``True`` except for the first occurrence. - - 'last' : Mark duplicates as ``True`` except for the last + - 'last' : Mark duplicates as ``True`` except for the last occurrence. - ``False`` : Mark all duplicates as ``True``. 
- + Examples -------- - By default, for each set of duplicated values, the first occurrence is + By default, for each set of duplicated values, the first occurrence is set on False and all others on True: - >>> animals = pd.Series(['lama', 'cow', 'lama', + >>> animals = pd.Series(['lama', 'cow', 'lama', ... 'beetle', 'lama', 'hippo']) >>> animals.duplicated() 0 False @@ -1353,7 +1353,7 @@ def duplicated(self, keep='first'): 5 False dtype: bool - which is equivalent to + which is equivalent to >>> animals.duplicated(keep='first') 0 False @@ -1364,7 +1364,7 @@ def duplicated(self, keep='first'): 5 False dtype: bool - By using 'last', the last occurrence of each set of duplicated values is + By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: >>> animals.duplicated(keep='last') @@ -1377,7 +1377,7 @@ def duplicated(self, keep='first'): dtype: bool By setting keep on ``False``, all duplicates are True: - + >>> animals.duplicated(keep=False) 0 True 1 False @@ -1386,11 +1386,11 @@ def duplicated(self, keep='first'): 4 True 5 False dtype: bool - + Returns ------- pandas.core.series.Series - + See Also -------- pandas.Index.duplicated : equivalent method on pandas.Index From df6e7c8a612a4758549218b90895b17cd24fb582 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:26:47 +0100 Subject: [PATCH 12/16] Fixe too long lines --- pandas/core/indexes/base.py | 4 ++-- pandas/core/series.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bafbe178b53c2..c640ca4e760bd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4052,8 +4052,8 @@ def duplicated(self, keep='first'): >>> idx.duplicated(keep='first') array([False, False, True, False, True, False], dtype=bool) - By using 'last', the last occurrence of each set of duplicated values is - set on False and all others on True: + By using 'last', the 
last occurrence of each set of duplicated values + is set on False and all others on True: >>> idx.duplicated(keep='last') array([ True, False, True, False, False, False], dtype=bool) diff --git a/pandas/core/series.py b/pandas/core/series.py index 213e2af565f6a..3c2d3bc9ecb4d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1364,8 +1364,8 @@ def duplicated(self, keep='first'): 5 False dtype: bool - By using 'last', the last occurrence of each set of duplicated values is - set on False and all others on True: + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True: >>> animals.duplicated(keep='last') 0 True From c2f79de9fc6c42a7affbd98838b22cc120169876 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 13:32:35 +0100 Subject: [PATCH 13/16] Remove redundant last entry of examples --- pandas/core/indexes/base.py | 10 +++++----- pandas/core/series.py | 7 +------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c640ca4e760bd..3b13da2be22b8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4043,25 +4043,25 @@ def duplicated(self, keep='first'): By default, for each set of duplicated values, the first occurrence is set on False and all others on True: - >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) >>> idx.duplicated() - array([False, False, True, False, True, False], dtype=bool) + array([False, False, True, False, True], dtype=bool) which is equivalent to >>> idx.duplicated(keep='first') - array([False, False, True, False, True, False], dtype=bool) + array([False, False, True, False, True], dtype=bool) By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: >>> idx.duplicated(keep='last') - array([ True, False, True, False, False, 
False], dtype=bool) + array([ True, False, True, False, False], dtype=bool) By setting keep on ``False``, all duplicates are True: >>> idx.duplicated(keep=False) - array([ True, False, True, False, True, False], dtype=bool) + array([ True, False, True, False, True], dtype=bool) Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 3c2d3bc9ecb4d..511916e705da3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1342,15 +1342,13 @@ def duplicated(self, keep='first'): By default, for each set of duplicated values, the first occurrence is set on False and all others on True: - >>> animals = pd.Series(['lama', 'cow', 'lama', - ... 'beetle', 'lama', 'hippo']) + >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama']) >>> animals.duplicated() 0 False 1 False 2 True 3 False 4 True - 5 False dtype: bool which is equivalent to @@ -1361,7 +1359,6 @@ def duplicated(self, keep='first'): 2 True 3 False 4 True - 5 False dtype: bool By using 'last', the last occurrence of each set of duplicated values @@ -1373,7 +1370,6 @@ def duplicated(self, keep='first'): 2 True 3 False 4 False - 5 False dtype: bool By setting keep on ``False``, all duplicates are True: @@ -1384,7 +1380,6 @@ def duplicated(self, keep='first'): 2 True 3 False 4 True - 5 False dtype: bool Returns From f4e3756febc8caff0fced4f3ebec62703d0b5177 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 16:31:24 +0100 Subject: [PATCH 14/16] Extend related methods --- pandas/core/series.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 511916e705da3..e03454ba97907 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1388,7 +1388,9 @@ def duplicated(self, keep='first'): See Also -------- - pandas.Index.duplicated : equivalent method on pandas.Index + pandas.Index.duplicated : Equivalent method on pandas.Index + pandas.DataFrame.duplicated : Equivalent method on 
pandas.DataFrame + pandas.Series.drop_duplicates : Remove duplicate values from Series """ return super(Series, self).duplicated(keep=keep) From 1ff786412c3abaec8159e2c8e90820722ef00200 Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Sat, 10 Mar 2018 16:33:36 +0100 Subject: [PATCH 15/16] Extend related methods of index --- pandas/core/indexes/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3b13da2be22b8..771de268cd272 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4069,7 +4069,9 @@ def duplicated(self, keep='first'): See Also -------- - pandas.Series.duplicated : equivalent method on pandas.Series + pandas.Series.duplicated : Equivalent method on pandas.Series + pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame + pandas.Index.drop_duplicates : Remove duplicate values from Index """ return super(Index, self).duplicated(keep=keep) From e02eda665e82e456c1181e218997e3ec04ef141e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Mar 2018 10:00:42 -0500 Subject: [PATCH 16/16] Cleanup [ci skip] [ci skip] --- pandas/core/indexes/base.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 56d65ee463eae..de4ea5fcfaefa 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4330,12 +4330,14 @@ def duplicated(self, keep='first'): Indicate duplicate index values. Duplicated values are indicated as ``True`` values in the resulting - array. Either all duplicates, all except the first or all except the + array. Either all duplicates, all except the first, or all except the last occurrence of duplicates can be indicated. Parameters ---------- keep : {'first', 'last', False}, default 'first' + Which occurrence of each duplicated value, if any, is not marked.
+ - 'first' : Mark duplicates as ``True`` except for the first occurrence. - 'last' : Mark duplicates as ``True`` except for the last @@ -4345,27 +4347,27 @@ def duplicated(self, keep='first'): Examples -------- By default, for each set of duplicated values, the first occurrence is - set on False and all others on True: + set to False and all others to True: >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) >>> idx.duplicated() - array([False, False, True, False, True], dtype=bool) + array([False, False, True, False, True]) which is equivalent to >>> idx.duplicated(keep='first') - array([False, False, True, False, True], dtype=bool) + array([False, False, True, False, True]) By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: >>> idx.duplicated(keep='last') - array([ True, False, True, False, False], dtype=bool) + array([ True, False, True, False, False]) By setting keep on ``False``, all duplicates are True: >>> idx.duplicated(keep=False) - array([ True, False, True, False, True], dtype=bool) + array([ True, False, True, False, True]) Returns -------