@@ -4740,6 +4740,73 @@ def duplicated(
Returns
-------
Series
+ Boolean series indicating duplicated rows.
+
+ See Also
+ --------
+ Index.duplicated : Equivalent method on index.
+ Series.duplicated : Equivalent method on Series.
+ Series.drop_duplicates : Remove duplicate values from Series.
+ DataFrame.drop_duplicates : Remove duplicate values from DataFrame.
+
+ Examples
+ --------
+ Consider a dataset containing ramen ratings.
+
+ >>> df = pd.DataFrame({
+ ...     'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
+ ...     'style': ['cup', 'cup', 'cup', 'pack', 'pack'],
+ ...     'rating': [4, 4, 3.5, 15, 5]
+ ... })
+ >>> df
+      brand style  rating
+ 0  Yum Yum   cup     4.0
+ 1  Yum Yum   cup     4.0
+ 2  Indomie   cup     3.5
+ 3  Indomie  pack    15.0
+ 4  Indomie  pack     5.0
+
+ By default, for each set of duplicated values, the first occurrence
+ is set to False and all others to True.
+
+ >>> df.duplicated()
+ 0    False
+ 1     True
+ 2    False
+ 3    False
+ 4    False
+ dtype: bool
+
+ By using 'last', the last occurrence of each set of duplicated values
+ is set to False and all others to True.
+
+ >>> df.duplicated(keep='last')
+ 0     True
+ 1    False
+ 2    False
+ 3    False
+ 4    False
+ dtype: bool
+
+ By setting ``keep`` to False, all duplicates are marked True.
+
+ >>> df.duplicated(keep=False)
+ 0     True
+ 1     True
+ 2    False
+ 3    False
+ 4    False
+ dtype: bool
+
+ To find duplicates on specific column(s), use ``subset``.
+
+ >>> df.duplicated(subset=['brand'])
+ 0    False
+ 1     True
+ 2    False
+ 3     True
+ 4     True
+ dtype: bool
"""
from pandas.core.sorting import get_group_index
from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT
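The two imports above hint at the general approach: factorize each considered column, combine the per-column codes into a single key per row, and flag repeats according to ``keep``. A minimal plain-Python sketch of that idea, purely illustrative and using a hypothetical helper name ``duplicated_rows`` rather than the actual pandas internals:

import numpy as np
import pandas as pd
from collections import Counter

def duplicated_rows(df, subset=None, keep="first"):
    # Illustrative sketch only, not the pandas implementation.
    # Columns to consider; default is every column.
    cols = list(subset) if subset is not None else list(df.columns)
    # Factorize each column so equal values share an integer code.
    codes = [pd.factorize(df[col])[0] for col in cols]
    # Combine the per-column codes into one hashable key per row.
    keys = list(zip(*codes))
    result = np.zeros(len(df), dtype=bool)
    if keep in ("first", "last"):
        order = range(len(df)) if keep == "first" else range(len(df) - 1, -1, -1)
        seen = set()
        for i in order:
            if keys[i] in seen:
                # A kept occurrence of this key was already visited.
                result[i] = True
            else:
                seen.add(keys[i])
    else:
        # keep=False: mark every member of any duplicated group.
        counts = Counter(keys)
        result = np.array([counts[k] > 1 for k in keys])
    return pd.Series(result, index=df.index)

On the ramen frame above, ``duplicated_rows(df)`` and ``duplicated_rows(df, subset=['brand'])`` reproduce the ``df.duplicated()`` outputs shown in the examples.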