From 194d7cdf4c85e7edcb503d5c6a89556e43b19c39 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Tue, 24 Oct 2017 22:32:12 -0400 Subject: [PATCH 1/6] Add drop_duplicates test for uint, float and bool --- pandas/tests/series/test_analytics.py | 193 ++++++++++++++++++++------ 1 file changed, 147 insertions(+), 46 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 8cc40bb5146c5..edacca582dd99 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -873,52 +873,153 @@ def test_unique(self): expected = np.array([1, 2, 3, None], dtype=object) tm.assert_numpy_array_equal(result, expected) - def test_drop_duplicates(self): - # check both int and object - for s in [Series([1, 2, 3, 3]), Series(['1', '2', '3', '3'])]: - expected = Series([False, False, False, True]) - assert_series_equal(s.duplicated(), expected) - assert_series_equal(s.drop_duplicates(), s[~expected]) - sc = s.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, False, True, False]) - assert_series_equal(s.duplicated(keep='last'), expected) - assert_series_equal(s.drop_duplicates(keep='last'), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, False, True, True]) - assert_series_equal(s.duplicated(keep=False), expected) - assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, s[~expected]) - - for s in [Series([1, 2, 3, 5, 3, 2, 4]), - Series(['1', '2', '3', '5', '3', '2', '4'])]: - expected = Series([False, False, False, False, True, True, False]) - assert_series_equal(s.duplicated(), expected) - assert_series_equal(s.drop_duplicates(), s[~expected]) - sc = s.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, True, True, False, False, False, False]) - assert_series_equal(s.duplicated(keep='last'), expected) - assert_series_equal(s.drop_duplicates(keep='last'), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, True, True, False, True, True, False]) - assert_series_equal(s.duplicated(keep=False), expected) - assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, s[~expected]) + @pytest.mark.parametrize('dtype', ['int_', 'uint', 'float_']) + def test_drop_duplicates_with_numeric(self, dtype): + # Test case 1 + test_case_1 = Series([1, 2, 3, 3], dtype=np.dtype(dtype)) + + expected = Series([False, False, False, True]) + assert_series_equal(test_case_1.duplicated(), expected) + assert_series_equal(test_case_1.drop_duplicates(), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + expected = Series([False, False, True, False]) + assert_series_equal(test_case_1.duplicated(keep='last'), expected) + assert_series_equal(test_case_1.drop_duplicates(keep='last'), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + expected = Series([False, False, True, True]) + assert_series_equal(test_case_1.duplicated(keep=False), expected) + assert_series_equal(test_case_1.drop_duplicates(keep=False), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + # Test case 2 + test_case_2 = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) + + expected = Series([False, False, False, False, True, True, False]) + assert_series_equal(test_case_2.duplicated(), expected) + assert_series_equal(test_case_2.drop_duplicates(), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + expected = Series([False, True, True, False, False, False, False]) + assert_series_equal(test_case_2.duplicated(keep='last'), expected) + assert_series_equal(test_case_2.drop_duplicates(keep='last'), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + expected = Series([False, True, True, False, True, True, False]) + assert_series_equal(test_case_2.duplicated(keep=False), expected) + assert_series_equal(test_case_2.drop_duplicates(keep=False), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + + def test_drop_duplicates_with_object(self): + # Test case 1 + test_case_1 = Series(['1', '2', '3', '3']) + + expected = Series([False, False, False, True]) + assert_series_equal(test_case_1.duplicated(), expected) + assert_series_equal(test_case_1.drop_duplicates(), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + expected = Series([False, False, True, False]) + assert_series_equal(test_case_1.duplicated(keep='last'), expected) + assert_series_equal(test_case_1.drop_duplicates(keep='last'), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + expected = Series([False, False, True, True]) + assert_series_equal(test_case_1.duplicated(keep=False), expected) + assert_series_equal(test_case_1.drop_duplicates(keep=False), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + # Test case 2 + test_case_2 = Series(['1', '2', '3', '5', '3', '2', '4']) + + expected = Series([False, False, False, False, True, True, False]) + assert_series_equal(test_case_2.duplicated(), expected) + assert_series_equal(test_case_2.drop_duplicates(), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + expected = Series([False, True, True, False, False, False, False]) + assert_series_equal(test_case_2.duplicated(keep='last'), expected) + assert_series_equal(test_case_2.drop_duplicates(keep='last'), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + expected = Series([False, True, True, False, True, True, False]) + assert_series_equal(test_case_2.duplicated(keep=False), expected) + assert_series_equal(test_case_2.drop_duplicates(keep=False), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + def test_drop_duplicates_with_bool(self): + # Test case 1 + test_case_1 = Series([True, False, False]) + + expected = Series([False, False, True]) + assert_series_equal(test_case_1.duplicated(), expected) + assert_series_equal(test_case_1.drop_duplicates(), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + expected = Series([False, True, False]) + assert_series_equal(test_case_1.duplicated(keep='last'), expected) + assert_series_equal(test_case_1.drop_duplicates(keep='last'), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + expected = Series([False, True, True]) + assert_series_equal(test_case_1.duplicated(keep=False), expected) + assert_series_equal(test_case_1.drop_duplicates(keep=False), test_case_1[~expected]) + sc = test_case_1.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, test_case_1[~expected]) + + # Test case 2 + test_case_2 = Series([True, False, True, False]) + expected = Series([False, False, True, True]) + assert_series_equal(test_case_2.duplicated(), expected) + assert_series_equal(test_case_2.drop_duplicates(), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + expected = Series([True, True, False, False]) + assert_series_equal(test_case_2.duplicated(keep='last'), expected) + assert_series_equal(test_case_2.drop_duplicates(keep='last'), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, test_case_2[~expected]) + + expected = Series([True, True, True, True]) + assert_series_equal(test_case_2.duplicated(keep=False), expected) + assert_series_equal(test_case_2.drop_duplicates(keep=False), test_case_2[~expected]) + sc = test_case_2.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, test_case_2[~expected]) def test_clip(self): val = self.ts.median() From 888e9375e500e1a37f42071f6468b81b8335e530 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Tue, 24 Oct 2017 22:47:53 -0400 Subject: [PATCH 2/6] TST: #15752 resolve PEP8 issues in test_analytics.py --- pandas/tests/series/test_analytics.py | 156 +++++++++++++------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index edacca582dd99..81bc58797daa4 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -876,150 +876,150 @@ def test_unique(self): @pytest.mark.parametrize('dtype', ['int_', 'uint', 'float_']) def test_drop_duplicates_with_numeric(self, dtype): # Test case 1 - test_case_1 = Series([1, 2, 3, 3], dtype=np.dtype(dtype)) + tc_1 = Series([1, 2, 3, 3], dtype=np.dtype(dtype)) expected = Series([False, False, False, True]) - assert_series_equal(test_case_1.duplicated(), expected) - assert_series_equal(test_case_1.drop_duplicates(), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(), expected) + assert_series_equal(tc_1.drop_duplicates(), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) expected = Series([False, False, True, False]) - assert_series_equal(test_case_1.duplicated(keep='last'), expected) - assert_series_equal(test_case_1.drop_duplicates(keep='last'), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(keep='last'), expected) + assert_series_equal(tc_1.drop_duplicates(keep='last'), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) expected = Series([False, False, True, True]) - assert_series_equal(test_case_1.duplicated(keep=False), expected) - assert_series_equal(test_case_1.drop_duplicates(keep=False), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(keep=False), expected) + assert_series_equal(tc_1.drop_duplicates(keep=False), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) # Test case 2 - test_case_2 = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) + tc_2 = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) expected = Series([False, False, False, False, True, True, False]) - assert_series_equal(test_case_2.duplicated(), expected) - assert_series_equal(test_case_2.drop_duplicates(), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(), expected) + assert_series_equal(tc_2.drop_duplicates(), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) expected = Series([False, True, True, False, False, False, False]) - assert_series_equal(test_case_2.duplicated(keep='last'), expected) - assert_series_equal(test_case_2.drop_duplicates(keep='last'), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(keep='last'), expected) + assert_series_equal(tc_2.drop_duplicates(keep='last'), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) expected = Series([False, True, True, False, True, True, False]) - assert_series_equal(test_case_2.duplicated(keep=False), expected) - assert_series_equal(test_case_2.drop_duplicates(keep=False), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(keep=False), expected) + assert_series_equal(tc_2.drop_duplicates(keep=False), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) def test_drop_duplicates_with_object(self): # Test case 1 - test_case_1 = Series(['1', '2', '3', '3']) + tc_1 = Series(['1', '2', '3', '3']) expected = Series([False, False, False, True]) - assert_series_equal(test_case_1.duplicated(), expected) - assert_series_equal(test_case_1.drop_duplicates(), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(), expected) + assert_series_equal(tc_1.drop_duplicates(), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) expected = Series([False, False, True, False]) - assert_series_equal(test_case_1.duplicated(keep='last'), expected) - assert_series_equal(test_case_1.drop_duplicates(keep='last'), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(keep='last'), expected) + assert_series_equal(tc_1.drop_duplicates(keep='last'), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) expected = Series([False, False, True, True]) - assert_series_equal(test_case_1.duplicated(keep=False), expected) - assert_series_equal(test_case_1.drop_duplicates(keep=False), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(keep=False), expected) + assert_series_equal(tc_1.drop_duplicates(keep=False), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) # Test case 2 - test_case_2 = Series(['1', '2', '3', '5', '3', '2', '4']) + tc_2 = Series(['1', '2', '3', '5', '3', '2', '4']) expected = Series([False, False, False, False, True, True, False]) - assert_series_equal(test_case_2.duplicated(), expected) - assert_series_equal(test_case_2.drop_duplicates(), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(), expected) + assert_series_equal(tc_2.drop_duplicates(), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) expected = Series([False, True, True, False, False, False, False]) - assert_series_equal(test_case_2.duplicated(keep='last'), expected) - assert_series_equal(test_case_2.drop_duplicates(keep='last'), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(keep='last'), expected) + assert_series_equal(tc_2.drop_duplicates(keep='last'), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) expected = Series([False, True, True, False, True, True, False]) - assert_series_equal(test_case_2.duplicated(keep=False), expected) - assert_series_equal(test_case_2.drop_duplicates(keep=False), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(keep=False), expected) + assert_series_equal(tc_2.drop_duplicates(keep=False), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) def test_drop_duplicates_with_bool(self): # Test case 1 - test_case_1 = Series([True, False, False]) + tc_1 = Series([True, False, False]) expected = Series([False, False, True]) - assert_series_equal(test_case_1.duplicated(), expected) - assert_series_equal(test_case_1.drop_duplicates(), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(), expected) + assert_series_equal(tc_1.drop_duplicates(), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) expected = Series([False, True, False]) - assert_series_equal(test_case_1.duplicated(keep='last'), expected) - assert_series_equal(test_case_1.drop_duplicates(keep='last'), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(keep='last'), expected) + assert_series_equal(tc_1.drop_duplicates(keep='last'), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) expected = Series([False, True, True]) - assert_series_equal(test_case_1.duplicated(keep=False), expected) - assert_series_equal(test_case_1.drop_duplicates(keep=False), test_case_1[~expected]) - sc = test_case_1.copy() + assert_series_equal(tc_1.duplicated(keep=False), expected) + assert_series_equal(tc_1.drop_duplicates(keep=False), tc_1[~expected]) + sc = tc_1.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, test_case_1[~expected]) + assert_series_equal(sc, tc_1[~expected]) # Test case 2 - test_case_2 = Series([True, False, True, False]) + tc_2 = Series([True, False, True, False]) expected = Series([False, False, True, True]) - assert_series_equal(test_case_2.duplicated(), expected) - assert_series_equal(test_case_2.drop_duplicates(), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(), expected) + assert_series_equal(tc_2.drop_duplicates(), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) expected = Series([True, True, False, False]) - assert_series_equal(test_case_2.duplicated(keep='last'), expected) - assert_series_equal(test_case_2.drop_duplicates(keep='last'), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(keep='last'), expected) + assert_series_equal(tc_2.drop_duplicates(keep='last'), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) expected = Series([True, True, True, True]) - assert_series_equal(test_case_2.duplicated(keep=False), expected) - assert_series_equal(test_case_2.drop_duplicates(keep=False), test_case_2[~expected]) - sc = test_case_2.copy() + assert_series_equal(tc_2.duplicated(keep=False), expected) + assert_series_equal(tc_2.drop_duplicates(keep=False), tc_2[~expected]) + sc = tc_2.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, test_case_2[~expected]) + assert_series_equal(sc, tc_2[~expected]) def test_clip(self): val = self.ts.median() From 523ac991a7ba22e5f1988c9d79ba089f28089b4e Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Tue, 24 Oct 2017 22:49:40 -0400 Subject: [PATCH 3/6] TST: #15752 resolve PEP8 issues in test_analytics.py --- pandas/tests/series/test_analytics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 81bc58797daa4..4060f0c13988b 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -923,7 +923,6 @@ def test_drop_duplicates_with_numeric(self, dtype): sc.drop_duplicates(keep=False, inplace=True) assert_series_equal(sc, tc_2[~expected]) - def test_drop_duplicates_with_object(self): # Test case 1 tc_1 = Series(['1', '2', '3', '3']) From 897b09e93a8c458f706c62af20c8c45ae9edf211 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Wed, 25 Oct 2017 22:57:03 -0400 Subject: [PATCH 4/6] TST: #15752 parametrized test_drop_duplicates and removed duplicate code --- pandas/tests/series/test_analytics.py | 186 +++++++------------------- 1 file changed, 45 insertions(+), 141 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 4060f0c13988b..0ff0b5a6fdd62 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -873,152 +873,56 @@ def test_unique(self): expected = np.array([1, 2, 3, None], dtype=object) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize('dtype', ['int_', 'uint', 'float_']) - def test_drop_duplicates_with_numeric(self, dtype): - # Test case 1 - tc_1 = Series([1, 2, 3, 3], dtype=np.dtype(dtype)) - - expected = Series([False, False, False, True]) - assert_series_equal(tc_1.duplicated(), expected) - assert_series_equal(tc_1.drop_duplicates(), tc_1[~expected]) - sc = tc_1.copy() + @pytest.mark.parametrize("tc, expected", [ + (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')), + [ + Series([False, False, False, False, True, True, False]), + Series([False, True, True, False, False, False, False]), + Series([False, True, True, False, True, True, False]) + ]), + (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')), + [ + Series([False, False, False, False, True, True, False]), + Series([False, True, True, False, False, False, False]), + Series([False, True, True, False, True, True, False]) + ]), + (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_')), + [ + Series([False, False, False, False, True, True, False]), + Series([False, True, True, False, False, False, False]), + Series([False, True, True, False, True, True, False]) + ]), + (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_')), + [ + Series([False, False, False, False, True, True, False]), + Series([False, True, True, False, False, False, False]), + Series([False, True, True, False, True, True, False]) + ]), + (Series([True, False, True, False]), + [ + Series([False, False, True, True]), + Series([True, True, False, False]), + Series([True, True, True, True]) + ]) + ]) + def test_drop_duplicates(self, tc, expected): + assert_series_equal(tc.duplicated(), expected[0]) + assert_series_equal(tc.drop_duplicates(), tc[~expected[0]]) + sc = tc.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc_1[~expected]) + assert_series_equal(sc, tc[~expected[0]]) - expected = Series([False, False, True, False]) - assert_series_equal(tc_1.duplicated(keep='last'), expected) - assert_series_equal(tc_1.drop_duplicates(keep='last'), tc_1[~expected]) - sc = tc_1.copy() + assert_series_equal(tc.duplicated(keep='last'), expected[1]) + assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected[1]]) + sc = tc.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc_1[~expected]) + assert_series_equal(sc, tc[~expected[1]]) - expected = Series([False, False, True, True]) - assert_series_equal(tc_1.duplicated(keep=False), expected) - assert_series_equal(tc_1.drop_duplicates(keep=False), tc_1[~expected]) - sc = tc_1.copy() + assert_series_equal(tc.duplicated(keep=False), expected[2]) + assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected[2]]) + sc = tc.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - # Test case 2 - tc_2 = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) - - expected = Series([False, False, False, False, True, True, False]) - assert_series_equal(tc_2.duplicated(), expected) - assert_series_equal(tc_2.drop_duplicates(), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - expected = Series([False, True, True, False, False, False, False]) - assert_series_equal(tc_2.duplicated(keep='last'), expected) - assert_series_equal(tc_2.drop_duplicates(keep='last'), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - expected = Series([False, True, True, False, True, True, False]) - assert_series_equal(tc_2.duplicated(keep=False), expected) - assert_series_equal(tc_2.drop_duplicates(keep=False), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - def test_drop_duplicates_with_object(self): - # Test case 1 - tc_1 = Series(['1', '2', '3', '3']) - - expected = Series([False, False, False, True]) - assert_series_equal(tc_1.duplicated(), expected) - assert_series_equal(tc_1.drop_duplicates(), tc_1[~expected]) - sc = tc_1.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - expected = Series([False, False, True, False]) - assert_series_equal(tc_1.duplicated(keep='last'), expected) - assert_series_equal(tc_1.drop_duplicates(keep='last'), tc_1[~expected]) - sc = tc_1.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - expected = Series([False, False, True, True]) - assert_series_equal(tc_1.duplicated(keep=False), expected) - assert_series_equal(tc_1.drop_duplicates(keep=False), tc_1[~expected]) - sc = tc_1.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - # Test case 2 - tc_2 = Series(['1', '2', '3', '5', '3', '2', '4']) - - expected = Series([False, False, False, False, True, True, False]) - assert_series_equal(tc_2.duplicated(), expected) - assert_series_equal(tc_2.drop_duplicates(), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - expected = Series([False, True, True, False, False, False, False]) - assert_series_equal(tc_2.duplicated(keep='last'), expected) - assert_series_equal(tc_2.drop_duplicates(keep='last'), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - expected = Series([False, True, True, False, True, True, False]) - assert_series_equal(tc_2.duplicated(keep=False), expected) - assert_series_equal(tc_2.drop_duplicates(keep=False), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - def test_drop_duplicates_with_bool(self): - # Test case 1 - tc_1 = Series([True, False, False]) - - expected = Series([False, False, True]) - assert_series_equal(tc_1.duplicated(), expected) - assert_series_equal(tc_1.drop_duplicates(), tc_1[~expected]) - sc = tc_1.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - expected = Series([False, True, False]) - assert_series_equal(tc_1.duplicated(keep='last'), expected) - assert_series_equal(tc_1.drop_duplicates(keep='last'), tc_1[~expected]) - sc = tc_1.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - expected = Series([False, True, True]) - assert_series_equal(tc_1.duplicated(keep=False), expected) - assert_series_equal(tc_1.drop_duplicates(keep=False), tc_1[~expected]) - sc = tc_1.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc_1[~expected]) - - # Test case 2 - tc_2 = Series([True, False, True, False]) - expected = Series([False, False, True, True]) - assert_series_equal(tc_2.duplicated(), expected) - assert_series_equal(tc_2.drop_duplicates(), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - expected = Series([True, True, False, False]) - assert_series_equal(tc_2.duplicated(keep='last'), expected) - assert_series_equal(tc_2.drop_duplicates(keep='last'), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc_2[~expected]) - - expected = Series([True, True, True, True]) - assert_series_equal(tc_2.duplicated(keep=False), expected) - assert_series_equal(tc_2.drop_duplicates(keep=False), tc_2[~expected]) - sc = tc_2.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc_2[~expected]) + assert_series_equal(sc, tc[~expected[2]]) def test_clip(self): val = self.ts.median() From 1952ba326a08169bdd72849a3d78b5572a9b05f7 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Thu, 26 Oct 2017 22:56:18 -0400 Subject: [PATCH 5/6] TST #15752 Re-parametrize test_drop_duplicates --- pandas/tests/series/test_analytics.py | 81 ++++++++++++++++----------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0ff0b5a6fdd62..0f9167dd37ebd 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -873,39 +873,56 @@ def test_unique(self): expected = np.array([1, 2, 3, None], dtype=object) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize("tc, expected", [ - (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')), - [ - Series([False, False, False, False, True, True, False]), - Series([False, True, True, False, False, False, False]), - Series([False, True, True, False, True, True, False]) - ]), - (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')), - [ - Series([False, False, False, False, True, True, False]), - Series([False, True, True, False, False, False, False]), - Series([False, True, True, False, True, True, False]) - ]), - (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_')), - [ - Series([False, False, False, False, True, True, False]), - Series([False, True, True, False, False, False, False]), - Series([False, True, True, False, True, True, False]) - ]), - (Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_')), - [ - Series([False, False, False, False, True, True, False]), - Series([False, True, True, False, False, False, False]), - Series([False, True, True, False, True, True, False]) - ]), - (Series([True, False, True, False]), + @pytest.mark.parametrize( + "tc", [ - Series([False, False, True, True]), - Series([True, True, False, False]), - Series([True, True, True, True]) - ]) - ]) - def test_drop_duplicates(self, tc, expected): + Series([1, 2, 3, 3], dtype=np.dtype('int_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')), + Series([1, 2, 3, 3], dtype=np.dtype('uint')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')), + Series([1, 2, 3, 3], dtype=np.dtype('float_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_')), + Series([1, 2, 3, 3], dtype=np.dtype('unicode_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_')), + Series([True, False, False]), + Series([True, False, True, False]) + ] + ) + def test_drop_duplicates(self, tc): + expected_results = { + "shorter_series_non_bool": [ + Series([False, False, False, True]), + Series([False, False, True, False]), + Series([False, False, True, True]) + ], + "longer_series_non_bool": [ + Series([False, False, False, False, True, True, False]), + Series([False, True, True, False, False, False, False]), + Series([False, True, True, False, True, True, False]) + ], + "shorter_series_bool": [ + Series([False, False, True]), + Series([False, True, False]), + Series([False, True, True]) + ], + "longer_series_bool": [ + Series([False, False, True, True]), + Series([True, True, False, False]), + Series([True, True, True, True]) + ] + } + + if (tc.dtype != "bool"): + if (tc.size == 7): + expected = expected_results["longer_series_non_bool"] + else: + expected = expected_results["shorter_series_non_bool"] + else: + if (tc.size == 4): + expected = expected_results["longer_series_bool"] + else: + expected = expected_results["shorter_series_bool"] + assert_series_equal(tc.duplicated(), expected[0]) assert_series_equal(tc.drop_duplicates(), tc[~expected[0]]) sc = tc.copy() From 7b035e49d2a42d8b0a58c7a00fb8eceeb46b91f9 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Sat, 28 Oct 2017 00:19:04 -0400 Subject: [PATCH 6/6] TST #15752: create separate method to test drop duplicates for bools and fix test parameterization --- pandas/tests/series/test_analytics.py | 130 +++++++++++++++----------- 1 file changed, 76 insertions(+), 54 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0f9167dd37ebd..2625f4be840c4 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -874,72 +874,94 @@ def test_unique(self): tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( - "tc", + "tc1, tc2", [ - Series([1, 2, 3, 3], dtype=np.dtype('int_')), - Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')), - Series([1, 2, 3, 3], dtype=np.dtype('uint')), - Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')), - Series([1, 2, 3, 3], dtype=np.dtype('float_')), - Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_')), - Series([1, 2, 3, 3], dtype=np.dtype('unicode_')), - Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_')), - Series([True, False, False]), - Series([True, False, True, False]) + ( + Series([1, 2, 3, 3], dtype=np.dtype('int_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')) + ), + ( + Series([1, 2, 3, 3], dtype=np.dtype('uint')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')) + ), + ( + Series([1, 2, 3, 3], dtype=np.dtype('float_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_')) + ), + ( + Series([1, 2, 3, 3], dtype=np.dtype('unicode_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_')) + ) ] ) - def test_drop_duplicates(self, tc): - expected_results = { - "shorter_series_non_bool": [ - Series([False, False, False, True]), - Series([False, False, True, False]), - Series([False, False, True, True]) - ], - "longer_series_non_bool": [ - Series([False, False, False, False, True, True, False]), - Series([False, True, True, False, False, False, False]), - Series([False, True, True, False, True, True, False]) - ], - "shorter_series_bool": [ - Series([False, False, True]), - Series([False, True, False]), - Series([False, True, True]) - ], - "longer_series_bool": [ - Series([False, False, True, True]), - Series([True, True, False, False]), - Series([True, True, True, True]) - ] - } - - if (tc.dtype != "bool"): - if (tc.size == 7): - expected = expected_results["longer_series_non_bool"] - else: - expected = expected_results["shorter_series_non_bool"] - else: - if (tc.size == 4): - expected = expected_results["longer_series_bool"] - else: - expected = expected_results["shorter_series_bool"] + def test_drop_duplicates_non_bool(self, tc1, tc2): + # Test case 1 + expected = Series([False, False, False, True]) + assert_series_equal(tc1.duplicated(), expected) + assert_series_equal(tc1.drop_duplicates(), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, False]) + assert_series_equal(tc1.duplicated(keep='last'), expected) + assert_series_equal(tc1.drop_duplicates(keep='last'), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, True]) + assert_series_equal(tc1.duplicated(keep=False), expected) + assert_series_equal(tc1.drop_duplicates(keep=False), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, tc1[~expected]) + + # Test case 2 + expected = Series([False, False, False, False, True, True, False]) + assert_series_equal(tc2.duplicated(), expected) + assert_series_equal(tc2.drop_duplicates(), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, False, False, False]) + assert_series_equal(tc2.duplicated(keep='last'), expected) + assert_series_equal(tc2.drop_duplicates(keep='last'), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, True, True, False]) + assert_series_equal(tc2.duplicated(keep=False), expected) + assert_series_equal(tc2.drop_duplicates(keep=False), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, tc2[~expected]) + + def test_drop_duplicates_bool(self): + tc = Series([True, False, True, False]) - assert_series_equal(tc.duplicated(), expected[0]) - assert_series_equal(tc.drop_duplicates(), tc[~expected[0]]) + expected = Series([False, False, True, True]) + assert_series_equal(tc.duplicated(), expected) + assert_series_equal(tc.drop_duplicates(), tc[~expected]) sc = tc.copy() sc.drop_duplicates(inplace=True) - assert_series_equal(sc, tc[~expected[0]]) + assert_series_equal(sc, tc[~expected]) - assert_series_equal(tc.duplicated(keep='last'), expected[1]) - assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected[1]]) + expected = Series([True, True, False, False]) + assert_series_equal(tc.duplicated(keep='last'), expected) + assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected]) sc = tc.copy() sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, tc[~expected[1]]) + assert_series_equal(sc, tc[~expected]) - assert_series_equal(tc.duplicated(keep=False), expected[2]) - assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected[2]]) + expected = Series([True, True, True, True]) + assert_series_equal(tc.duplicated(keep=False), expected) + assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected]) sc = tc.copy() sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, tc[~expected[2]]) + assert_series_equal(sc, tc[~expected]) def test_clip(self): val = self.ts.median()