From b892c68d6d534b350e531f7c9c36d3fa7b03f1a3 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 29 Sep 2019 22:43:57 +0530 Subject: [PATCH 01/35] Fix #28669 --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/arrays/categorical/test_api.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 870628500af21..230e3e3edf2f1 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1118,7 +1118,7 @@ def remove_categories(self, removals, inplace=False): # GH 10156 if any(isna(removals)): - not_included = [x for x in not_included if notna(x)] + not_included = {x for x in not_included if notna(x)} new_categories = [x for x in new_categories if notna(x)] if len(not_included) != 0: diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index ab07b3c96a1db..7c9611466bbe8 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -334,9 +334,23 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) + # remove null + res = cat.remove_categories() + tm.assert_categorical_equal((cat, old)) + tm.assert_categorical_equal((res, old)) + # inplace == True res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) + + # remove duplicates + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical([np.nan, "b", "c", np.nan], categories=["a", "b"], ordered=True) + + res = cat.remove_categories("a") + tm.assert_categorical_equal((cat, old)) + tm.assert_categorical_equal((res, new)) assert res is None # removal is not in categories From 9236f99f6e9f0d13119e9af9781b8d455bfb72b9 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 08:02:15 +0530 Subject: [PATCH 02/35] Fix issue #28669 --- pandas/tests/arrays/categorical/test_api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 7c9611466bbe8..cedbb2ba15d55 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -344,13 +344,13 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, new) # remove duplicates - cat = Categorical(["a", "b", "c", "a"], ordered=True) - old = cat.copy() - new = Categorical([np.nan, "b", "c", np.nan], categories=["a", "b"], ordered=True) + cat1 = Categorical(["a", "b", "c", "a"], ordered=True) + old1 = cat.copy() + new1 = Categorical([np.nan, "b", "c", np.nan], categories=["a", "b"], ordered=True) res = cat.remove_categories("a") - tm.assert_categorical_equal((cat, old)) - tm.assert_categorical_equal((res, new)) + tm.assert_categorical_equal((cat1, old1)) + tm.assert_categorical_equal((res1, new1)) assert res is None # removal is not in categories From e6d3d49bdc295ef7b02130a1159a902f99311a40 Mon Sep 17 00:00:00 2001 From: Puneeth K <32433964+punndcoder28@users.noreply.github.com> Date: Mon, 30 Sep 2019 10:58:34 +0530 Subject: [PATCH 03/35] Fix --- pandas/tests/arrays/categorical/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index cedbb2ba15d55..ed1f0d3972ef1 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -346,7 +346,8 @@ def test_remove_categories(self): # remove duplicates cat1 = Categorical(["a", "b", "c", "a"], ordered=True) old1 = cat.copy() - new1 = Categorical([np.nan, "b", "c", np.nan], categories=["a", "b"], ordered=True) + c = ["b", "c"] + new1 = Categorical([np.nan, "b", "c", np.nan], categories=c, ordered=True) res = cat.remove_categories("a") tm.assert_categorical_equal((cat1, old1)) From d18d602c34c36aa96eee9c2dc212200287e046b0 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 19:19:28 +0530 Subject: [PATCH 04/35] Fix issue #28669 --- pandas/tests/arrays/categorical/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index cedbb2ba15d55..02b1ad47b9bb4 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -335,7 +335,7 @@ def test_remove_categories(self): tm.assert_categorical_equal(res, new) # remove null - res = cat.remove_categories() + res = cat.remove_categories("") tm.assert_categorical_equal((cat, old)) tm.assert_categorical_equal((res, old)) From 9e2a59bab0a2d4d0e1ac7eac6aad93d162bb8b03 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 20:03:30 +0530 Subject: [PATCH 05/35] Fix issue #28669 --- pandas/tests/arrays/categorical/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 68d250ab38931..32bd2c31ef6a9 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -351,7 +351,7 @@ def test_remove_categories(self): res = cat.remove_categories("a") tm.assert_categorical_equal((cat1, old1)) - tm.assert_categorical_equal((res1, new1)) + tm.assert_categorical_equal((res, new1)) assert res is None # removal is not in categories From 41ee2bedfe59bd9bc1a3a27f4be75f74305214f2 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 20:41:07 +0530 Subject: [PATCH 06/35] Removed test for null --- pandas/tests/arrays/categorical/test_api.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 32bd2c31ef6a9..b8a76389ed932 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -334,11 +334,6 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) - # remove null - res = cat.remove_categories("") - tm.assert_categorical_equal((cat, old)) - tm.assert_categorical_equal((res, old)) - # inplace == True res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) From 894f60d6adf5fa8455204869ebca9363d412bab5 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 21:23:38 +0530 Subject: [PATCH 07/35] Removed test for null --- pandas/tests/arrays/categorical/test_api.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index b8a76389ed932..e4f2b5a46fdf5 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -339,14 +339,12 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, new) # remove duplicates - cat1 = Categorical(["a", "b", "c", "a"], ordered=True) - old1 = cat.copy() c = ["b", "c"] new1 = Categorical([np.nan, "b", "c", np.nan], categories=c, ordered=True) res = cat.remove_categories("a") - tm.assert_categorical_equal((cat1, old1)) - tm.assert_categorical_equal((res, new1)) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new1) assert res is None # removal is not in categories From bc204d5c30cd9634f86074f57e95504c04a779a7 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 22:09:40 +0530 Subject: [PATCH 08/35] Removed test for null --- pandas/tests/arrays/categorical/test_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index e4f2b5a46fdf5..74914a30c1344 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -339,11 +339,13 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, new) # remove duplicates + cat1 = Categorical(["a", "b", "c", "a"], ordered=True) + old1 = cat.copy() c = ["b", "c"] new1 = Categorical([np.nan, "b", "c", np.nan], categories=c, ordered=True) res = cat.remove_categories("a") - tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(cat1, old1) tm.assert_categorical_equal(res, new1) assert res is None From bb08e1049997e278e73f776cec6e618584f686fc Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 22:38:36 +0530 Subject: [PATCH 09/35] Removed test for null --- pandas/tests/arrays/categorical/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 74914a30c1344..4777d29df0833 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -340,7 +340,7 @@ def test_remove_categories(self): # remove duplicates cat1 = Categorical(["a", "b", "c", "a"], ordered=True) - old1 = cat.copy() + old1 = cat1.copy() c = ["b", "c"] new1 = Categorical([np.nan, "b", "c", np.nan], categories=c, ordered=True) From 9c070b678c3a6a18f5a11c3886a4db38eb90e8e2 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 30 Sep 2019 23:05:35 +0530 Subject: [PATCH 10/35] Removed test for null --- pandas/tests/arrays/categorical/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 4777d29df0833..9847b91473103 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -344,7 +344,7 @@ def test_remove_categories(self): c = ["b", "c"] new1 = Categorical([np.nan, "b", "c", np.nan], categories=c, ordered=True) - res = cat.remove_categories("a") + res = cat1.remove_categories("a") tm.assert_categorical_equal(cat1, old1) tm.assert_categorical_equal(res, new1) assert res is None From 6d2cbbe1db8b3e67b9e802c654ad0c9c3f39f682 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Tue, 1 Oct 2019 18:53:34 +0530 Subject: [PATCH 11/35] Fix issue #28669 --- pandas/tests/arrays/categorical/test_api.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 9847b91473103..b3de949ae4e1b 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -338,15 +338,6 @@ def test_remove_categories(self): res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) - # remove duplicates - cat1 = Categorical(["a", "b", "c", "a"], ordered=True) - old1 = cat1.copy() - c = ["b", "c"] - new1 = Categorical([np.nan, "b", "c", np.nan], categories=c, ordered=True) - - res = cat1.remove_categories("a") - tm.assert_categorical_equal(cat1, old1) - tm.assert_categorical_equal(res, new1) assert res is None # removal is not in categories From a2ea7a55dbac65db0ab4f3992403c9833c1c44c1 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 12:11:21 +0530 Subject: [PATCH 12/35] Added tests for removing null and duplicates --- pandas/tests/arrays/categorical/test_api.py | 25 +++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index b3de949ae4e1b..f1c717022f313 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -1,5 +1,6 @@ import numpy as np import pytest +import re from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series from pandas.core.arrays.categorical import _recode_for_categories @@ -337,13 +338,33 @@ def test_remove_categories(self): # inplace == True res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) - + assert res is None + # for removing duplicates + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical([np.nan, "b", "c", np.nan], categories=["b", "c"], ordered=True) + + res = cat.remove_categories("a") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.remove_categories(["a"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + assert res in None + # removal is not in categories - with pytest.raises(ValueError): + msg = re.escape("removals must all be in old categories: ['c']") + with pytest.raises(ValueError, match=msg): cat.remove_categories(["c"]) + msg = re.escape("removals must all be in old categories: [np.nan]") + with pytest.raises(ValueError): + cat.remove_categories([np.nan]) + def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) exp_categories_all = Index(["a", "b", "c", "d", "e"]) From f3a26d26645a4f1073db6c2bf13ea41c302836d9 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 12:40:32 +0530 Subject: [PATCH 13/35] Removed assert statement --- pandas/tests/arrays/categorical/test_api.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index f1c717022f313..0eeaa88f7aa2e 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -338,8 +338,6 @@ def test_remove_categories(self): # inplace == True res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) - - assert res is None # for removing duplicates cat = Categorical(["a", "b", "c", "a"], ordered=True) @@ -354,7 +352,7 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) - assert res in None + assert res is None # removal is not in categories msg = re.escape("removals must all be in old categories: ['c']") From 12b8f1a02b5cbf1c3a9c1ff2f40aacb92de926c5 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 18:07:40 +0530 Subject: [PATCH 14/35] Parameterized pytest.raises --- pandas/tests/arrays/categorical/test_api.py | 24 ++++++--------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 0eeaa88f7aa2e..88d994586d289 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -321,6 +321,10 @@ def test_set_categories_private(self): expected = Categorical(["a", "c", "d"], categories=list("acde")) tm.assert_categorical_equal(cat, expected) + def test_remove_categories_raises(self, error, message, categories, category_to_remove): + with pytest.raises(error, match=message): + categories.remove_categories(category_to_remove) + def test_remove_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() @@ -339,29 +343,13 @@ def test_remove_categories(self): res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) - # for removing duplicates - cat = Categorical(["a", "b", "c", "a"], ordered=True) - old = cat.copy() - new = Categorical([np.nan, "b", "c", np.nan], categories=["b", "c"], ordered=True) - - res = cat.remove_categories("a") - tm.assert_categorical_equal(cat, old) - tm.assert_categorical_equal(res, new) - - res = cat.remove_categories(["a"]) - tm.assert_categorical_equal(cat, old) - tm.assert_categorical_equal(res, new) - assert res is None # removal is not in categories msg = re.escape("removals must all be in old categories: ['c']") - with pytest.raises(ValueError, match=msg): - cat.remove_categories(["c"]) + test_remove_categories_raises(ValueError, msg, cat, ["c"]) - msg = re.escape("removals must all be in old categories: [np.nan]") - with pytest.raises(ValueError): - cat.remove_categories([np.nan]) + test_remove_categories_raises(ValueError, msg, cat, ["c", np.nan]) def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) From d283f72e18442ddd654d7959c165fb9cf6fc937a Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 18:45:18 +0530 Subject: [PATCH 15/35] Parameterized pytest.raises --- pandas/tests/arrays/categorical/test_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 88d994586d289..a011e22fc135c 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -321,8 +321,8 @@ def test_set_categories_private(self): expected = Categorical(["a", "c", "d"], categories=list("acde")) tm.assert_categorical_equal(cat, expected) - def test_remove_categories_raises(self, error, message, categories, category_to_remove): - with pytest.raises(error, match=message): + def test_remove_categories_raises(self, message, categories, category_to_remove): + with pytest.raises(ValueError, match=message): categories.remove_categories(category_to_remove) def test_remove_categories(self): @@ -347,9 +347,9 @@ def test_remove_categories(self): # removal is not in categories msg = re.escape("removals must all be in old categories: ['c']") - test_remove_categories_raises(ValueError, msg, cat, ["c"]) + self.test_remove_categories_raises(msg, cat, ["c"]) - test_remove_categories_raises(ValueError, msg, cat, ["c", np.nan]) + self.test_remove_categories_raises(msg, cat, ["c", np.nan]) def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) From 6ab85bd13791bfe4d2f1b41792d667461d9ac628 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 19:25:55 +0530 Subject: [PATCH 16/35] Parameterized pytest --- pandas/tests/arrays/categorical/test_api.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index a011e22fc135c..a17900dc09cfa 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -321,10 +321,6 @@ def test_set_categories_private(self): expected = Categorical(["a", "c", "d"], categories=list("acde")) tm.assert_categorical_equal(cat, expected) - def test_remove_categories_raises(self, message, categories, category_to_remove): - with pytest.raises(ValueError, match=message): - categories.remove_categories(category_to_remove) - def test_remove_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() @@ -345,11 +341,15 @@ def test_remove_categories(self): assert res is None - # removal is not in categories - msg = re.escape("removals must all be in old categories: ['c']") - self.test_remove_categories_raises(msg, cat, ["c"]) - - self.test_remove_categories_raises(msg, cat, ["c", np.nan]) + @pytest.mark.parametrize( + "message, old, new, category", [ + "removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["c"], + "removals must all be in old categories : {'c'}", ["a", "b", "c", "a"], ["c", np.nan] + ], + ) + def test_remove_categories_raises(self, message, old, category): + with pytest.raises(ValueError, match=message): + old.remove_categories(category) def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) From 4e520b692d7ce8a3deae31d64d31ed8e2f58d0e6 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 19:33:25 +0530 Subject: [PATCH 17/35] Parameterized pytest --- pandas/tests/arrays/categorical/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index a17900dc09cfa..c3b8662dea133 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -343,8 +343,8 @@ def test_remove_categories(self): @pytest.mark.parametrize( "message, old, new, category", [ - "removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["c"], - "removals must all be in old categories : {'c'}", ["a", "b", "c", "a"], ["c", np.nan] + ("removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["a", "b", np.nan, "a"], ["c"]), + ("removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["a", "b", np.nan, "a"], ["c", np.nan]), ], ) def test_remove_categories_raises(self, message, old, category): From ca84a86b2d85458247abb5b052e526e2eb93e8b9 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 19:56:03 +0530 Subject: [PATCH 18/35] Parameterized pytest --- pandas/tests/arrays/categorical/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index c3b8662dea133..16734e3be89b0 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -347,7 +347,7 @@ def test_remove_categories(self): ("removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["a", "b", np.nan, "a"], ["c", np.nan]), ], ) - def test_remove_categories_raises(self, message, old, category): + def test_remove_categories_raises(self, message, old, new, category): with pytest.raises(ValueError, match=message): old.remove_categories(category) From 67e035e27aa91380259ebd32330ade8a37f08526 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sat, 5 Oct 2019 20:26:07 +0530 Subject: [PATCH 19/35] Fix --- pandas/tests/arrays/categorical/test_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 16734e3be89b0..1e74e814a5d13 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -348,8 +348,10 @@ def test_remove_categories(self): ], ) def test_remove_categories_raises(self, message, old, new, category): + cat = Categorical(old, categories=old) + with pytest.raises(ValueError, match=message): - old.remove_categories(category) + cat.remove_categories(category) def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) From 95f90da7383dafaccf8ab454deb2a536389d667c Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 6 Oct 2019 11:27:57 +0530 Subject: [PATCH 20/35] Parameterized pytest --- pandas/tests/arrays/categorical/test_api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 1e74e814a5d13..aa383d2112520 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -1,6 +1,5 @@ import numpy as np import pytest -import re from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series from pandas.core.arrays.categorical import _recode_for_categories @@ -348,7 +347,7 @@ def test_remove_categories(self): ], ) def test_remove_categories_raises(self, message, old, new, category): - cat = Categorical(old, categories=old) + cat = Categorical(old) with pytest.raises(ValueError, match=message): cat.remove_categories(category) From 68b0dd2c3af5f04cb22f22b1e7995153a510d75f Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 6 Oct 2019 12:21:59 +0530 Subject: [PATCH 21/35] Parameterized pytest --- pandas/tests/arrays/categorical/test_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index aa383d2112520..e2843898e8895 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -341,12 +341,12 @@ def test_remove_categories(self): assert res is None @pytest.mark.parametrize( - "message, old, new, category", [ - ("removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["a", "b", np.nan, "a"], ["c"]), - ("removals must all be in old categories: {'c'}", ["a", "b", "c", "a"], ["a", "b", np.nan, "a"], ["c", np.nan]), + "message, old, category", [ + ("removals must all be in old categories: {'c'}", ["a", "b", "a"], ["c"]), + ("removals must all be in old categories: {'c'}", ["a", "b", "a"], ["c", np.nan]), ], ) - def test_remove_categories_raises(self, message, old, new, category): + def test_remove_categories_raises(self, message, old, category): cat = Categorical(old) with pytest.raises(ValueError, match=message): From 403be039fd0535a4c23d151b461aa2a8b631d70f Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 6 Oct 2019 13:01:35 +0530 Subject: [PATCH 22/35] Parameterized pytest --- pandas/tests/arrays/categorical/test_api.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index e2843898e8895..08218d7ec4cc1 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -342,8 +342,16 @@ def test_remove_categories(self): @pytest.mark.parametrize( "message, old, category", [ - ("removals must all be in old categories: {'c'}", ["a", "b", "a"], ["c"]), - ("removals must all be in old categories: {'c'}", ["a", "b", "a"], ["c", np.nan]), + ( + "removals must all be in old categories: {'c'}", + ["a", "b", "a"], + ["c"] + ), + ( + "removals must all be in old categories: {'c'}", + ["a", "b", "a"], + ["c", np.nan] + ), ], ) def test_remove_categories_raises(self, message, old, category): From 1877d6b2bbaba08dfcab3617cfa6ad2c15e10ff5 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 6 Oct 2019 14:46:07 +0530 Subject: [PATCH 23/35] Parameterized pytest.raises --- pandas/tests/arrays/categorical/test_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 08218d7ec4cc1..e32c1ae34a672 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -343,13 +343,13 @@ def test_remove_categories(self): @pytest.mark.parametrize( "message, old, category", [ ( - "removals must all be in old categories: {'c'}", - ["a", "b", "a"], + "removals must all be in old categories: {'c'}", + ["a", "b", "a"], ["c"] ), ( "removals must all be in old categories: {'c'}", - ["a", "b", "a"], + ["a", "b", "a"], ["c", np.nan] ), ], From e91806b50343202fc0ee6d0baa0df461d763979e Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 6 Oct 2019 23:21:24 +0530 Subject: [PATCH 24/35] Parameterized pytest.raises --- pandas/tests/arrays/categorical/test_api.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index e32c1ae34a672..a72dd57267797 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -341,7 +341,7 @@ def test_remove_categories(self): assert res is None @pytest.mark.parametrize( - "message, old, category", [ + "message, old, removals", [ ( "removals must all be in old categories: {'c'}", ["a", "b", "a"], @@ -352,13 +352,23 @@ def test_remove_categories(self): ["a", "b", "a"], ["c", np.nan] ), + ( + "removals must all be in old categories: {'c'}", + ["a", "b", "a"], + "c" + ), + ( + "removals must all be in old categories: {'c'}", + ["a", "b", "a"], + ["c", "c"] + ) ], ) - def test_remove_categories_raises(self, message, old, category): + def test_remove_categories_raises(self, message, old, removals): cat = Categorical(old) with pytest.raises(ValueError, match=message): - cat.remove_categories(category) + cat.remove_categories(removals) def test_remove_unused_categories(self): c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) From 870c2c3c57352669285b70f0f851dc4c662463bd Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Sun, 6 Oct 2019 23:59:30 +0530 Subject: [PATCH 25/35] Parameterized pytest.raises --- pandas/tests/arrays/categorical/test_api.py | 31 ++++++--------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index a72dd57267797..a0540b7f91800 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -341,31 +341,16 @@ def test_remove_categories(self): assert res is None @pytest.mark.parametrize( - "message, old, removals", [ - ( - "removals must all be in old categories: {'c'}", - ["a", "b", "a"], - ["c"] - ), - ( - "removals must all be in old categories: {'c'}", - ["a", "b", "a"], - ["c", np.nan] - ), - ( - "removals must all be in old categories: {'c'}", - ["a", "b", "a"], - "c" - ), - ( - "removals must all be in old categories: {'c'}", - ["a", "b", "a"], - ["c", "c"] - ) + "removals", [ + (["c"]), + (["c", np.nan]), + ("c"), + (["c", "c"]) ], ) - def test_remove_categories_raises(self, message, old, removals): - cat = Categorical(old) + def test_remove_categories_raises(self, removals): + cat = Categorical(["a", "b", "a"]) + message = "removals must all be in old categories: {'c'}" with pytest.raises(ValueError, match=message): cat.remove_categories(removals) From 120177369205601f3a7448bf8c948e8f6363d081 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Mon, 7 Oct 2019 19:57:51 +0530 Subject: [PATCH 26/35] Added whatsnew entry --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/arrays/categorical/test_api.py | 13 +++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index eb4b72d01d59a..db9a5d509823a 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -166,7 +166,7 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) -- +- Changed the error message in `remove_categories` to show the invalid removals as a set. - diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index a0540b7f91800..8b13137a58fe3 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -1,5 +1,6 @@ import numpy as np import pytest +import re from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series from pandas.core.arrays.categorical import _recode_for_categories @@ -337,20 +338,12 @@ def test_remove_categories(self): # inplace == True res = cat.remove_categories("c", inplace=True) tm.assert_categorical_equal(cat, new) - assert res is None - @pytest.mark.parametrize( - "removals", [ - (["c"]), - (["c", np.nan]), - ("c"), - (["c", "c"]) - ], - ) + @pytest.mark.parametrize("removals", [["c"], (["c", np.nan]), ("c"), (["c", "c"])]) def test_remove_categories_raises(self, removals): cat = Categorical(["a", "b", "a"]) - message = "removals must all be in old categories: {'c'}" + message = re.escape("removals must all be in old categories: {'c'}") with pytest.raises(ValueError, match=message): cat.remove_categories(removals) From 2979c775cf9ae2dc539de16148cd45daa46f3654 Mon Sep 17 00:00:00 2001 From: Puneeth K <32433964+punndcoder28@users.noreply.github.com> Date: Mon, 7 Oct 2019 22:32:32 +0530 Subject: [PATCH 27/35] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index db9a5d509823a..3708d1c6285fb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -166,7 +166,7 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) -- Changed the error message in `remove_categories` to show the invalid removals as a set. +- Changed the error message in :meth:`Categorical.remove_categories` to show the invalid removals as a set. - From 64b6d5e2903769511ba98af6fcf3a7cadedee397 Mon Sep 17 00:00:00 2001 From: Puneeth K <32433964+punndcoder28@users.noreply.github.com> Date: Tue, 8 Oct 2019 11:00:45 +0530 Subject: [PATCH 28/35] Parameterized pytest.raises Co-Authored-By: Simon Hawkins --- pandas/tests/arrays/categorical/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 8b13137a58fe3..c34782b0276e2 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -340,7 +340,7 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, new) assert res is None - @pytest.mark.parametrize("removals", [["c"], (["c", np.nan]), ("c"), (["c", "c"])]) + @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) def test_remove_categories_raises(self, removals): cat = Categorical(["a", "b", "a"]) message = re.escape("removals must all be in old categories: {'c'}") From 1fab64114e8b341f3ce47614c3bbb2f9b5554642 Mon Sep 17 00:00:00 2001 From: Puneeth K <32433964+punndcoder28@users.noreply.github.com> Date: Tue, 8 Oct 2019 11:01:13 +0530 Subject: [PATCH 29/35] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Simon Hawkins --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3708d1c6285fb..bc5ed4ce54b88 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -166,7 +166,7 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) -- Changed the error message in :meth:`Categorical.remove_categories` to show the invalid removals as a set. +- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) - From 6e0307ef19c9e94a45a1062b611324473dbe65bc Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Tue, 8 Oct 2019 11:03:29 +0530 Subject: [PATCH 30/35] Updated whatsnew --- doc/source/whatsnew/v1.0.0.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index db9a5d509823a..ae9f901bac356 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -166,8 +166,9 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) -- Changed the error message in `remove_categories` to show the invalid removals as a set. -- +- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) +- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) + Datetimelike From 8cfa15f9f0ed57ca300401aa35e4f52822ee1070 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Tue, 8 Oct 2019 11:08:28 +0530 Subject: [PATCH 31/35] Updated whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ae9f901bac356..423dfcb4a668c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -166,6 +166,7 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) +- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) - Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) - Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) From 1c0e9ea6c0c1040a1ac8deedf167cbefe961dd92 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Tue, 8 Oct 2019 11:11:37 +0530 Subject: [PATCH 32/35] Updated whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 423dfcb4a668c..01fe95984cce7 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -169,7 +169,7 @@ Categorical - :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) - Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) - Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) - +- Datetimelike From 475524fbbfb82213bc87aac8bf0f35c29784b996 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Tue, 8 Oct 2019 11:12:48 +0530 Subject: [PATCH 33/35] Updated whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 01fe95984cce7..44c65d03ed3e0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -168,7 +168,6 @@ Categorical - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) - :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) - Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) -- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) - From 3e62466723ce390d2ea280eab43feb0d9d1e389f Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Tue, 8 Oct 2019 11:17:45 +0530 Subject: [PATCH 34/35] Added whatsnew entry --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 44c65d03ed3e0..01fe95984cce7 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -168,6 +168,7 @@ Categorical - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) - :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) - Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) +- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) - From 0c02d527793fb8211721063e24b02e1b47b36ab8 Mon Sep 17 00:00:00 2001 From: Puneeth K Date: Wed, 9 Oct 2019 23:23:18 +0530 Subject: [PATCH 35/35] Updated whatsnew --- pandas/tests/arrays/categorical/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index c34782b0276e2..42087b89a19b5 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -1,6 +1,7 @@ +import re + import numpy as np import pytest -import re from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series from pandas.core.arrays.categorical import _recode_for_categories