From 9106f09a9bffcf5cb29adf91b8896434b9fa2c1e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 18 Dec 2017 10:30:54 -0600 Subject: [PATCH 1/3] BUG: Fixed Categorical.equals with unordered The original issue was already fixed. I added tests to verify (but no whatsnew entry). This adds tests and a fix for https://github.com/pandas-dev/pandas/issues/16603#issuecomment-349290078 about `Categorical.equals` Closes https://github.com/pandas-dev/pandas/issues/16603 --- doc/source/whatsnew/v0.22.0.txt | 14 -------------- pandas/core/categorical.py | 12 ++++++++++-- pandas/tests/categorical/test_operators.py | 7 +++++++ pandas/tests/indexes/test_category.py | 9 +++++++++ 4 files changed, 26 insertions(+), 16 deletions(-) delete mode 100644 doc/source/whatsnew/v0.22.0.txt diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt deleted file mode 100644 index 2d30e00142846..0000000000000 --- a/doc/source/whatsnew/v0.22.0.txt +++ /dev/null @@ -1,14 +0,0 @@ -.. _whatsnew_0220: - -v0.22.0 -------- - -This is a major release from 0.21.1 and includes a number of API changes, -deprecations, new features, enhancements, and performance improvements along -with a large number of bug fixes. We recommend that all users upgrade to this -version. - -.. 
_whatsnew_0220.api_breaking: - -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index baf15b3ca5bc4..a09985f23cc80 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2081,8 +2081,16 @@ def equals(self, other): ------- are_equal : boolean """ - return (self.is_dtype_equal(other) and - np.array_equal(self._codes, other._codes)) + if self.is_dtype_equal(other): + if self.categories.equals(other.categories): + # fastpath to avoid re-coding + return np.array_equal(self._codes, other._codes) + else: + # coerce their codes to ours + codes2 = _recode_for_categories(other.codes, other.categories, + self.categories) + return np.array_equal(self._codes, codes2) + return False def is_dtype_equal(self, other): """ diff --git a/pandas/tests/categorical/test_operators.py b/pandas/tests/categorical/test_operators.py index 09a0607b67a88..fa8bb817616e4 100644 --- a/pandas/tests/categorical/test_operators.py +++ b/pandas/tests/categorical/test_operators.py @@ -250,6 +250,13 @@ def test_compare_different_lengths(self): with tm.assert_raises_regex(TypeError, msg): c1 == c2 + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = pd.Categorical(['a'], categories=['a', 'b']) + b = pd.Categorical(['b'], categories=['b', 'a']) + assert not a.equals(b) + def test_numeric_like_ops(self): df = DataFrame({'value': np.random.randint(0, 10000, 100)}) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index f7328a99195b9..d5c0494df030a 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -747,6 +747,15 @@ def test_equals_categorical(self): ordered=True)) assert ci.equals(ci.copy()) + def test_equals_categoridcal_unordered(self): + # https://github.com/pandas-dev/pandas/issues/16603 + a = pd.CategoricalIndex(['A'], 
categories=['A', 'B']) + b = pd.CategoricalIndex(['A'], categories=['B', 'A']) + c = pd.CategoricalIndex(['C'], categories=['B', 'A']) + assert a.equals(b) + assert not a.equals(c) + assert not b.equals(c) + def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) From da86c02734d74b1c8009b08dd6b57cb0c11d456b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Dec 2017 09:22:26 -0600 Subject: [PATCH 2/3] simplify --- pandas/core/categorical.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index a09985f23cc80..c295faa653dad 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2084,12 +2084,12 @@ def equals(self, other): if self.is_dtype_equal(other): if self.categories.equals(other.categories): # fastpath to avoid re-coding - return np.array_equal(self._codes, other._codes) + other_codes = other._codes else: - # coerce their codes to ours - codes2 = _recode_for_categories(other.codes, other.categories, - self.categories) - return np.array_equal(self._codes, codes2) + other_codes = _recode_for_categories(other.codes, + other.categories, + self.categories) + return np.array_equal(self._codes, other_codes) return False def is_dtype_equal(self, other): From debae2b5ed251432fd126b5b5b70bdf6d6b770c5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Dec 2017 09:54:02 -0600 Subject: [PATCH 3/3] Release note --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 709009542e160..ced504eaaf356 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -365,7 +365,7 @@ Numeric Categorical ^^^^^^^^^^^ -- +- Bug in ``Categorical.equals`` between two unordered categoricals with the same categories, but in a different order (:issue:`16603`) - -