From 25272f595fc4274dc310d2e7323fbe254da6e264 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Sep 2022 19:14:58 +0200 Subject: [PATCH 1/6] REGR: assert_index_equal raising with non matching pd.NA --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/_testing/asserters.py | 3 +++ pandas/tests/util/test_assert_index_equal.py | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 61c1628ea14da..bf85ab64ae3aa 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1242,7 +1242,7 @@ Other .. ***DO NOT USE THIS SECTION*** - Bug in :func:`.assert_index_equal` with ``names=True`` and ``check_order=False`` not checking names (:issue:`47328`) -- +- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError` (:issue:`45608`) .. --------------------------------------------------------------------------- .. _whatsnew_150.contributors: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3858670850074..7734363dd89e5 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -398,6 +398,9 @@ def _get_ilevel_values(index, level): if not left.equals(right): mismatch = left._values != right._values + # if is_extension_array_dtype(mismatch): + # mismatch = mismatch.fillna(True) + diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) msg = f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 0b2c2e12a2d2a..c076806031885 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -280,3 +280,11 @@ def test_assert_index_equal_object_ints_order_false(): idx1 = Index([1, 3], dtype="object") idx2 = Index([3, 1], dtype="object") tm.assert_index_equal(idx1, idx2, 
check_order=False) + + +def test_assert_ea_index_equal_non_matching_na(): + # GH#45608 + idx1 = Index([1, 2], dtype="Int64") + idx2 = Index([1, NA], dtype="Int64") + with pytest.raises(AssertionError, match="50.0 %"): + tm.assert_index_equal(idx1, idx2) From 0428ccccfe45e7dbef457997c56b9aadc2937ac0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Sep 2022 19:16:30 +0200 Subject: [PATCH 2/6] Fix gh ref --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/util/test_assert_index_equal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bf85ab64ae3aa..6c519bea11029 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1242,7 +1242,7 @@ Other .. ***DO NOT USE THIS SECTION*** - Bug in :func:`.assert_index_equal` with ``names=True`` and ``check_order=False`` not checking names (:issue:`47328`) -- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError` (:issue:`45608`) +- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError` (:issue:`48608`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.contributors: diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index c076806031885..94b1403075352 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -283,7 +283,7 @@ def test_assert_index_equal_object_ints_order_false(): def test_assert_ea_index_equal_non_matching_na(): - # GH#45608 + # GH#48608 idx1 = Index([1, 2], dtype="Int64") idx2 = Index([1, NA], dtype="Int64") with pytest.raises(AssertionError, match="50.0 %"): From 6b6234685465c6bf9cfcbf26c054c38d4246d9a6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 17 Sep 2022 19:27:14 +0200 Subject: [PATCH 3/6] Fix --- pandas/_testing/asserters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 7734363dd89e5..f3326b86b4700 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -398,8 +398,8 @@ def _get_ilevel_values(index, level): if not left.equals(right): mismatch = left._values != right._values - # if is_extension_array_dtype(mismatch): - # mismatch = mismatch.fillna(True) + if is_extension_array_dtype(mismatch): + mismatch = mismatch.fillna(True) diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) msg = f"{obj} values are different ({np.round(diff, 5)} %)" From cfdbf778d0294633c9d8ef76d1e35cd9dac540f6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 18 Sep 2022 23:58:42 +0200 Subject: [PATCH 4/6] Fix ci --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/_testing/asserters.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6c519bea11029..530a8146e9ccf 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1242,7 +1242,7 @@ Other .. 
***DO NOT USE THIS SECTION*** - Bug in :func:`.assert_index_equal` with ``names=True`` and ``check_order=False`` not checking names (:issue:`47328`) -- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError` (:issue:`48608`) +- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`) .. --------------------------------------------------------------------------- .. _whatsnew_150.contributors: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index f3326b86b4700..4871d202f44fe 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -399,7 +399,7 @@ def _get_ilevel_values(index, level): mismatch = left._values != right._values if is_extension_array_dtype(mismatch): - mismatch = mismatch.fillna(True) + mismatch = cast("ExtensionArray", mismatch).fillna(True) diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) msg = f"{obj} values are different ({np.round(diff, 5)} %)" From 712e64f8f8de8c6aed60cf80d029bc47f0a4668a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 19 Sep 2022 23:49:17 +0200 Subject: [PATCH 5/6] Move whatsnew --- doc/source/whatsnew/v1.5.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index f8069b5476d9e..cffb85836789b 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`) - .. 
--------------------------------------------------------------------------- From f26752d119fde3f0dabc5eb7aa9b32b579cfce43 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 22 Sep 2022 13:49:43 -0700 Subject: [PATCH 6/6] Fix categorical --- pandas/_libs/testing.pyx | 4 ++++ pandas/tests/util/test_assert_index_equal.py | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index e710a6fb6b24e..679cde9932a7a 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -182,6 +182,10 @@ cpdef assert_almost_equal(a, b, # nan / None comparison return True + if isna(a) and not isna(b) or not isna(a) and isna(b): + # boolean value of pd.NA is ambiguous + raise AssertionError(f"{a} != {b}") + if a == b: # object comparison return True diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 94b1403075352..71799c73f35c6 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -282,9 +282,13 @@ def test_assert_index_equal_object_ints_order_false(): tm.assert_index_equal(idx1, idx2, check_order=False) -def test_assert_ea_index_equal_non_matching_na(): +@pytest.mark.parametrize("check_categorical", [True, False]) +@pytest.mark.parametrize("check_names", [True, False]) +def test_assert_ea_index_equal_non_matching_na(check_names, check_categorical): # GH#48608 idx1 = Index([1, 2], dtype="Int64") idx2 = Index([1, NA], dtype="Int64") with pytest.raises(AssertionError, match="50.0 %"): - tm.assert_index_equal(idx1, idx2) + tm.assert_index_equal( + idx1, idx2, check_names=check_names, check_categorical=check_categorical + )