From 22f312092db177dcd6424294450bf6e14c51c4a6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 13:27:26 -0500 Subject: [PATCH 01/13] Add test --- pandas/tests/util/test_assert_series_equal.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index eaf0824f52927..b97c11a65e515 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -1,5 +1,7 @@ import pytest +import numpy as np +import pandas as pd from pandas import Categorical, DataFrame, Series import pandas._testing as tm @@ -194,3 +196,21 @@ def test_series_equal_categorical_mismatch(check_categorical): tm.assert_series_equal(s1, s2, check_categorical=check_categorical) else: _assert_series_equal_both(s1, s2, check_categorical=check_categorical) + + +@pytest.mark.parametrize("check_dtype", [True, False]) +def test_assert_series_equal_extension_dtype_mismatch(check_dtype): + left = pd.Series(np.array([1, 2, 3], dtype="int")) + right = pd.Series(pd.array([1, 2, 3], dtype="Int64")) + + msg = """Attributes of Series are different + +Attribute "dtype" are different +\\[left\\]: int64 +\\[right\\]: Int64""" + + if check_dtype: + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_dtype=check_dtype) + else: + tm.assert_series_equal(left, right, check_dtype=check_dtype) From 0a915d23ce565813841ff15883ebfe37e0074de5 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 13:27:40 -0500 Subject: [PATCH 02/13] Change or to and --- pandas/_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index dff15c66750ac..09d016cb39499 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1169,7 +1169,7 @@ def assert_series_equal( check_dtype=check_dtype, obj=str(obj), ) - elif is_extension_array_dtype(left.dtype) or is_extension_array_dtype(right.dtype): + elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype): assert_extension_array_equal(left._values, right._values) elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype): # DatetimeArray or TimedeltaArray From 7d67fd95c1855eb0e4a839c2d68b0bc11bfe92d9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 13:30:06 -0500 Subject: [PATCH 03/13] Fixup --- pandas/tests/util/test_assert_series_equal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index b97c11a65e515..75dc598883bf2 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -200,8 +200,8 @@ def test_series_equal_categorical_mismatch(check_categorical): @pytest.mark.parametrize("check_dtype", [True, False]) def test_assert_series_equal_extension_dtype_mismatch(check_dtype): - left = pd.Series(np.array([1, 2, 3], dtype="int")) - right = pd.Series(pd.array([1, 2, 3], dtype="Int64")) + left = Series(np.array([1, 2, 3], dtype="int")) + right = Series(pd.array([1, 2, 3], dtype="Int64")) msg = """Attributes of Series are different From 872f1fcd07a2bd1aac6ca9a7fc3cc4ef67ba39f1 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 13:36:10 -0500 Subject: [PATCH 04/13] Add interval case --- pandas/_testing.py | 2 +- pandas/tests/util/test_assert_series_equal.py | 26 ++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 09d016cb39499..8f60aee10c95f 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1159,7 +1159,7 @@ def assert_series_equal( f"is not equal to {right._values}." ) raise AssertionError(msg) - elif is_interval_dtype(left.dtype) or is_interval_dtype(right.dtype): + elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype): assert_interval_array_equal(left.array, right.array) elif is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype): _testing.assert_almost_equal( diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 75dc598883bf2..1119f37b97937 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -200,14 +200,32 @@ def test_series_equal_categorical_mismatch(check_categorical): @pytest.mark.parametrize("check_dtype", [True, False]) def test_assert_series_equal_extension_dtype_mismatch(check_dtype): - left = Series(np.array([1, 2, 3], dtype="int")) - right = Series(pd.array([1, 2, 3], dtype="Int64")) + left = Series(np.array([1, 2, 3], dtype="Int64")) + right = left.astype(int) msg = """Attributes of Series are different Attribute "dtype" are different -\\[left\\]: int64 -\\[right\\]: Int64""" +\\[left\\]: Int64 +\\[right\\]: int64""" + + if check_dtype: + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_dtype=check_dtype) + else: + tm.assert_series_equal(left, right, check_dtype=check_dtype) + + +@pytest.mark.parametrize("check_dtype", [True, False]) +def test_assert_series_equal_extension_dtype_mismatch(check_dtype): + left = Series([pd.Interval(0, 1)], dtype="interval") + right = left.astype(object) + + msg = """Attributes of Series are different + +Attribute "dtype" are different +\\[left\\]: interval\\[int64\\] +\\[right\\]: object""" if check_dtype: with pytest.raises(AssertionError, match=msg): From 121a7513dcabc91a47ca0ee2cf8bd771c1358ff8 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 13:38:20 -0500 Subject: [PATCH 05/13] Change test name --- pandas/tests/util/test_assert_series_equal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 1119f37b97937..513cd751738ae 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -217,7 +217,7 @@ def test_assert_series_equal_extension_dtype_mismatch(check_dtype): @pytest.mark.parametrize("check_dtype", [True, False]) -def test_assert_series_equal_extension_dtype_mismatch(check_dtype): +def test_assert_series_equal_interval_dtype_mismatch(check_dtype): left = Series([pd.Interval(0, 1)], dtype="interval") right = left.astype(object) From 878a6cb94923f6df724ac926f6b500810d0a567c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 13:39:43 -0500 Subject: [PATCH 06/13] pd not np --- pandas/tests/util/test_assert_series_equal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 513cd751738ae..772f32bc40c0c 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -1,6 +1,5 @@ import pytest -import numpy as np import pandas as pd from pandas import Categorical, DataFrame, Series import pandas._testing as tm @@ -200,7 +199,7 @@ def test_series_equal_categorical_mismatch(check_categorical): @pytest.mark.parametrize("check_dtype", [True, False]) def test_assert_series_equal_extension_dtype_mismatch(check_dtype): - left = Series(np.array([1, 2, 3], dtype="Int64")) + left = Series(pd.array([1, 2, 3], dtype="Int64")) right = left.astype(int) msg = """Attributes of Series are different From ed033e02d6d2a31a43e45e6e8a66780ce76c4f7e Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 14:54:25 -0500 Subject: [PATCH 07/13] Add 32 bit case --- pandas/tests/util/test_assert_series_equal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 772f32bc40c0c..9816c37f755ca 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -206,7 +206,7 @@ def test_assert_series_equal_extension_dtype_mismatch(check_dtype): Attribute "dtype" are different \\[left\\]: Int64 -\\[right\\]: int64""" +\\[right\\]: int[32|64]""" if check_dtype: with pytest.raises(AssertionError, match=msg): From 3b23ee8048844da57280ed5603e5b597249f4f4f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 15:12:49 -0500 Subject: [PATCH 08/13] Update tests --- pandas/tests/util/test_assert_frame_equal.py | 35 +++++++++++++++++++ pandas/tests/util/test_assert_series_equal.py | 26 +++++++------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 3090343ba2fd9..878415d148a57 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -1,5 +1,6 @@ import pytest +import pandas as pd from pandas import DataFrame import pandas._testing as tm @@ -218,3 +219,37 @@ def test_frame_equal_unicode(df1, df2, msg, by_blocks_fixture, obj_fixture): msg = msg.format(obj=obj_fixture) with pytest.raises(AssertionError, match=msg): tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture) + + +def test_assert_frame_equal_extension_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 + left = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + right = left.astype(int) + + msg = """Attributes of DataFrame\\.iloc\\[:, 0\\] \\(column name="a"\\) are different + +Attribute "dtype" are different +\\[left\\]: Int64 +\\[right\\]: int[32|64]""" + + tm.assert_frame_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(left, right, check_dtype=True) + + +def test_assert_frame_equal_interval_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 + left = DataFrame({"a": [pd.Interval(0, 1)]}, dtype="interval") + right = left.astype(object) + + msg = """Attributes of DataFrame\\.iloc\\[:, 0\\] \\(column name="a"\\) are different + +Attribute "dtype" are different +\\[left\\]: interval\\[int64\\] +\\[right\\]: object""" + + tm.assert_frame_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(left, right, check_dtype=True) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 9816c37f755ca..1f8302211cbee 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -197,8 +197,8 @@ def test_series_equal_categorical_mismatch(check_categorical): _assert_series_equal_both(s1, s2, check_categorical=check_categorical) -@pytest.mark.parametrize("check_dtype", [True, False]) -def test_assert_series_equal_extension_dtype_mismatch(check_dtype): +def test_assert_series_equal_extension_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 left = Series(pd.array([1, 2, 3], dtype="Int64")) right = left.astype(int) @@ -208,15 +208,14 @@ def test_assert_series_equal_extension_dtype_mismatch(check_dtype): \\[left\\]: Int64 \\[right\\]: int[32|64]""" - if check_dtype: - with pytest.raises(AssertionError, match=msg): - tm.assert_series_equal(left, right, check_dtype=check_dtype) - else: - tm.assert_series_equal(left, right, check_dtype=check_dtype) + tm.assert_series_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_dtype=True) -@pytest.mark.parametrize("check_dtype", [True, False]) -def test_assert_series_equal_interval_dtype_mismatch(check_dtype): +def test_assert_series_equal_interval_dtype_mismatch(): + # https://github.com/pandas-dev/pandas/issues/32747 left = Series([pd.Interval(0, 1)], dtype="interval") right = left.astype(object) @@ -226,8 +225,7 @@ def test_assert_series_equal_interval_dtype_mismatch(check_dtype): \\[left\\]: interval\\[int64\\] \\[right\\]: object""" - if check_dtype: - with pytest.raises(AssertionError, match=msg): - tm.assert_series_equal(left, right, check_dtype=check_dtype) - else: - tm.assert_series_equal(left, right, check_dtype=check_dtype) + tm.assert_series_equal(left, right, check_dtype=False) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(left, right, check_dtype=True) From bc9e4f0f3e96fa70cab73e584558f91f0fdc522a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 15:25:21 -0500 Subject: [PATCH 09/13] Make linter happy --- pandas/tests/util/test_assert_frame_equal.py | 24 ++++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 878415d148a57..4bcf7087c239d 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -226,11 +226,13 @@ def test_assert_frame_equal_extension_dtype_mismatch(): left = DataFrame({"a": [1, 2, 3]}, dtype="Int64") right = left.astype(int) - msg = """Attributes of DataFrame\\.iloc\\[:, 0\\] \\(column name="a"\\) are different - -Attribute "dtype" are different -\\[left\\]: Int64 -\\[right\\]: int[32|64]""" + msg = ( + "Attributes of DataFrame\\.iloc\\[:, 0\\] " + '\\(column name="a"\\) are different\n\n' + 'Attribute "dtype" are different\n' + "\\[left\\]: Int64\n" + "\\[right\\]: int[32|64]" + ) tm.assert_frame_equal(left, right, check_dtype=False) @@ -243,11 +245,13 @@ def test_assert_frame_equal_interval_dtype_mismatch(): left = DataFrame({"a": [pd.Interval(0, 1)]}, dtype="interval") right = left.astype(object) - msg = """Attributes of DataFrame\\.iloc\\[:, 0\\] \\(column name="a"\\) are different - -Attribute "dtype" are different -\\[left\\]: interval\\[int64\\] -\\[right\\]: object""" + msg = ( + "Attributes of DataFrame\\.iloc\\[:, 0\\] " + '\\(column name="a"\\) are different\n\n' + 'Attribute "dtype" are different\n' + "\\[left\\]: interval\\[int64\\]\n" + "\\[right\\]: object" + ) tm.assert_frame_equal(left, right, check_dtype=False) From a1226d28ad38959f8c495d5baf772fd0362947a1 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 15:29:59 -0500 Subject: [PATCH 10/13] Nit --- pandas/tests/util/test_assert_frame_equal.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 4bcf7087c239d..2b89b5fe4b108 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -227,11 +227,11 @@ def test_assert_frame_equal_extension_dtype_mismatch(): right = left.astype(int) msg = ( - "Attributes of DataFrame\\.iloc\\[:, 0\\] " + 'Attributes of DataFrame\\.iloc\\[:, 0\\] ' '\\(column name="a"\\) are different\n\n' 'Attribute "dtype" are different\n' - "\\[left\\]: Int64\n" - "\\[right\\]: int[32|64]" + '\\[left\\]: Int64\n' + '\\[right\\]: int[32|64]' ) tm.assert_frame_equal(left, right, check_dtype=False) @@ -246,11 +246,11 @@ def test_assert_frame_equal_interval_dtype_mismatch(): right = left.astype(object) msg = ( - "Attributes of DataFrame\\.iloc\\[:, 0\\] " + 'Attributes of DataFrame\\.iloc\\[:, 0\\] ' '\\(column name="a"\\) are different\n\n' 'Attribute "dtype" are different\n' - "\\[left\\]: interval\\[int64\\]\n" - "\\[right\\]: object" + '\\[left\\]: interval\\[int64\\]\n' + '\\[right\\]: object' ) tm.assert_frame_equal(left, right, check_dtype=False) From e828faf3b8afbbce53165d4595ffb8a297367977 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 15:57:49 -0500 Subject: [PATCH 11/13] Un nit --- pandas/tests/util/test_assert_frame_equal.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 2b89b5fe4b108..4bcf7087c239d 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -227,11 +227,11 @@ def test_assert_frame_equal_extension_dtype_mismatch(): right = left.astype(int) msg = ( - 'Attributes of DataFrame\\.iloc\\[:, 0\\] ' + "Attributes of DataFrame\\.iloc\\[:, 0\\] " '\\(column name="a"\\) are different\n\n' 'Attribute "dtype" are different\n' - '\\[left\\]: Int64\n' - '\\[right\\]: int[32|64]' + "\\[left\\]: Int64\n" + "\\[right\\]: int[32|64]" ) tm.assert_frame_equal(left, right, check_dtype=False) @@ -246,11 +246,11 @@ def test_assert_frame_equal_interval_dtype_mismatch(): right = left.astype(object) msg = ( - 'Attributes of DataFrame\\.iloc\\[:, 0\\] ' + "Attributes of DataFrame\\.iloc\\[:, 0\\] " '\\(column name="a"\\) are different\n\n' 'Attribute "dtype" are different\n' - '\\[left\\]: interval\\[int64\\]\n' - '\\[right\\]: object' + "\\[left\\]: interval\\[int64\\]\n" + "\\[right\\]: object" ) tm.assert_frame_equal(left, right, check_dtype=False) From 5c16f18509905fc27ba648b041c748b48940c4f6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 16 Mar 2020 22:12:52 -0500 Subject: [PATCH 12/13] Add release note --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1663d4c44c362..3207bfe94d841 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -398,6 +398,7 @@ Other - Bug in :meth:`DataFrame.to_records` incorrectly losing timezone information in timezone-aware ``datetime64`` columns (:issue:`32535`) - Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`). - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:32538`) +- Fixed bug in :func:`pandas._testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`) .. --------------------------------------------------------------------------- From b65b37c163a7b6258ac720f72b4d0ada3c5d1a2b Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Tue, 17 Mar 2020 08:10:19 -0500 Subject: [PATCH 13/13] Update doc/source/whatsnew/v1.1.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 42fc2e56220a3..0d3a9a8f969a4 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -401,7 +401,7 @@ Other - Bug in :meth:`DataFrame.to_records` incorrectly losing timezone information in timezone-aware ``datetime64`` columns (:issue:`32535`) - Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`). - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:32538`) -- Fixed bug in :func:`pandas._testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`) +- Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`) .. ---------------------------------------------------------------------------