From 1b654c9c9ba54ed94b9cf1d984fe1f4546436ab6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 3 Jan 2024 23:59:51 +0100 Subject: [PATCH 1/7] TST: Don't ignore tolerance for integer series --- pandas/_testing/asserters.py | 31 ++++++++++++++----- pandas/tests/util/test_assert_series_equal.py | 7 +++++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d0f38c85868d4..5b049b7a5b366 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -10,6 +10,7 @@ import numpy as np +from pandas._libs import lib from pandas._libs.missing import is_matching_na from pandas._libs.sparse import SparseIndex import pandas._libs.testing as _testing @@ -811,14 +812,14 @@ def assert_series_equal( check_index_type: bool | Literal["equiv"] = "equiv", check_series_type: bool = True, check_names: bool = True, - check_exact: bool = False, + check_exact: bool | lib.NoDefault = lib.no_default, check_datetimelike_compat: bool = False, check_categorical: bool = True, check_category_order: bool = True, check_freq: bool = True, check_flags: bool = True, - rtol: float = 1.0e-5, - atol: float = 1.0e-8, + rtol: float | lib.NoDefault = lib.no_default, + atol: float | lib.NoDefault = lib.no_default, obj: str = "Series", *, check_index: bool = True, @@ -877,6 +878,25 @@ def assert_series_equal( >>> tm.assert_series_equal(a, b) """ __tracebackhide__ = True + if ( + check_exact is lib.no_default + and rtol is lib.no_default + and atol is lib.no_default + ): + if ( + is_numeric_dtype(left.dtype) + and not is_float_dtype(left.dtype) + or is_numeric_dtype(right.dtype) + and not is_float_dtype(right.dtype) + ): + check_exact = True + else: + check_exact = False + elif check_exact is lib.no_default: + check_exact = False + + rtol = rtol if rtol is not lib.no_default else 1.0e-5 + atol = atol if atol is not lib.no_default else 1.0e-8 if not check_index and check_like: raise ValueError("check_like must be False if check_index is False") @@ -931,10 +951,7 @@ def assert_series_equal( pass else: assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") - if check_exact or ( - (is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype)) - or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype)) - ): + if check_exact: left_values = left._values right_values = right._values # Only check exact if dtype is numeric diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index c4ffc197298f0..4f0f3fca3f682 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -462,3 +462,10 @@ def test_ea_and_numpy_no_dtype_check(val, check_exact, dtype): left = Series([1, 2, val], dtype=dtype) right = Series(pd.array([1, 2, val])) tm.assert_series_equal(left, right, check_dtype=False, check_exact=check_exact) + + +def test_assert_series_equal_int_tol(): + # GH#56646 + left = Series([81, 18, 121, 38, 74, 72, 81, 81, 146, 81, 81, 170, 74, 74]) + right = Series([72, 9, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72]) + tm.assert_series_equal(left, right, rtol=1.5) From a6b150381246607713cb9f2983f19af79fe696cf Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 4 Jan 2024 21:43:20 +0100 Subject: [PATCH 2/7] Add whatsnew --- pandas/_testing/asserters.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 5b049b7a5b366..847caef7911ab 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -842,7 +842,12 @@ def assert_series_equal( check_names : bool, default True Whether to check the Series and Index names attribute. check_exact : bool, default False - Whether to compare number exactly. Only takes effect for float dtypes. + Whether to compare number exactly. + + .. versionchanged:: 2.2.0 + + Defaults to True for integer dtypes if non of + ``check_exact``, ``rtol`` and ``atol`` are specified. check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True From 987bb6cf427d9a6806ba917644a3dcb1ebf64d14 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 4 Jan 2024 21:43:49 +0100 Subject: [PATCH 3/7] Add whatsnew --- pandas/_testing/asserters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 847caef7911ab..be5f166613857 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -846,7 +846,7 @@ def assert_series_equal( .. versionchanged:: 2.2.0 - Defaults to True for integer dtypes if non of + Defaults to True for integer dtypes if none of ``check_exact``, ``rtol`` and ``atol`` are specified. check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. From 8b287cfe887a9952eea96c39527e6647be95581b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 4 Jan 2024 22:31:21 +0100 Subject: [PATCH 4/7] Fixup --- pandas/_testing/asserters.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index be5f166613857..abb87ec923ea0 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -888,15 +888,12 @@ def assert_series_equal( and rtol is lib.no_default and atol is lib.no_default ): - if ( + check_exact = ( is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype) or is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype) - ): - check_exact = True - else: - check_exact = False + ) elif check_exact is lib.no_default: check_exact = False From 27a11a37be1a5aecc34836a1494220a2d9e0b799 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 6 Jan 2024 15:53:35 +0100 Subject: [PATCH 5/7] Update --- pandas/_testing/asserters.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index abb87ec923ea0..3df82b6bb9f6a 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -716,7 +716,12 @@ def assert_extension_array_equal( index_values : Index | numpy.ndarray, default None Optional index (shared by both left and right), used in output. check_exact : bool, default False - Whether to compare number exactly. Only takes effect for float dtypes. + Whether to compare number exactly. + + .. versionchanged:: 2.2.0 + + Defaults to True for integer dtypes if none of + ``check_exact``, ``rtol`` and ``atol`` are specified. rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. atol : float, default 1e-8 @@ -1122,7 +1127,12 @@ def assert_frame_equal( Specify how to compare internal data. If False, compare by columns. If True, compare by blocks. check_exact : bool, default False - Whether to compare number exactly. Only takes effect for float dtypes. + Whether to compare number exactly. + + .. versionchanged:: 2.2.0 + + Defaults to True for integer dtypes if none of + ``check_exact``, ``rtol`` and ``atol`` are specified. check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True From ca48df44fe43467f8a23e2693891d5e4e2625f83 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 6 Jan 2024 16:01:15 +0100 Subject: [PATCH 6/7] Update --- pandas/_testing/asserters.py | 50 +++++++++++++++---- pandas/tests/util/test_assert_series_equal.py | 5 ++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3df82b6bb9f6a..162692c1994f6 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -699,9 +699,9 @@ def assert_extension_array_equal( right, check_dtype: bool | Literal["equiv"] = True, index_values=None, - check_exact: bool = False, - rtol: float = 1.0e-5, - atol: float = 1.0e-8, + check_exact: bool | lib.NoDefault = lib.no_default, + rtol: float | lib.NoDefault = lib.no_default, + atol: float | lib.NoDefault = lib.no_default, obj: str = "ExtensionArray", ) -> None: """ @@ -745,6 +745,23 @@ def assert_extension_array_equal( >>> b, c = a.array, a.array >>> tm.assert_extension_array_equal(b, c) """ + if ( + check_exact is lib.no_default + and rtol is lib.no_default + and atol is lib.no_default + ): + check_exact = ( + is_numeric_dtype(left.dtype) + and not is_float_dtype(left.dtype) + or is_numeric_dtype(right.dtype) + and not is_float_dtype(right.dtype) + ) + elif check_exact is lib.no_default: + check_exact = False + + rtol = rtol if rtol is not lib.no_default else 1.0e-5 + atol = atol if atol is not lib.no_default else 1.0e-8 + assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" if check_dtype: @@ -790,10 +807,7 @@ def assert_extension_array_equal( left_valid = left[~left_na].to_numpy(dtype=object) right_valid = right[~right_na].to_numpy(dtype=object) - if check_exact or ( - (is_numeric_dtype(left.dtype) and not is_float_dtype(left.dtype)) - or (is_numeric_dtype(right.dtype) and not is_float_dtype(right.dtype)) - ): + if check_exact: assert_numpy_array_equal( left_valid, right_valid, obj=obj, index_values=index_values ) @@ -1085,14 +1099,14 @@ def assert_frame_equal( check_frame_type: bool = True, check_names: bool = True, by_blocks: bool = False, - check_exact: bool = False, + check_exact: bool | lib.NoDefault = lib.no_default, check_datetimelike_compat: bool = False, check_categorical: bool = True, check_like: bool = False, check_freq: bool = True, check_flags: bool = True, - rtol: float = 1.0e-5, - atol: float = 1.0e-8, + rtol: float | lib.NoDefault = lib.no_default, + atol: float | lib.NoDefault = lib.no_default, obj: str = "DataFrame", ) -> None: """ @@ -1187,6 +1201,22 @@ def assert_frame_equal( >>> assert_frame_equal(df1, df2, check_dtype=False) """ __tracebackhide__ = True + if ( + check_exact is lib.no_default + and rtol is lib.no_default + and atol is lib.no_default + ): + check_exact = ( + is_numeric_dtype(left.dtype) + and not is_float_dtype(left.dtype) + or is_numeric_dtype(right.dtype) + and not is_float_dtype(right.dtype) + ) + elif check_exact is lib.no_default: + check_exact = False + + rtol = rtol if rtol is not lib.no_default else 1.0e-5 + atol = atol if atol is not lib.no_default else 1.0e-8 # instance validation _check_isinstance(left, right, DataFrame) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 4f0f3fca3f682..784a0347cf92b 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -469,3 +469,8 @@ def test_assert_series_equal_int_tol(): left = Series([81, 18, 121, 38, 74, 72, 81, 81, 146, 81, 81, 170, 74, 74]) right = Series([72, 9, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72]) tm.assert_series_equal(left, right, rtol=1.5) + + tm.assert_frame_equal(left.to_frame(), right.to_frame(), rtol=1.5) + tm.assert_extension_array_equal( + left.astype("Int64").values, right.astype("Int64").values, rtol=1.5 + ) From 4e8564d1ca453541b7051223a48298b3115bc41e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 6 Jan 2024 16:05:03 +0100 Subject: [PATCH 7/7] Fixup --- pandas/_testing/asserters.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 162692c1994f6..3de982498e996 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1201,22 +1201,9 @@ def assert_frame_equal( >>> assert_frame_equal(df1, df2, check_dtype=False) """ __tracebackhide__ = True - if ( - check_exact is lib.no_default - and rtol is lib.no_default - and atol is lib.no_default - ): - check_exact = ( - is_numeric_dtype(left.dtype) - and not is_float_dtype(left.dtype) - or is_numeric_dtype(right.dtype) - and not is_float_dtype(right.dtype) - ) - elif check_exact is lib.no_default: - check_exact = False - - rtol = rtol if rtol is not lib.no_default else 1.0e-5 - atol = atol if atol is not lib.no_default else 1.0e-8 + _rtol = rtol if rtol is not lib.no_default else 1.0e-5 + _atol = atol if atol is not lib.no_default else 1.0e-8 + _check_exact = check_exact if check_exact is not lib.no_default else False # instance validation _check_isinstance(left, right, DataFrame) @@ -1240,11 +1227,11 @@ def assert_frame_equal( right.index, exact=check_index_type, check_names=check_names, - check_exact=check_exact, + check_exact=_check_exact, check_categorical=check_categorical, check_order=not check_like, - rtol=rtol, - atol=atol, + rtol=_rtol, + atol=_atol, obj=f"{obj}.index", ) @@ -1254,11 +1241,11 @@ def assert_frame_equal( right.columns, exact=check_column_type, check_names=check_names, - check_exact=check_exact, + check_exact=_check_exact, check_categorical=check_categorical, check_order=not check_like, - rtol=rtol, - atol=atol, + rtol=_rtol, + atol=_atol, obj=f"{obj}.columns", )