diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index cbad169fe4d56..2466fa3961c5e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -91,6 +91,7 @@ Other enhancements - :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) - Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) +- Improved exception message when using :func:`assert_frame_equal` on a :class:`DataFrame` to include the column that is compared (:issue:`50323`) - Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`) - Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`) - Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 5d7daec65c7d1..276d3019e126f 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -680,6 +680,7 @@ def assert_extension_array_equal( check_exact: bool = False, rtol: float = 1.0e-5, atol: float = 1.0e-8, + obj: str = "ExtensionArray", ) -> None: """ Check that left and right ExtensionArrays are equal. @@ -702,6 +703,11 @@ def assert_extension_array_equal( Absolute tolerance. Only used when check_exact is False. .. 
versionadded:: 1.1.0 + obj : str, default 'ExtensionArray' + Specify object name being compared, internally used to show appropriate + assertion message. + + .. versionadded:: 2.0.0 Notes ----- @@ -719,7 +725,7 @@ def assert_extension_array_equal( assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" if check_dtype: - assert_attr_equal("dtype", left, right, obj="ExtensionArray") + assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") if ( isinstance(left, DatetimeLikeArrayMixin) @@ -729,21 +735,24 @@ def assert_extension_array_equal( # Avoid slow object-dtype comparisons # np.asarray for case where we have a np.MaskedArray assert_numpy_array_equal( - np.asarray(left.asi8), np.asarray(right.asi8), index_values=index_values + np.asarray(left.asi8), + np.asarray(right.asi8), + index_values=index_values, + obj=obj, ) return left_na = np.asarray(left.isna()) right_na = np.asarray(right.isna()) assert_numpy_array_equal( - left_na, right_na, obj="ExtensionArray NA mask", index_values=index_values + left_na, right_na, obj=f"{obj} NA mask", index_values=index_values ) left_valid = left[~left_na].to_numpy(dtype=object) right_valid = right[~right_na].to_numpy(dtype=object) if check_exact: assert_numpy_array_equal( - left_valid, right_valid, obj="ExtensionArray", index_values=index_values + left_valid, right_valid, obj=obj, index_values=index_values ) else: _testing.assert_almost_equal( @@ -752,7 +761,7 @@ def assert_extension_array_equal( check_dtype=bool(check_dtype), rtol=rtol, atol=atol, - obj="ExtensionArray", + obj=obj, index_values=index_values, ) @@ -909,6 +918,7 @@ def assert_series_equal( right_values, check_dtype=check_dtype, index_values=np.asarray(left.index), + obj=str(obj), ) else: assert_numpy_array_equal( @@ -955,6 +965,7 @@ def assert_series_equal( atol=atol, check_dtype=check_dtype, index_values=np.asarray(left.index), + obj=str(obj), ) elif 
is_extension_array_dtype_and_needs_i8_conversion( left.dtype, right.dtype @@ -964,6 +975,7 @@ def assert_series_equal( right._values, check_dtype=check_dtype, index_values=np.asarray(left.index), + obj=str(obj), ) elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype): # DatetimeArray or TimedeltaArray @@ -972,6 +984,7 @@ def assert_series_equal( right._values, check_dtype=check_dtype, index_values=np.asarray(left.index), + obj=str(obj), ) else: _testing.assert_almost_equal( diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index e6012a04dfc7a..2e5e2fc77d6c4 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -140,7 +140,7 @@ def test_custom_asserts(self): self.assert_frame_equal(a.to_frame(), a.to_frame()) b = pd.Series(data.take([0, 0, 1])) - msg = r"ExtensionArray are different" + msg = r"Series are different" with pytest.raises(AssertionError, match=msg): self.assert_series_equal(a, b) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index c66a93d2db651..eb9223a8221eb 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -366,3 +366,36 @@ def test_assert_frame_equal_check_like_categorical_midx(): ), ) tm.assert_frame_equal(left, right, check_like=True) + + +def test_assert_frame_equal_ea_column_definition_in_exception_mask(): + # GH#50323 + df1 = DataFrame({"a": pd.Series([pd.NA, 1], dtype="Int64")}) + df2 = DataFrame({"a": pd.Series([1, 1], dtype="Int64")}) + + msg = r'DataFrame.iloc\[:, 0\] \(column name="a"\) NA mask values are different' + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2) + + +def test_assert_frame_equal_ea_column_definition_in_exception(): + # GH#50323 + df1 = DataFrame({"a": pd.Series([pd.NA, 1], dtype="Int64")}) + df2 = DataFrame({"a": pd.Series([pd.NA, 2], dtype="Int64")}) + + 
msg = r'DataFrame.iloc\[:, 0\] \(column name="a"\) values are different' + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, check_exact=True) + + +def test_assert_frame_equal_ts_column(): + # GH#50323 + df1 = DataFrame({"a": [pd.Timestamp("2019-12-31"), pd.Timestamp("2020-12-31")]}) + df2 = DataFrame({"a": [pd.Timestamp("2020-12-31"), pd.Timestamp("2020-12-31")]}) + + msg = r'DataFrame.iloc\[:, 0\] \(column name="a"\) values are different' + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 4665077696b2b..835f710842cc0 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_extension_array_dtype - import pandas as pd from pandas import ( Categorical, @@ -116,10 +114,7 @@ def test_less_precise(data1, data2, dtype, decimals): s2 = Series([data2], dtype=dtype) if decimals in (5, 10) or (decimals >= 3 and abs(data1 - data2) >= 0.0005): - if is_extension_array_dtype(dtype): - msg = "ExtensionArray are different" - else: - msg = "Series values are different" + msg = "Series values are different" with pytest.raises(AssertionError, match=msg): tm.assert_series_equal(s1, s2, rtol=rtol) else: @@ -237,9 +232,9 @@ def test_series_equal_categorical_values_mismatch(rtol): def test_series_equal_datetime_values_mismatch(rtol): - msg = """numpy array are different + msg = """Series are different -numpy array values are different \\(100.0 %\\) +Series values are different \\(100.0 %\\) \\[index\\]: \\[0, 1, 2\\] \\[left\\]: \\[1514764800000000000, 1514851200000000000, 1514937600000000000\\] \\[right\\]: \\[1549065600000000000, 1549152000000000000, 1549238400000000000\\]"""