Skip to content

Backport PR #50396 on branch 1.5.x (BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy) #50739

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Bug fixes
- Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`)
- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`)
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`)
- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,10 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo
if "boolean value of NA is ambiguous" in str(err):
return False
raise
except ValueError:
# numpy can raise a ValueError if left and right cannot be
# compared (e.g. nested arrays)
return False
return True


Expand Down
116 changes: 109 additions & 7 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,18 +525,120 @@ def test_array_equivalent_str(dtype):
)


def test_array_equivalent_nested():
@pytest.mark.parametrize(
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested(strict_nan):
# reached in groupby aggregations, make sure we use np.any when checking
# if the comparison is truthy
left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object)
right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object)
left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object)
right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object)

assert array_equivalent(left, right, strict_nan=True)
assert not array_equivalent(left, right[::-1], strict_nan=True)
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object)
left = np.empty(2, dtype=object)
left[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])]
right = np.empty(2, dtype=object)
right[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])]
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object)
right = np.array([50, 40])
assert not array_equivalent(left, right, strict_nan=True)
assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.parametrize(
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested2(strict_nan):
# more than one level of nesting
left = np.array(
[
np.array([np.array([50, 70]), np.array([90])], dtype=object),
np.array([np.array([20, 30])], dtype=object),
],
dtype=object,
)
right = np.array(
[
np.array([np.array([50, 70]), np.array([90])], dtype=object),
np.array([np.array([20, 30])], dtype=object),
],
dtype=object,
)
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

left = np.array([np.array([np.array([50, 50, 50])], dtype=object)], dtype=object)
right = np.array([50])
assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.parametrize(
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested_list(strict_nan):
left = np.array([[50, 70, 90], [20, 30]], dtype=object)
right = np.array([[50, 70, 90], [20, 30]], dtype=object)

assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

left = np.array([[50, 50, 50], [40, 40]], dtype=object)
right = np.array([50, 40])
assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.xfail(reason="failing")
@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_mixed_list(strict_nan):
# mixed arrays / lists in left and right
# https://github.com/pandas-dev/pandas/issues/50360
left = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object)
right = np.array([[1, 2, 3], [4, 5]], dtype=object)

assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

# multiple levels of nesting
left = np.array(
[
np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object),
np.array([np.array([6]), np.array([7, 8]), np.array([9])], dtype=object),
],
dtype=object,
)
right = np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object)
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

# same-length lists
subarr = np.empty(2, dtype=object)
subarr[:] = [
np.array([None, "b"], dtype=object),
np.array(["c", "d"], dtype=object),
]
left = np.array([subarr, None], dtype=object)
right = np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object)
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)


@pytest.mark.xfail(reason="failing")
@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_dicts(strict_nan):
left = np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object)
right = np.array(
[{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object
)
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

right2 = np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object)
assert array_equivalent(left, right2, strict_nan=strict_nan)
assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan)


@pytest.mark.parametrize(
Expand Down
84 changes: 84 additions & 0 deletions pandas/tests/util/test_assert_almost_equal.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,3 +458,87 @@ def test_assert_almost_equal_iterable_values_mismatch():

with pytest.raises(AssertionError, match=msg):
tm.assert_almost_equal([1, 2], [1, 3])


subarr = np.empty(2, dtype=object)
subarr[:] = [np.array([None, "b"], dtype=object), np.array(["c", "d"], dtype=object)]

NESTED_CASES = [
# nested array
(
np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object),
np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object),
),
# >1 level of nesting
(
np.array(
[
np.array([np.array([50, 70]), np.array([90])], dtype=object),
np.array([np.array([20, 30])], dtype=object),
],
dtype=object,
),
np.array(
[
np.array([np.array([50, 70]), np.array([90])], dtype=object),
np.array([np.array([20, 30])], dtype=object),
],
dtype=object,
),
),
# lists
(
np.array([[50, 70, 90], [20, 30]], dtype=object),
np.array([[50, 70, 90], [20, 30]], dtype=object),
),
# mixed array/list
(
np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object),
np.array([[1, 2, 3], [4, 5]], dtype=object),
),
(
np.array(
[
np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object),
np.array(
[np.array([6]), np.array([7, 8]), np.array([9])], dtype=object
),
],
dtype=object,
),
np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object),
),
# same-length lists
(
np.array([subarr, None], dtype=object),
np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object),
),
# dicts
(
np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object),
np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object),
),
(
np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object),
np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object),
),
# array/list of dicts
(
np.array(
[
np.array(
[{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object
),
np.array([], dtype=object),
],
dtype=object,
),
np.array([[{"f1": 1, "f2": ["a", "b"]}], []], dtype=object),
),
]


@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
@pytest.mark.parametrize("a,b", NESTED_CASES)
def test_assert_almost_equal_array_nested(a, b):
_assert_almost_equal_both(a, b)