From 0ecb8c2d029bc4192f24d6da1bd2b213c0b3d6bc Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:18:29 +1100 Subject: [PATCH 01/12] Add failing test --- pandas/_libs/testing.pyx | 4 ++- pandas/tests/dtypes/test_inference.py | 52 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 7a2fa471b9ba8..1b55b9d3819e9 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -99,7 +99,9 @@ cpdef assert_almost_equal(a, b, return True a_is_ndarray = is_array(a) + a_has_size_and_shape = hasattr(a, "size") and hasattr(a, "shape") b_is_ndarray = is_array(b) + b_has_size_and_shape = hasattr(b, "size") and hasattr(b, "shape") if obj is None: if a_is_ndarray or b_is_ndarray: @@ -119,7 +121,7 @@ cpdef assert_almost_equal(a, b, f"Can't compare objects without length, one or both is invalid: ({a}, {b})" ) - if a_is_ndarray and b_is_ndarray: + if (a_is_ndarray and b_is_ndarray) or (a_has_size_and_shape and b_has_size_and_shape): na, nb = a.size, b.size if a.shape != b.shape: from pandas._testing import raise_assert_detail diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0f4cef772458f..7f67c566ef1e8 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -60,6 +60,50 @@ def coerce(request): return request.param +class MockNumpyLikeArray: + """ + A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy + + The key is that it is not actually a numpy array so + ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other + important properties are that the class defines a :meth:`__iter__` method + (so that ``isinstance(obj, abc.Iterable)`` returns ``True``) and has a + :attr:`ndim` property which can be used as a check for whether it is a + scalar or not. 
+ """ + + def __init__(self, values): + self._values = values + + def __iter__(self): + iter_values = iter(self._values) + + def it_outer(): + for element in iter_values: + yield element + + return it_outer() + + def __len__(self): + return len(self._values) + + @property + def ndim(self): + return self._values.ndim + + @property + def dtype(self): + return self._values.dtype + + @property + def size(self): + return self._values.size + + @property + def shape(self): + return self._values.shape + + # collect all objects to be tested for list-like-ness; use tuples of objects, # whether they are list-like or not (special casing for sets), and their ID ll_params = [ @@ -166,6 +210,14 @@ class DtypeList(list): assert not inference.is_array_like(123) +@pytest.mark.parametrize("eg", ( + np.array(2), + MockNumpyLikeArray(np.array(2)), +)) +def test_assert_almost_equal(eg): + tm.assert_almost_equal(eg, eg) + + @pytest.mark.parametrize( "inner", [ From ad5f714b794bc58c9c18aeb1ba8ff51b782b0527 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:38:07 +1100 Subject: [PATCH 02/12] Add __array__ method to mock numpy-like --- pandas/tests/dtypes/test_inference.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 7f67c566ef1e8..ccd8d2d782930 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -79,14 +79,16 @@ def __iter__(self): iter_values = iter(self._values) def it_outer(): - for element in iter_values: - yield element + yield from iter_values return it_outer() def __len__(self): return len(self._values) + def __array__(self, t=None): + return self._values + @property def ndim(self): return self._values.ndim @@ -210,10 +212,13 @@ class DtypeList(list): assert not inference.is_array_like(123) -@pytest.mark.parametrize("eg", ( - np.array(2), - MockNumpyLikeArray(np.array(2)), -)) 
+@pytest.mark.parametrize( + "eg", + ( + np.array(2), + MockNumpyLikeArray(np.array(2)), + ), +) def test_assert_almost_equal(eg): tm.assert_almost_equal(eg, eg) From 139493f6bfe8254b287afb82fd3d623bc2b7aa3a Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 5 Jul 2020 15:23:26 +1000 Subject: [PATCH 03/12] TST: GH35131 Add failing test of numpy-like array handling --- pandas/tests/dtypes/test_inference.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ccd8d2d782930..87433e38c9953 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -140,6 +140,15 @@ def shape(self): (np.ndarray((2,) * 4), True, "ndarray-4d"), (np.array([[[[]]]]), True, "ndarray-4d-empty"), (np.array(2), False, "ndarray-0d"), + (MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"), + (MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"), + (MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"), + (MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"), + (MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"), + (MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"), + (MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"), (1, False, "int"), (b"123", False, "bytes"), (b"", False, "bytes-empty"), From ba59fb8ea5daecdbfe0c04793218bb73eb42ab40 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 5 Jul 2020 15:26:06 +1000 Subject: [PATCH 04/12] ENH: GH35131 Implement fix which allows numpy-like handling Now pd.core.dtypes.inference.is_list_like correctly identifies numpy-like scalars as not being iterable --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx 
b/pandas/_libs/lib.pyx index 3a11e7fbbdf33..f2ee70051b8ee 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1047,8 +1047,8 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: isinstance(obj, abc.Iterable) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) - # exclude zero-dimensional numpy arrays, effectively scalars - and not (util.is_array(obj) and obj.ndim == 0) + # exclude zero-dimensional duck arrays, effectively scalars + and not (hasattr(obj, "ndim") and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 451fbfcbbc589cfffa6919fd22d6fb63cf00fdc4 Mon Sep 17 00:00:00 2001 From: Zeb Nicholls Date: Mon, 6 Jul 2020 09:43:34 +1000 Subject: [PATCH 05/12] Simplify ndim check Co-authored-by: keewis --- pandas/_libs/lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f2ee70051b8ee..db3a329bc0f0a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1048,7 +1048,7 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude zero-dimensional duck arrays, effectively scalars - and not (hasattr(obj, "ndim") and obj.ndim == 0) + and getattr(obj, "ndim", 0) != 0 # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 57e186b1df29698275ffa64b1883ecbb7955fe08 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 6 Jul 2020 10:01:36 +1000 Subject: [PATCH 06/12] Revert change because it broke tests I'm not completely sure why, but reverting here for simplicity --- pandas/_libs/lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index db3a329bc0f0a..f2ee70051b8ee 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ 
-1048,7 +1048,7 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude zero-dimensional duck arrays, effectively scalars - and getattr(obj, "ndim", 0) != 0 + and not (hasattr(obj, "ndim") and obj.ndim == 0) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 590c2a2c89d6e6e6e538e47f0d71350b8f973472 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 6 Jul 2020 10:20:06 +1000 Subject: [PATCH 07/12] Use slightly clearer logic --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f2ee70051b8ee..17cc78dfa07a1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1047,8 +1047,8 @@ cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: isinstance(obj, abc.Iterable) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) - # exclude zero-dimensional duck arrays, effectively scalars - and not (hasattr(obj, "ndim") and obj.ndim == 0) + # assume not a 0d array unless there's evidence otherwise + and getattr(obj, "ndim", 1) != 0 # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From 27bd4e4021f47d035d25f3d9478d4edbc8d718fc Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 7 Jul 2020 10:21:51 +1000 Subject: [PATCH 08/12] Update to use numpy iterable --- pandas/_libs/lib.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 17cc78dfa07a1..306784ff915e5 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1044,11 +1044,9 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: return ( - isinstance(obj, abc.Iterable) + 
np.iterable(obj) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) - # assume not a 0d array unless there's evidence otherwise - and getattr(obj, "ndim", 1) != 0 # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) From d9a2c8f120e8ad25f6496597c39a8f4fb2758fae Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 7 Jul 2020 11:02:09 +1000 Subject: [PATCH 09/12] Add failing is_scalar tests --- pandas/tests/dtypes/test_inference.py | 72 +++++++++++++++++---------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 87433e38c9953..89f5d80267a24 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1493,34 +1493,52 @@ def test_is_scalar_builtin_nonscalars(self): assert not is_scalar(slice(None)) assert not is_scalar(Ellipsis) - def test_is_scalar_numpy_array_scalars(self): - assert is_scalar(np.int64(1)) - assert is_scalar(np.float64(1.0)) - assert is_scalar(np.int32(1)) - assert is_scalar(np.complex64(2)) - assert is_scalar(np.object_("foobar")) - assert is_scalar(np.str_("foobar")) - assert is_scalar(np.unicode_("foobar")) - assert is_scalar(np.bytes_(b"foobar")) - assert is_scalar(np.datetime64("2014-01-01")) - assert is_scalar(np.timedelta64(1, "h")) - - def test_is_scalar_numpy_zerodim_arrays(self): - for zerodim in [ - np.array(1), - np.array("foobar"), - np.array(np.datetime64("2014-01-01")), - np.array(np.timedelta64(1, "h")), - np.array(np.datetime64("NaT")), - ]: - assert not is_scalar(zerodim) - assert is_scalar(lib.item_from_zerodim(zerodim)) - + @pytest.mark.parametrize("start", ( + np.int64(1), + np.float64(1.0), + np.int32(1), + np.complex64(2), + np.object_("foobar"), + np.str_("foobar"), + np.unicode_("foobar"), + np.bytes_(b"foobar"), + np.datetime64("2014-01-01"), + np.timedelta64(1, "h"), + )) + 
@pytest.mark.parametrize("numpy_like", (True, False)) + def test_is_scalar_numpy_array_scalars(self, start, numpy_like): + if numpy_like: + start = MockNumpyLikeArray(start) + + assert is_scalar(start) + + @pytest.mark.parametrize("zerodim", ( + np.array(1), + np.array("foobar"), + np.array(np.datetime64("2014-01-01")), + np.array(np.timedelta64(1, "h")), + np.array(np.datetime64("NaT")), + )) + @pytest.mark.parametrize("numpy_like", (True, False)) + def test_is_scalar_numpy_zerodim_arrays(self, zerodim, numpy_like): + if numpy_like: + zerodim = MockNumpyLikeArray(zerodim) + + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) + + @pytest.mark.parametrize("start", ( + np.array([]), + np.array([[]]), + np.matrix("1; 2"), + )) + @pytest.mark.parametrize("numpy_like", (True, False)) @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") - def test_is_scalar_numpy_arrays(self): - assert not is_scalar(np.array([])) - assert not is_scalar(np.array([[]])) - assert not is_scalar(np.matrix("1; 2")) + def test_is_scalar_numpy_arrays(self, start, numpy_like): + if numpy_like: + start = MockNumpyLikeArray(start) + + assert not is_scalar(start) def test_is_scalar_pandas_scalars(self): assert is_scalar(Timestamp("2014-01-01")) From 764e7b1045fa7ba9ffa587bdb54fac0e66f2f345 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 8 Jul 2020 08:53:42 +1000 Subject: [PATCH 10/12] Revert to relying on python's shortcircuit operators Also avoid np.iterable --- pandas/_libs/lib.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 306784ff915e5..a78ae49b3b18f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1044,7 +1044,9 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: return ( - np.iterable(obj) + isinstance(obj, abc.Iterable) + # avoid numpy-style scalars + and not 
(hasattr(obj, "ndim") and obj.ndim == 0) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude sets if allow_sets is False From 088dff81863047d9260a0daba7d2b0015cc4010b Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:13:01 +1100 Subject: [PATCH 11/12] Make a mess --- pandas/_libs/testing.pyx | 1 + pandas/core/dtypes/missing.py | 5 +++++ pandas/tests/dtypes/test_inference.py | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 1b55b9d3819e9..6cd5e45b73e17 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -11,6 +11,7 @@ from pandas._libs.lib import is_complex from pandas._libs.util cimport is_array, is_real_number_object from pandas.core.dtypes.common import is_dtype_equal +from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import array_equivalent, isna diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ef645313de614..97ad5ff010237 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -424,6 +424,11 @@ def array_equivalent( # Slow path when we allow comparing different dtypes. # Object arrays can contain None, NaN and NaT. 
# string dtypes must be come to this path for NumPy 1.7.1 compat + try: + return np.array_equal(left, right) + except: + pass + if is_string_dtype(left.dtype) or is_string_dtype(right.dtype): return _array_equivalent_object(left, right, strict_nan) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 89f5d80267a24..11f88cab54fea 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -209,6 +209,8 @@ def test_is_array_like(): assert inference.is_array_like(Series([1, 2])) assert inference.is_array_like(np.array(["a", "b"])) assert inference.is_array_like(Index(["2016-01-01"])) + assert inference.is_array_like(np.array([2, 3])) + assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3]))) class DtypeList(list): dtype = "special" @@ -221,6 +223,12 @@ class DtypeList(list): assert not inference.is_array_like(123) +def test_assert_almost_equal(): + tm.assert_almost_equal(np.array(2), np.array(2)) + eg = MockNumpyLikeArray(np.array(2)) + tm.assert_almost_equal(eg, eg) + + @pytest.mark.parametrize( "eg", ( From 928842fd7f3cae2d13840b90f1b945071a922f90 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 13 Feb 2021 19:42:55 +1100 Subject: [PATCH 12/12] Fix missing module --- pandas/core/dtypes/missing.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 97ad5ff010237..ef645313de614 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -424,11 +424,6 @@ def array_equivalent( # Slow path when we allow comparing different dtypes. # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat - try: - return np.array_equal(left, right) - except: - pass - if is_string_dtype(left.dtype) or is_string_dtype(right.dtype): return _array_equivalent_object(left, right, strict_nan)