Skip to content

Commit 1e2e7ae

Browse files
committed
Fix tensordot implementation
1 parent f799219 commit 1e2e7ae

File tree

2 files changed

+189
-47
lines changed

2 files changed

+189
-47
lines changed

pytensor/tensor/math.py

Lines changed: 126 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import builtins
22
import warnings
3-
from typing import TYPE_CHECKING, Optional
3+
from collections.abc import Sequence
4+
from typing import TYPE_CHECKING, Optional, Union
45

56
import numpy as np
67

@@ -15,6 +16,7 @@
1516
from pytensor.link.c.type import Generic
1617
from pytensor.misc.safe_asarray import _asarray
1718
from pytensor.printing import pprint
19+
from pytensor.raise_op import Assert
1820
from pytensor.scalar.basic import BinaryScalarOp
1921
from pytensor.tensor.basic import (
2022
alloc,
@@ -47,7 +49,11 @@
4749
)
4850
from pytensor.tensor.type_other import NoneConst
4951
from pytensor.tensor.utils import as_list
50-
from pytensor.tensor.variable import TensorConstant, _tensor_py_operators
52+
from pytensor.tensor.variable import (
53+
TensorConstant,
54+
TensorVariable,
55+
_tensor_py_operators,
56+
)
5157

5258

5359
if TYPE_CHECKING:
@@ -2266,57 +2272,47 @@ def _tensordot_as_dot(a, b, axes, dot, batched):
22662272
)
22672273

22682274

2269-
def tensordot(a, b, axes=2):
2275+
def tensordot(
2276+
a: "ArrayLike", b: "ArrayLike", axes: Union[int, Sequence[Sequence[int]]] = 1
2277+
) -> TensorVariable:
22702278
"""
2271-
Compute a generalized dot product over provided axes.
2279+
Compute tensor dot product along specified axes.
2280+
2281+
Implementation is mostly taken from numpy version 1.26.0
22722282
2273-
Given two tensors a and b, tensordot computes a generalized dot product over
2274-
the provided axes. PyTensor's implementation reduces all expressions to
2275-
matrix or vector dot products and is based on code from Tijmen Tieleman's
2276-
gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
2283+
Given two tensors, `a` and `b`, and a sequence object containing
2284+
two sequence objects, ``(a_axes, b_axes)``, sum the products of
2285+
`a`'s and `b`'s elements (components) over the axes specified by
2286+
``a_axes`` and ``b_axes``. The third argument can be a single non-negative
2287+
integer_like scalar, ``N``; if it is such, then the last ``N`` dimensions
2288+
of `a` and the first ``N`` dimensions of `b` are summed over.
22772289
22782290
Parameters
22792291
----------
2280-
a: symbolic tensor
2281-
The first tensor variable.
2282-
b: symbolic tensor
2283-
The second tensor variable
2284-
axes: int or array-like of length 2
2285-
If an integer, the number of axes to sum over.
2286-
If an array, it must have two array elements containing the axes
2287-
to sum over in each tensor.
2288-
2289-
Note that the default value of 2 is not guaranteed to work
2290-
for all values of a and b, and an error will be raised if
2291-
that is the case. The reason for keeping the default is to
2292-
maintain the same signature as numpy's tensordot function
2293-
(and np.tensordot raises analogous errors for non-compatible
2294-
inputs).
2295-
2296-
If an integer i, it is converted to an array containing
2297-
the last i dimensions of the first tensor and the first
2298-
i dimensions of the second tensor:
2299-
axes = [list(range(a.ndim - i, b.ndim)), list(range(i))]
2300-
2301-
If an array, its two elements must contain compatible axes
2302-
of the two tensors. For example, [[1, 2], [2, 0]] means sum
2303-
over the 2nd and 3rd axes of a and the 3rd and 1st axes of b.
2304-
(Remember axes are zero-indexed!) The 2nd axis of a and the
2305-
3rd axis of b must have the same shape; the same is true for
2306-
the 3rd axis of a and the 1st axis of b.
2292+
a, b : ArrayLike
2293+
Tensors to "dot".
2294+
2295+
axes : int or (2,) array_like
2296+
* integer_like
2297+
If an int N, sum over the last N axes of `a` and the first N axes
2298+
of `b` in order. The sizes of the corresponding axes must match.
2299+
* (2,) array_like
2300+
Or, a list of axes to be summed over, first sequence applying to `a`,
2301+
second to `b`. Both sequences must be of the same length.
23072302
23082303
Returns
23092304
-------
2310-
symbolic tensor
2311-
A tensor with shape equal to the concatenation of a's shape
2312-
(less any dimensions that were summed over) and b's shape
2313-
(less any dimensions that were summed over).
2305+
output : TensorVariable
2306+
The tensor dot product of the input.
2307+
Its shape will be equal to the concatenation of `a` and `b` shapes
2308+
(ignoring the dimensions that were summed over given in ``a_axes``
2309+
and ``b_axes``)
23142310
23152311
Examples
23162312
--------
23172313
It may be helpful to consider an example to see what tensordot does.
2318-
PyTensor's implementation is identical to NumPy's. Here a has shape (2, 3, 4)
2319-
and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
2314+
PyTensor's implementation is identical to NumPy's. Here ``a`` has shape (2, 3, 4)
2315+
and ``b`` has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
23202316
note that a.shape[1] == b.shape[3] and a.shape[2] == b.shape[2]; these axes
23212317
are compatible. The resulting tensor will have shape (2, 5, 6) -- the
23222318
dimensions that are not being summed:
@@ -2347,10 +2343,9 @@ def tensordot(a, b, axes=2):
23472343
true
23482344
23492345
This specific implementation avoids a loop by transposing a and b such that
2350-
the summed axes of a are last and the summed axes of b are first. The
2351-
resulting arrays are reshaped to 2 dimensions (or left as vectors, if
2352-
appropriate) and a matrix or vector dot product is taken. The result is
2353-
reshaped back to the required output dimensions.
2346+
the summed axes of ``a`` are last and the summed axes of ``b`` are first. The
2347+
resulting arrays are reshaped to 2 dimensions and a matrix dot product is taken.
2348+
The result is reshaped back to the required output dimensions.
23542349
23552350
In an extreme case, no axes may be specified. The resulting tensor
23562351
will have shape equal to the concatenation of the shapes of a and b:
@@ -2366,7 +2361,92 @@ def tensordot(a, b, axes=2):
23662361
See the documentation of numpy.tensordot for more examples.
23672362
23682363
"""
2369-
return _tensordot_as_dot(a, b, axes, dot=dot, batched=False)
2364+
try:
2365+
iter(axes)
2366+
except Exception:
2367+
axes_a = list(range(-axes, 0))
2368+
axes_b = list(range(0, axes))
2369+
else:
2370+
axes_a, axes_b = axes
2371+
try:
2372+
na = len(axes_a)
2373+
axes_a = list(axes_a)
2374+
except TypeError:
2375+
axes_a = [axes_a]
2376+
na = 1
2377+
try:
2378+
nb = len(axes_b)
2379+
axes_b = list(axes_b)
2380+
except TypeError:
2381+
axes_b = [axes_b]
2382+
nb = 1
2383+
2384+
a = as_tensor_variable(a)
2385+
b = as_tensor_variable(b)
2386+
as_ = a.shape
2387+
bra = a.broadcastable
2388+
ats = a.type.shape
2389+
nda = a.ndim
2390+
bs = b.shape
2391+
brb = b.broadcastable
2392+
bts = b.type.shape
2393+
ndb = b.ndim
2394+
if na != nb:
2395+
raise ValueError(
2396+
"The number of axes supplied for tensordot must be equal for each tensor. "
2397+
f"Got {na} and {nb} respectively."
2398+
)
2399+
for k in range(na):
2400+
ax_a = axes_a[k]
2401+
ax_b = axes_b[k]
2402+
if ax_a < 0:
2403+
axes_a[k] += nda
2404+
if axes_a[k] < 0 or axes_a[k] >= nda:
2405+
raise ValueError(
2406+
f"Supplied axes {ax_a} for first input of tensordot is out of bounds. "
2407+
f"Input tensor has only ndim={nda}."
2408+
)
2409+
if ax_b < 0:
2410+
axes_b[k] += ndb
2411+
if axes_b[k] < 0 or axes_b[k] >= ndb:
2412+
raise ValueError(
2413+
f"Supplied axes {ax_b} for second input of tensordot is out of bounds. "
2414+
f"Input tensor has only ndim={ndb}."
2415+
)
2416+
if (bra[ax_a] != brb[ax_b]) or (
2417+
ats[ax_a] is not None and bts[ax_b] is not None and ats[ax_a] != bts[ax_b]
2418+
):
2419+
raise ValueError(
2420+
"Input arrays have inconsistent broadcastable pattern or type shape along the axes "
2421+
"that must be multiplied and summed with tensordot."
2422+
)
2423+
elif ats[ax_a] is None or bts[ax_b] is None:
2424+
a = Assert(
2425+
"Input array shape along reduced axes of tensordot are not equal"
2426+
)(a, eq(a.shape[ax_a], b.shape[ax_b]))
2427+
2428+
# Move the axes to sum over to the end of "a"
2429+
# and to the front of "b"
2430+
notin = [k for k in range(nda) if k not in axes_a]
2431+
newaxes_a = notin + axes_a
2432+
N2 = 1
2433+
for axis in axes_a:
2434+
N2 *= as_[axis]
2435+
newshape_a = (cast(prod([as_[ax] for ax in notin]), "int64"), N2)
2436+
olda = [as_[axis] for axis in notin]
2437+
2438+
notin = [k for k in range(ndb) if k not in axes_b]
2439+
newaxes_b = axes_b + notin
2440+
N2 = 1
2441+
for axis in axes_b:
2442+
N2 *= bs[axis]
2443+
newshape_b = (N2, cast(prod([bs[ax] for ax in notin]), "int64"))
2444+
oldb = [bs[axis] for axis in notin]
2445+
2446+
at = a.transpose(newaxes_a).reshape(newshape_a)
2447+
bt = b.transpose(newaxes_b).reshape(newshape_b)
2448+
res = _dot(at, bt)
2449+
return res.reshape(olda + oldb)
23702450

23712451

23722452
def outer(x, y):

tests/tensor/test_math.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@
2424
from pytensor.link.c.basic import DualLinker
2525
from pytensor.misc.safe_asarray import _asarray
2626
from pytensor.printing import pprint
27+
from pytensor.raise_op import Assert
2728
from pytensor.tensor import blas, blas_c
2829
from pytensor.tensor.basic import (
2930
as_tensor_variable,
3031
constant,
3132
eye,
3233
get_underlying_scalar_constant_value,
34+
ones,
3335
switch,
3436
)
3537
from pytensor.tensor.blas import Dot22
@@ -2187,8 +2189,9 @@ def test_broadcastable1(self):
21872189
rng = np.random.default_rng(seed=utt.fetch_seed())
21882190
x = TensorType(dtype=config.floatX, shape=(1, None, None))("x")
21892191
y = tensor3("y")
2190-
z = tensordot(x, y)
2192+
z = tensordot(x, y, axes=2)
21912193
assert z.broadcastable == (True, False)
2194+
assert z.type.shape == (1, None)
21922195
f = inplace_func([x, y], z)
21932196
xv = random(1, 3, 4, rng=rng)
21942197
yv = random(3, 4, 5, rng=rng)
@@ -2202,12 +2205,71 @@ def test_broadcastable2(self):
22022205
axes = [[2, 1], [0, 1]]
22032206
z = tensordot(x, y, axes=axes)
22042207
assert z.broadcastable == (True, False)
2208+
assert z.type.shape == (1, None)
22052209
f = inplace_func([x, y], z)
22062210
xv = random(1, 3, 4, rng=rng)
22072211
yv = random(4, 3, 5, rng=rng)
22082212
zv = f(xv, yv)
22092213
assert np.allclose(np.tensordot(xv, yv, axes=axes), zv)
22102214

2215+
def test_type_shape(self):
2216+
x = ones(shape=(7, 3, 2))
2217+
y = ones(
2218+
shape=(
2219+
10,
2220+
2,
2221+
)
2222+
)
2223+
xv = x.eval()
2224+
yv = y.eval()
2225+
sy = tensor("sy", shape=(None, 2))
2226+
axes = [[-1], [-1]]
2227+
z = tensordot(x, y, axes=axes)
2228+
sz = tensordot(x, sy, axes=axes)
2229+
2230+
fg = FunctionGraph([x, y], [z])
2231+
assert not any(isinstance(n, Assert) for n in fg.toposort())
2232+
assert z.type.shape == (7, 3, 10)
2233+
assert z.broadcastable == (False, False, False)
2234+
assert np.allclose(np.tensordot(xv, yv, axes=axes), z.eval())
2235+
2236+
fg = FunctionGraph([x, sy], [sz])
2237+
assert not any(isinstance(n, Assert) for n in fg.toposort())
2238+
assert sz.type.shape == (7, 3, None)
2239+
assert z.broadcastable == (False, False, False)
2240+
assert np.allclose(np.tensordot(xv, yv, axes=axes), sz.eval({sy: yv}))
2241+
2242+
@pytest.mark.parametrize(
2243+
["axes", "has_assert", "values", "expected_fail"],
2244+
[
2245+
([[1], [2]], False, (np.ones((7, 3, 2)), np.ones((7, 2, 3))), False),
2246+
([[1, 2], [2, 1]], True, (np.ones((7, 3, 2)), np.ones((7, 2, 3))), False),
2247+
([[1, 2], [2, 1]], True, (np.ones((7, 3, 2)), np.ones((7, 5, 3))), True),
2248+
],
2249+
)
2250+
def test_shape_assert(self, axes, has_assert, values, expected_fail):
2251+
x = tensor(shape=(7, 3, None))
2252+
y = tensor(shape=(None, None, 3))
2253+
2254+
xv, yv = values
2255+
2256+
# No assert should be present
2257+
z = tensordot(x, y, axes=axes)
2258+
fg = FunctionGraph([x, y], [z])
2259+
found_asserts = any(isinstance(n.op, Assert) for n in fg.toposort())
2260+
if has_assert:
2261+
assert found_asserts
2262+
else:
2263+
assert not found_asserts
2264+
if expected_fail:
2265+
with pytest.raises(
2266+
AssertionError,
2267+
match="Input array shape along reduced axes of tensordot are not equal",
2268+
):
2269+
z.eval({x: xv, y: yv})
2270+
else:
2271+
assert np.allclose(np.tensordot(xv, yv, axes=axes), z.eval({x: xv, y: yv}))
2272+
22112273

22122274
def test_smallest():
22132275
x = dvector()

0 commit comments

Comments
 (0)