From 4bbc58faf30d9a83febb7d67b078b0aef53ed397 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 29 Dec 2022 17:25:48 +0300 Subject: [PATCH 1/3] change broadcasting behaviour --- pytensor/sparse/basic.py | 2 +- pytensor/sparse/rewriting.py | 4 +- pytensor/sparse/type.py | 9 +- pytensor/tensor/basic.py | 2 +- pytensor/tensor/elemwise.py | 240 +++++++++++++++++---------- pytensor/tensor/elemwise_cgen.py | 47 ++---- pytensor/tensor/rewriting/basic.py | 9 +- pytensor/tensor/shape.py | 26 ++- pytensor/tensor/type.py | 90 ++++++---- pytensor/tensor/var.py | 2 +- tests/sparse/test_basic.py | 18 +- tests/tensor/rewriting/test_basic.py | 8 +- tests/tensor/rewriting/test_shape.py | 71 ++++++-- tests/tensor/test_elemwise.py | 234 +++++++++++++++++++++----- tests/tensor/test_shape.py | 5 +- tests/tensor/test_type.py | 17 +- 16 files changed, 532 insertions(+), 252 deletions(-) diff --git a/pytensor/sparse/basic.py b/pytensor/sparse/basic.py index 025bf2170d..4f180ba062 100644 --- a/pytensor/sparse/basic.py +++ b/pytensor/sparse/basic.py @@ -954,7 +954,7 @@ class DenseFromSparse(Op): """ - __props__ = () + __props__ = ("sparse_grad",) def __init__(self, structured=True): self.sparse_grad = structured diff --git a/pytensor/sparse/rewriting.py b/pytensor/sparse/rewriting.py index 73e7d3d8dc..12b6f109e9 100644 --- a/pytensor/sparse/rewriting.py +++ b/pytensor/sparse/rewriting.py @@ -1099,13 +1099,13 @@ def c_code_cache_version(self): csm_grad_c = CSMGradC() -@node_rewriter([csm_grad(None)]) +@node_rewriter([csm_grad()]) def local_csm_grad_c(fgraph, node): """ csm_grad(None) -> csm_grad_c """ - if node.op == csm_grad(None): + if node.op == csm_grad(): return [csm_grad_c(*node.inputs)] return False diff --git a/pytensor/sparse/type.py b/pytensor/sparse/type.py index c5fd52cced..d596c983d8 100644 --- a/pytensor/sparse/type.py +++ b/pytensor/sparse/type.py @@ -73,7 +73,10 @@ def __init__( ): if shape is None and broadcastable is None: shape = (None, None) - + if broadcastable is None: + broadcastable = (False, False) + if broadcastable != (False, False): + raise ValueError("Broadcasting sparse types is not yet implemented") if format not in self.format_cls: raise ValueError( f'unsupported format "{format}" not in list', @@ -95,7 +98,9 @@ def clone( dtype = self.dtype if shape is None: shape = self.shape - return type(self)(format, dtype, shape=shape, **kwargs) + return type(self)( + format, dtype, shape=shape, broadcastable=broadcastable, **kwargs + ) def filter(self, value, strict=False, allow_downcast=None): if isinstance(value, Variable): diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py index 13e8b7c75a..c967699fb5 100644 --- a/pytensor/tensor/basic.py +++ b/pytensor/tensor/basic.py @@ -396,7 +396,7 @@ def get_scalar_constant_value( for i in v.owner.inputs ] ret = [[None]] - v.owner.op.perform(v.owner, const, ret) + v.owner.op.scalar_op.perform(v.owner, const, ret) return np.asarray(ret[0][0].copy()) elif ( isinstance(v.owner.op, pytensor.tensor.subtensor.Subtensor) diff --git a/pytensor/tensor/elemwise.py b/pytensor/tensor/elemwise.py index 17d4c5776b..c6b4c344c7 100644 --- a/pytensor/tensor/elemwise.py +++ b/pytensor/tensor/elemwise.py @@ -1,5 +1,5 @@ from copy import copy -from typing import List, Tuple +from typing import List, Optional, Sequence, Tuple import numpy as np @@ -154,11 +154,11 @@ def __init__(self, input_broadcastable, new_order): # List of input dimensions to drop drop = [] - for i, b in enumerate(input_broadcastable): + for i, bcasted in 
enumerate(input_broadcastable): if i not in new_order: # We want to drop this dimension because it's not a value in # `new_order` - if b == 1: + if bcasted: drop.append(i) else: # We cannot drop non-broadcastable dimensions @@ -187,13 +187,13 @@ def __setstate__(self, state): def make_node(self, _input): input = as_tensor_variable(_input) - ib = tuple(s == 1 for s in input.type.shape) - if ib != self.input_broadcastable: - if len(ib) != len(self.input_broadcastable): - raise TypeError( - "The number of dimensions of the " - f"input is incorrect for this op. Expected {self.input_broadcastable}, got {ib}." - ) + ib = input.type.broadcastable + if len(ib) != len(self.input_broadcastable): + raise TypeError( + "The number of dimensions of the " + f"input is incorrect for this op. Expected {self.input_broadcastable}, got {ib}." + ) + else: for expected, b in zip(self.input_broadcastable, ib): if expected is True and b is False: raise TypeError( @@ -270,7 +270,7 @@ def grad(self, inp, grads): return [inp[0].zeros_like(dtype=config.floatX)] else: return [ - DimShuffle(tuple(s == 1 for s in gz.type.shape), grad_order)( + DimShuffle(gz.type.broadcastable, grad_order)( Elemwise(scalar_identity)(gz) ) ] @@ -303,6 +303,80 @@ def process(self, r, pstate): pprint.assign(DimShuffle, DimShufflePrinter()) +def broadcast_shapes( + shapes: Sequence[Tuple[Optional[int], ...]], + broadcast_patterns: Sequence[Tuple[bool, ...]], +): + """Broadcast shapes or raise an error. + + This function already assumes all shapes have equal length, e.g. after Dimsuffle. + + Parameters + ---------- + shapes : Sequence[Tuple[Optional[int], ...]] + input shapes to broadcast + broadcast_patterns : Sequence[Tuple[bool,...]] + input broadcasting pattern constraints + + Returns + ------- + Tuple[Optional[int], ...], Tuple[bool,...] 
+ shape and broadcasting pattern + """ + + def get_most_specialized_shape(shapes, bcasting): + seen_dims = set(zip(shapes, bcasting)) + if len(seen_dims) == 1: + return next(iter(seen_dims))[0] + elif len(seen_dims) == 2 and (1, True) in seen_dims: + # this is fine since one dimension broadcasts to another common dimension + seen_dims.discard((1, True)) + return next(iter(seen_dims))[0] + # we have set length >= 2 and it is not the case (1, True) in seen_dims + # let's drops dims that do not matter in comparison + # do not care about 1 that broadcasts + # the simple case was checked above so two discard + # never manage to produce an empty set + seen_dims.discard((1, True)) + # do not care about unknown that does not broadcast because it + # should anyway match to known that did not broadcast + seen_dims.discard((None, False)) + if len(seen_dims) > 1: + # we did not manage to specialize dims, raise an error + raise ValueError(f"shapes and broadcast mismatch: {seen_dims}") + return next(iter(seen_dims))[0] + + # it is multiplied by nout because Elemwise supports multiple outputs + # (nout of them) + if len(shapes) < 1 or len(shapes) != len(broadcast_patterns): + raise ValueError( + "Length of arguments should be more than one and equal for shapes and broadcast patterns" + ) + common_len = len(shapes[0]) + if any(len(s) != common_len for s in shapes + broadcast_patterns): + raise ValueError("Shapes length mismatch, all lengths should be the same") + try: + out_shape = tuple( + get_most_specialized_shape(shape, bcasting) + for shape, bcasting in zip( + zip(*shapes), + zip(*broadcast_patterns), + ) + ) + out_broadcastable = tuple(all(bcast) for bcast in zip(*broadcast_patterns)) + except ValueError as e: + raise ValueError( + "Incompatible Elemwise input broadcasting pattern: " + f"{broadcast_patterns}. " + "Pytensor has static broadcasting befaviour as it " + "simplifies gradients and execution graph dramatically. " + "So even input shapes may contain ones they should be " + "explicitly be marked broadcastable and your current shapes are " + f"{shapes}" + ) from e + return out_shape, out_broadcastable + + class Elemwise(OpenMPOp): """Generalizes a scalar `Op` to tensors. @@ -407,57 +481,28 @@ def get_output_info(self, dim_shuffle, *inputs): # TODO: use LComplete instead args.append( dim_shuffle( - tuple(1 if s == 1 else None for s in input.type.shape), + input.type.broadcastable, ["x"] * difference + list(range(length)), )(input) ) inputs = args - - # HERE: all the broadcast dims have the same length now - - # cleverness: we iterate over the first, second, third broadcast flag - # of all inputs in parallel... the all() gives us each output - # broadcastable bit in turn. 
- - def get_most_specialized_shape(shapes): - shapes = set(shapes) - # All shapes are the same - if len(shapes) == 1: - return tuple(shapes)[0] - - # Only valid indeterminate case - if shapes == {None, 1}: - return None - - shapes.discard(1) - shapes.discard(None) - if len(shapes) > 1: - raise ValueError - return tuple(shapes)[0] - - # it is multiplied by nout because Elemwise supports multiple outputs - # (nout of them) - try: - out_shapes = [ - [ - get_most_specialized_shape(shape) - for shape in zip(*[inp.type.shape for inp in inputs]) - ] - ] * shadow.nout - except ValueError: - raise ValueError( - f"Incompatible Elemwise input shapes {[inp.type.shape for inp in inputs]}" - ) - + out_shapes, out_broadcastable = broadcast_shapes( + [inp.type.shape for inp in inputs], + [inp.type.broadcastable for inp in inputs], + ) + out_shapes, out_broadcastable = [out_shapes] * shadow.nout, [ + out_broadcastable + ] * shadow.nout # inplace_pattern maps output idx -> input idx inplace_pattern = self.inplace_pattern if inplace_pattern: for overwriter, overwritten in inplace_pattern.items(): - for out_s, in_s in zip( - out_shapes[overwriter], - inputs[overwritten].type.shape, + for out_b, in_b in zip( + out_broadcastable[overwriter], + inputs[overwritten].type.broadcastable, ): - if in_s == 1 and out_s != 1: + # the dimension lost its broadcasting property + if in_b and not out_b: raise ValueError( "Operation cannot be done inplace on an input " "with broadcasted dimensions." @@ -474,7 +519,7 @@ def get_most_specialized_shape(shapes): ) ) assert len(out_dtypes) == len(out_shapes) - return out_dtypes, out_shapes, inputs + return out_dtypes, out_shapes, out_broadcastable, inputs def make_node(self, *inputs): """ @@ -483,10 +528,12 @@ def make_node(self, *inputs): using DimShuffle. 
""" inputs = [as_tensor_variable(i) for i in inputs] - out_dtypes, out_shapes, inputs = self.get_output_info(DimShuffle, *inputs) + out_dtypes, out_shapes, out_broadcastable, inputs = self.get_output_info( + DimShuffle, *inputs + ) outputs = [ - TensorType(dtype=dtype, shape=shape)() - for dtype, shape in zip(out_dtypes, out_shapes) + TensorType(dtype=dtype, shape=shape, broadcastable=bcasting)() + for dtype, shape, bcasting in zip(out_dtypes, out_shapes, out_broadcastable) ] return Apply(self, inputs, outputs) @@ -543,8 +590,6 @@ def connection_pattern(self, node): return [[True for output in node.outputs] for ipt in node.inputs] def L_op(self, inputs, outs, ograds): - from pytensor.tensor.math import sum as at_sum - # Compute grad with respect to broadcasted input rval = self._bgrad(inputs, outs, ograds) @@ -577,16 +622,17 @@ def L_op(self, inputs, outs, ograds): # List of all the dimensions that are broadcastable for input[i] so # we can sum over them # TODO: only count dimensions that were effectively broadcasted + # the comment was introduced there + # https://github.com/Theano/Theano/commit/1ddd6c38a7a627bab8ee1a4c3d45295fc9c6aace to_sum = [ j - for j, in_s in enumerate(ipt.type.shape) - if in_s == 1 and outs[0].type.shape[j] != 1 + for j, in_broadcastable in enumerate(ipt.type.broadcastable) + if in_broadcastable and not outs[0].type.broadcastable[j] ] - if to_sum: - sr = at_sum(rval[i], axis=to_sum, keepdims=True) + sr = pytensor.tensor.math.sum(rval[i], axis=to_sum, keepdims=True) + # TODO: check if the rval type is the same as ipt type rval[i] = sr - return rval def _bgrad(self, inputs, outputs, ograds): @@ -735,9 +781,13 @@ def perform(self, node, inputs, output_storage): # FIXME: This no longer calls the C implementation! super().perform(node, inputs, output_storage) - for d, dim_shapes in enumerate(zip(*(i.shape for i in inputs))): - if len(set(dim_shapes) - {1}) > 1: - raise ValueError(f"Shapes on dimension {d} do not match: {dim_shapes}") + # the output of broadcast_shapes is not used, but checks if all the shapes comply + broadcast_shapes( + # TODO: additionally check against static shapes in input types + # actual shapes + static shapes in the node, all should comply + [input.shape for input in inputs], + [i.type.broadcastable for i in node.inputs], + ) ufunc_args = inputs ufunc_kwargs = {} @@ -814,18 +864,38 @@ def perform(self, node, inputs, output_storage): storage[0] = variable def infer_shape(self, fgraph, node, i_shapes) -> List[Tuple[TensorVariable, ...]]: - - if len(node.outputs) > 1: - from pytensor.tensor.exceptions import ShapeError - - raise ShapeError( - "Multiple outputs are not supported by the default `Elemwise.infer_shape`" + # some shapes can be already static, some unknown and None + # in the make node we copy shape and broadcasting patterns + # use this hint here + out_shape = list(node.outputs[0].type.shape) + for i, (o_size, i_sizes, i_bcasting) in enumerate( + zip( + out_shape, + zip(*i_shapes), + zip(*(i.type.broadcastable for i in node.inputs)), ) - - out_shape = pytensor.tensor.broadcast_shape(*i_shapes, arrays_are_shapes=True) - - # The `as_tensor_variable` should convert `ScalarType`s to `TensorType`s - return [tuple(as_tensor_variable(s) for s in out_shape)] + ): + if o_size is None: + # the unknown shape, should be inferred from some of the inputs + candidates = [ + s for s, bcasted in zip(i_sizes, i_bcasting) if not bcasted + ] + # None shape can't be broadcasted so it for sure has broadcasting=False + # thus some of the inputs have to have 
the non broadcastable dimension + # NOTE: It appears that user can break that assumption in some custom op, + # manually creating output nodes + if len(candidates) == 0: + from pytensor.tensor.rewriting.shape import ShapeError + + raise ShapeError( + "Encountered a non broadcasting unknown dimension in elemwise, " + "but all the input dims were broadcastable. " + "This can happen if custom make_node did not follow broadcasting conventions" + ) + # TODO: which best guess is better then 1 or i_sizes[0] or max(i_sizes)? + out_shape[i] = candidates[0] + # shape entries are scalars, so return tuple of scalars + return [tuple(as_tensor_variable(s) for s in out_shape)] * len(node.outputs) def _c_all(self, node, nodename, inames, onames, sub): # Some `Op`s directly call `Elemwise._c_all` or `Elemwise.c_code` @@ -888,7 +958,7 @@ def _c_all(self, node, nodename, inames, onames, sub): # for each input: # same as range(ndim), but with 'x' at all broadcastable positions orders = [ - [s == 1 and "x" or i for i, s in enumerate(input.type.shape)] + [bcast and "x" or i for i, bcast in enumerate(input.type.broadcastable)] for input in inputs ] @@ -911,7 +981,7 @@ def _c_all(self, node, nodename, inames, onames, sub): [ f"PyArray_ISFORTRAN({arr})" for arr, var in z - if not all(s == 1 for s in var.type.shape) + if not all(var.type.broadcastable) ] ) # If it is a scalar, make it c contig to prevent problem with @@ -996,7 +1066,7 @@ def _c_all(self, node, nodename, inames, onames, sub): or # Use simpler code when output ndim == 0 or 1 # or for broadcated scalar. - all(s == 1 for s in node.outputs[0].type.shape) + all(node.outputs[0].type.broadcastable) ): if nnested: all_code = [("", "")] * (nnested - 1) + [("", code)] + [""] @@ -1068,7 +1138,7 @@ def _c_all(self, node, nodename, inames, onames, sub): all(o.ndim >= 1 for o in node.outputs) and # Don't use the contig code for broadcasted scalar. - not all(s == 1 for s in node.outputs[0].type.shape) + not all(node.outputs[0].type.broadcastable) ): contig = None try: @@ -1101,7 +1171,7 @@ def _c_all(self, node, nodename, inames, onames, sub): """ index = "" for x, var in zip(inames + onames, inputs + node.outputs): - if not all(s == 1 for s in var.type.shape): + if not all(var.type.broadcastable): contig += ( """ dtype_%(x)s * %(x)s_ptr = (dtype_%(x)s*) PyArray_DATA(%(x)s); @@ -1135,7 +1205,7 @@ def _c_all(self, node, nodename, inames, onames, sub): ) if contig is not None: z = list(zip(inames + onames, inputs + node.outputs)) - all_broadcastable = all(s == 1 for s in var.type.shape) + all_broadcastable = all(var.type.broadcastable) cond1 = " && ".join( [ "PyArray_ISCONTIGUOUS(%s)" % arr @@ -1189,7 +1259,7 @@ def c_support_code_apply(self, node, nodename): return support_code def c_code_cache_version_apply(self, node): - version = [14] # the version corresponding to the c code in this Op + version = [15] # the version corresponding to the c code in this Op # now we insert versions for the ops on which we depend... 
scalar_node = Apply( diff --git a/pytensor/tensor/elemwise_cgen.py b/pytensor/tensor/elemwise_cgen.py index a7a333afdc..54efec16be 100644 --- a/pytensor/tensor/elemwise_cgen.py +++ b/pytensor/tensor/elemwise_cgen.py @@ -92,41 +92,22 @@ def make_checks(loop_orders, dtypes, sub): # elements of to_compare are pairs ( input_variable_idx, input_variable_dim_idx ) if len(to_compare) < 2: continue - - # Find first dimension size that is != 1 - jl, xl = to_compare[-1] - non1size_dim_check = f""" - npy_intp non1size_dim{xl}; - non1size_dim{xl} = """ - for (j, x) in to_compare[:-1]: - non1size_dim_check += f"(%(lv{j})s_n{x} != 1) ? %(lv{j})s_n{x} : " - non1size_dim_check += f"%(lv{jl})s_n{xl};" - check += non1size_dim_check - - # Check the nonsize1 dims match - # TODO: This is a bit inefficient because we are comparing one dimension against itself - check += f""" - if (non1size_dim{xl} != 1) - {{ - """ - for (j, x) in to_compare: + j0, x0 = to_compare[0] + for (j, x) in to_compare[1:]: check += f""" - if ((%(lv{j})s_n{x} != non1size_dim{x}) && (%(lv{j})s_n{x} != 1)) - {{ - PyErr_Format(PyExc_ValueError, "Input dimension mismatch. One other input has shape[%%i] = %%lld, but input[%%i].shape[%%i] = %%lld.", - {x}, - (long long int) non1size_dim{x}, - {j}, - {x}, - (long long int) %(lv{j})s_n{x} - ); - %(fail)s - }} - """ - check += """ - } + if (%(lv{j0})s_n{x0} != %(lv{j})s_n{x}) + {{ + PyErr_Format(PyExc_ValueError, "Input dimension mismatch implicit broadcasting is not supported. (input[%%i].shape[%%i] = %%lld, input[%%i].shape[%%i] = %%lld)", + {j0}, + {x0}, + (long long int) %(lv{j0})s_n{x0}, + {j}, + {x}, + (long long int) %(lv{j})s_n{x} + ); + %(fail)s + }} """ - return init % sub + check % sub diff --git a/pytensor/tensor/rewriting/basic.py b/pytensor/tensor/rewriting/basic.py index a40320773c..70179bcca9 100644 --- a/pytensor/tensor/rewriting/basic.py +++ b/pytensor/tensor/rewriting/basic.py @@ -45,7 +45,7 @@ from pytensor.tensor.extra_ops import broadcast_shape, broadcast_to from pytensor.tensor.math import all as at_all from pytensor.tensor.math import eq -from pytensor.tensor.shape import Shape_i +from pytensor.tensor.shape import Shape_i, specify_shape from pytensor.tensor.sort import TopKOp from pytensor.tensor.type import DenseTensorType, TensorType from pytensor.tensor.var import TensorConstant @@ -89,7 +89,7 @@ def merge_broadcastables(broadcastables): def broadcast_like(value, template, fgraph, dtype=None): """ - Return a Variable with the same shape and dtype as the template, + Return a Variable with the same shape. broadcastable and dtype as the template, filled by broadcasting value through it. `value` will be cast as necessary. @@ -113,7 +113,8 @@ def broadcast_like(value, template, fgraph, dtype=None): new_shape = template.shape rval = alloc(value, *new_shape) assert rval.type.dtype == dtype - + rval = specify_shape(rval, shape=new_shape, broadcastable=template.broadcastable) + assert rval.broadcastable == template.broadcastable return rval @@ -1157,6 +1158,7 @@ def constant_folding(fgraph, node): output.type.dtype, shape=data.shape, name=output.type.name, + broadcastable=output.type.broadcastable, ) else: output_type = output.type @@ -1167,7 +1169,6 @@ def constant_folding(fgraph, node): # and not "broaden" them. 
This is a case in which types are # unnecessarily "broadened" # assert not hasattr(output.type, "broadcastable") or output.type.broadcastable == tuple(s == 1 for s in data.shape) - copy_stack_trace(output, v) rval.append(v) diff --git a/pytensor/tensor/shape.py b/pytensor/tensor/shape.py index 219ee20bd9..eaea0669ae 100644 --- a/pytensor/tensor/shape.py +++ b/pytensor/tensor/shape.py @@ -1,7 +1,7 @@ import warnings from numbers import Number from textwrap import dedent -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Sequence, Tuple, Union import numpy as np @@ -392,10 +392,15 @@ class SpecifyShape(COp): Maybe in the future we will never do the assert! """ + __props__ = ("output_broadcastable",) view_map = {0: [0]} __props__ = () _f16_ok = True + def __init__(self, output_broadcastable: Sequence) -> None: + super().__init__() + self.output_broadcastable = tuple(output_broadcastable) + def make_node(self, x, *shape): from pytensor.tensor.basic import get_scalar_constant_value @@ -432,7 +437,9 @@ def make_node(self, x, *shape): except NotScalarConstantError: pass - out_var = x.type.clone(shape=type_shape)() + out_var = x.type.clone( + shape=type_shape, broadcastable=self.output_broadcastable + )() return Apply(self, [x, *shape], [out_var]) @@ -539,12 +546,10 @@ def c_code_cache_version(self): return (2,) -_specify_shape = SpecifyShape() - - def specify_shape( x: Union[np.ndarray, Number, Variable], shape: Union[ShapeValueType, List[ShapeValueType], Tuple[ShapeValueType, ...]], + broadcastable=None, ): """Specify a fixed shape for a `Variable`. @@ -574,8 +579,9 @@ def specify_shape( # If shape does not match x.ndim, we rely on the `Op` to raise a ValueError if not new_shape_info and len(shape) == x.type.ndim: return x - - return _specify_shape(x, *shape) + if broadcastable is None: + broadcastable = x.broadcastable + return SpecifyShape(broadcastable)(x, *shape) @_get_vector_length.register(SpecifyShape) @@ -934,7 +940,11 @@ def specify_broadcastable(x, *axes): raise ValueError("Trying to specify broadcastable of non-existent dimension") shape_info = [1 if i in axes else s for i, s in enumerate(x.type.shape)] - return specify_shape(x, shape_info) + broadcastable = [ + True if i in axes else b for i, b in enumerate(x.type.broadcastable) + ] + + return specify_shape(x, shape_info, broadcastable=broadcastable) class Unbroadcast(COp): diff --git a/pytensor/tensor/type.py b/pytensor/tensor/type.py index d1826ff674..a8ab46e068 100644 --- a/pytensor/tensor/type.py +++ b/pytensor/tensor/type.py @@ -1,6 +1,5 @@ import logging -import warnings -from typing import TYPE_CHECKING, Iterable, Literal, Optional, Tuple, Union +from typing import TYPE_CHECKING, Literal, Optional, Sequence, Tuple, Union import numpy as np @@ -54,6 +53,38 @@ } +def infer_shape_broadcastable( + shape: Optional[Sequence[Optional[int]]] = None, + broadcastable: Optional[Sequence[bool]] = None, +) -> Tuple[Tuple[Union[int, None], ...], Tuple[bool, ...]]: + if broadcastable is not None: + broadcastable = tuple(broadcastable) + if shape is not None: + shape = tuple(shape) + if broadcastable is not None and shape is not None: + # check that broadcast semantics match shapes + if len(broadcastable) != len(shape): + raise ValueError( + "broadcastable and shape should match in lengths, " + f"got {len(broadcastable)} vs {len(shape)}" + ) + # check one by one for more specific error messages + for s, b in zip(shape, broadcastable): + if s is None and b: + raise ValueError( + "Unknown dims can't be broadcastable or 
should be set explicitly to ones" + ) + elif s is not None and s != 1 and b: + raise ValueError("shapes that are not ones can't be set broadcastable") + elif shape is not None: + broadcastable = tuple(shp == 1 if shp is not None else False for shp in shape) + elif broadcastable is not None: + shape = tuple(None if bcast is False else 1 for bcast in broadcastable) + else: + raise TypeError("broadcastable or shape should be provided") + return shape, broadcastable + + class TensorType(CType[np.ndarray], HasDataType, HasShape): r"""Symbolic `Type` representing `numpy.ndarray`\s.""" @@ -70,34 +101,31 @@ class TensorType(CType[np.ndarray], HasDataType, HasShape): def __init__( self, dtype: Union[str, np.dtype], - shape: Optional[Iterable[Optional[Union[bool, int]]]] = None, + shape: Optional[Sequence[Optional[int]]] = None, name: Optional[str] = None, - broadcastable: Optional[Iterable[bool]] = None, + broadcastable: Optional[Sequence[bool]] = None, ): r""" Parameters ---------- - dtype + dtype : Union[str, np.dtype] A NumPy dtype (e.g. ``"int64"``). - shape + shape : Tuple[Optional[int]] The static shape information. ``None``\s are used to indicate unknown shape values for their respective dimensions. If `shape` is a list of ``bool``\s, the ``True`` elements of are converted to ``1``\s and the ``False`` values are converted to ``None``\s. - name + name : str Optional name for this type. + broadcastable: Sequence[bool] + Notes + ----- + Either of shape or broadcastable should be present to infer dims """ - - if broadcastable is not None: - warnings.warn( - "The `broadcastable` keyword is deprecated; use `shape`.", - DeprecationWarning, - ) - shape = broadcastable - + shape, broadcastable = infer_shape_broadcastable(shape, broadcastable) if str(dtype) == "floatX": self.dtype = config.floatX else: @@ -106,31 +134,24 @@ def __init__( self.dtype = np.dtype(dtype).name - def parse_bcast_and_shape(s): - if isinstance(s, (bool, np.bool_)): - return 1 if s else None - else: - return s - - self.shape = tuple(parse_bcast_and_shape(s) for s in shape) + self.shape = shape self.dtype_specs() # error checking is done there self.name = name + self.broadcastable = broadcastable self.numpy_dtype = np.dtype(self.dtype) def clone( self, dtype=None, shape=None, broadcastable=None, **kwargs ) -> "TensorType": - if broadcastable is not None: - warnings.warn( - "The `broadcastable` keyword is deprecated; use `shape`.", - DeprecationWarning, - ) - shape = broadcastable + if broadcastable is None and shape is None: + # only default to passby broadcast when shape is also None + # otherwise shape changes affect broadcasting patterns and + # inconsistencies + broadcastable = self.broadcastable + shape = self.shape if dtype is None: dtype = self.dtype - if shape is None: - shape = self.shape - return type(self)(dtype, shape, name=self.name) + return type(self)(dtype, shape, name=self.name, broadcastable=broadcastable) def filter(self, data, strict=False, allow_downcast=None): """Convert `data` to something which can be associated to a `TensorVariable`. 
@@ -321,6 +342,8 @@ def is_super(self, otype): # `otype` is allowed to be as or more shape-specific than `self`, # but not less and all(sb == ob or sb is None for sb, ob in zip(self.shape, otype.shape)) + # broadcastable should be the same + and otype.broadcastable == self.broadcastable ): return True @@ -372,11 +395,6 @@ def __eq__(self, other): def __hash__(self): return hash((type(self), self.dtype, self.shape)) - @property - def broadcastable(self): - """A boolean tuple indicating which dimensions have a shape equal to one.""" - return tuple(s == 1 for s in self.shape) - @property def ndim(self): """The number of dimensions.""" diff --git a/pytensor/tensor/var.py b/pytensor/tensor/var.py index 1d74955237..1d898940ba 100644 --- a/pytensor/tensor/var.py +++ b/pytensor/tensor/var.py @@ -1018,7 +1018,7 @@ def __init__(self, type: _TensorTypeType, data, name=None): ) # We want all the shape information from `data` - new_type = type.clone(shape=data_shape) + new_type = type.clone(shape=data_shape, broadcastable=type.broadcastable) assert not any(s is None for s in new_type.shape) diff --git a/tests/sparse/test_basic.py b/tests/sparse/test_basic.py index d106f5ae38..33a29b1955 100644 --- a/tests/sparse/test_basic.py +++ b/tests/sparse/test_basic.py @@ -1093,21 +1093,23 @@ def check_format_ndim(format, ndim): s = SparseFromDense(format)(x) s_m = -s d = dense_from_sparse(s_m) + pytensor.grad(None, x, known_grads={d: d.type()}) c = d.sum() g = pytensor.grad(c, x) f = pytensor.function([x], [s, g]) f(np.array(0, dtype=config.floatX, ndmin=ndim)) f(np.array(7, dtype=config.floatX, ndmin=ndim)) - def test_format_ndim(self): - for format in "csc", "csr": - for ndim in 0, 1, 2: - self.check_format_ndim(format, ndim) + @pytest.mark.parametrize("format", ["csc", "csr"]) + @pytest.mark.parametrize("ndim", [0, 1, 2]) + def test_format_ndim(self, format, ndim): + self.check_format_ndim(format, ndim) - with pytest.raises(TypeError): - self.check_format_ndim(format, 3) - with pytest.raises(TypeError): - self.check_format_ndim(format, 4) + @pytest.mark.parametrize("format", ["csc", "csr"]) + @pytest.mark.parametrize("ndim", [3, 4]) + def test_format_ndim_raises(self, format, ndim): + with pytest.raises(TypeError): + self.check_format_ndim(format, ndim) class TestCsmProperties: diff --git a/tests/tensor/rewriting/test_basic.py b/tests/tensor/rewriting/test_basic.py index 144fcf0eb9..5560735e17 100644 --- a/tests/tensor/rewriting/test_basic.py +++ b/tests/tensor/rewriting/test_basic.py @@ -1693,8 +1693,12 @@ def verify_op_count(f, count, cls): ], ) def test_basic(self, expr, x_shape, y_shape): - x = at.tensor(dtype="int64", shape=(None,) * len(x_shape), name="x") - y = at.tensor(dtype="int64", shape=(None,) * len(y_shape), name="y") + x = at.tensor( + dtype="int64", broadcastable=tuple(s == 1 for s in x_shape), name="x" + ) + y = at.tensor( + dtype="int64", broadcastable=tuple(s == 1 for s in y_shape), name="y" + ) z = expr(x, y) z_opt = pytensor.function( diff --git a/tests/tensor/rewriting/test_shape.py b/tests/tensor/rewriting/test_shape.py index c23f2e53a0..fefa10ca7a 100644 --- a/tests/tensor/rewriting/test_shape.py +++ b/tests/tensor/rewriting/test_shape.py @@ -417,35 +417,78 @@ def test_vector(self): fgraph.attach_feature(shape_feature) assert shape_feature.same_shape(x, o) - def test_no_static_shapes(self): + @pytest.mark.parametrize( + "setup", + [ + ("x", "o"), + pytest.param(("x", "y"), marks=pytest.mark.xfail(reason="Not implemented")), + pytest.param(("y", "o"), marks=pytest.mark.xfail(reason="Not 
implemented")), + ], + ids=lambda case: "{}.0=={}.0".format(*case), + ) + def test_no_static_shapes(self, setup): + """All the shapes are actually equal and shape inference + should handle all the comparisons + """ + _1, _2 = setup x = vector() y = vector() o = x + y fgraph = FunctionGraph([x, y], [o], clone=False) shape_feature = ShapeFeature() fgraph.attach_feature(shape_feature) - # We no longer assume that `x` has the same shape as `y` simply because - # neither has static shape information. Instead, when there is no - # static shape information is available, we assume that `x` and/or `y` - # could have shapes `(1,)` and/or `(n,)`, where `n != 1`, or any - # combination of the two. - assert not shape_feature.same_shape(x, o) - # The following case isn't implemented - assert not shape_feature.same_shape(y, o) + # We assume that `x` has the same shape as `y` simply because + # neither broadcast. Same is for `o` + variables = dict(x=x, y=y, o=o) + assert shape_feature.same_shape(variables[_1], variables[_2], 0, 0) @pytest.mark.parametrize( - "y_dim_0", - [2, pytest.param(None, marks=pytest.mark.xfail(reason="Not implemented"))], + "setup", + [ + # it has to be that verbose since + # specific combinations are impossible + # to infer so far due to limitations of + # the shape inference + (2, "x", "o", 0), + (2, "y", "o", 0), + (2, "x", "y", 0), + (2, "x", "o", 1), + pytest.param( + (2, "y", "o", 1), marks=pytest.mark.xfail(reason="Not implemented") + ), + pytest.param( + (2, "x", "y", 1), marks=pytest.mark.xfail(reason="Not implemented") + ), + (None, "x", "o", 0), + pytest.param( + (None, "y", "o", 0), marks=pytest.mark.xfail(reason="Not implemented") + ), + pytest.param( + (None, "x", "y", 0), marks=pytest.mark.xfail(reason="Not implemented") + ), + (None, "x", "o", 1), + pytest.param( + (None, "y", "o", 1), marks=pytest.mark.xfail(reason="Not implemented") + ), + pytest.param( + (None, "x", "y", 1), marks=pytest.mark.xfail(reason="Not implemented") + ), + ], + ids=lambda case: "{1}.{3}=={2}.{3}|ydim={0}".format(*case), ) - def test_vector_dim(self, y_dim_0): + def test_vector_dim(self, setup): + """All the shapes are actually equal and shape inference + should handle all the comparisons + """ + y_dim_0, _1, _2, dim = setup x = at.tensor(dtype="floatX", shape=(2, None)) y = at.tensor(dtype="floatX", shape=(y_dim_0, None)) o = x + y fgraph = FunctionGraph([x, y], [o], clone=False) shape_feature = ShapeFeature() fgraph.attach_feature(shape_feature) - assert shape_feature.same_shape(x, o, 0, 0) - assert not shape_feature.same_shape(x, o, 1, 1) + variables = dict(x=x, y=y, o=o) + assert shape_feature.same_shape(variables[_1], variables[_2], dim, dim) def test_vector_dim_err(self): x = vector() diff --git a/tests/tensor/test_elemwise.py b/tests/tensor/test_elemwise.py index b91d26dd6c..1e85d9daeb 100644 --- a/tests/tensor/test_elemwise.py +++ b/tests/tensor/test_elemwise.py @@ -17,8 +17,7 @@ from pytensor.link.c.basic import CLinker, OpWiseCLinker from pytensor.tensor import as_tensor_variable from pytensor.tensor.basic import second -from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise -from pytensor.tensor.exceptions import ShapeError +from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise, broadcast_shapes from pytensor.tensor.math import all as at_all from pytensor.tensor.math import any as at_any from pytensor.tensor.math import exp @@ -34,11 +33,23 @@ vector, vectors, ) +from pytensor.tensor.var import TensorVariable from tests import unittest_tools from 
tests.link.test_link import make_function from tests.tensor.test_math import reduce_bitwise_and +@pytest.fixture( + params=[ + "py", + pytest.param("c", marks=pytest.mark.skipif("not pytensor.config.cxx")), + # TODO: automate this to add more linker modes, e.g. Jax, Numba + ] +) +def mode(request): + return Mode(linker=request.param) + + class TestDimShuffle(unittest_tools.InferShapeTester): op = DimShuffle type = TensorType @@ -228,6 +239,8 @@ def with_linker(self, linker, op, type, rand_val): ((2, 3, 4, 5), (1, 1, 1, 1)), ((), ()), ]: + # TODO: move test cases outside the test + exc = None if shape_info == "complete": x_type = type(pytensor.config.floatX, shape=xsh) y_type = type(pytensor.config.floatX, shape=ysh) @@ -245,7 +258,12 @@ def with_linker(self, linker, op, type, rand_val): else: x_type = type(pytensor.config.floatX, shape=[None for _ in xsh]) y_type = type(pytensor.config.floatX, shape=[None for _ in ysh]) - + # TODO: refactor this test into fixtures and proper test cases + # the error should be itself a fixture that takes 2 shape cases + # or a helper that takes 2 shapes, 2 broadcast patterns and + # checks if they can broadcast or a fixed behaviour for given shapes + if any(s1 != s2 for s1, s2 in zip(xsh, ysh)): + exc = ValueError(".*broadcasting.*") x = x_type("x") y = y_type("y") e = op(aes.add)(x, y) @@ -254,8 +272,12 @@ def with_linker(self, linker, op, type, rand_val): yv = rand_val(ysh) zv = xv + yv - unittest_tools.assert_allclose(f(xv, yv), zv) - + if exc is None: + unittest_tools.assert_allclose(f(xv, yv), zv) + else: + with pytest.raises(exc.__class__, match=exc.args[0]): + unittest_tools.assert_allclose(f(xv, yv), zv) + return # test Elemwise.infer_shape # the Shape op don't implement c_code! if isinstance(linker, PerformLinker): @@ -268,6 +290,7 @@ def with_linker(self, linker, op, type, rand_val): assert tuple(f(xv, yv)) == tuple(zv.shape) def with_linker_inplace(self, linker, op, type, rand_val): + # TODO: refactor me to move test cases outside the function for shape_info in ("complete", "only_broadcastable", "none"): for xsh, ysh in [ ((5, 5), (5, 5)), @@ -279,6 +302,7 @@ def with_linker_inplace(self, linker, op, type, rand_val): ((2, 3, 4, 5), (1, 1, 1, 1)), ((), ()), ]: + exc = None if shape_info == "complete": x_type = type(pytensor.config.floatX, shape=xsh) y_type = type(pytensor.config.floatX, shape=ysh) @@ -287,15 +311,17 @@ def with_linker_inplace(self, linker, op, type, rand_val): # type shape provided by PyTensor was broadcastable/non-broadcastable x_type = type( pytensor.config.floatX, - shape=tuple(s if s == 1 else None for s in xsh), + broadcastable=tuple(s == 1 for s in xsh), ) y_type = type( pytensor.config.floatX, - shape=tuple(s if s == 1 else None for s in ysh), + broadcastable=tuple(s == 1 for s in ysh), ) else: x_type = type(pytensor.config.floatX, shape=[None for _ in xsh]) y_type = type(pytensor.config.floatX, shape=[None for _ in ysh]) + if any(s1 != s2 for s1, s2 in zip(xsh, ysh)): + exc = ValueError(".*broadcasting.*") x = x_type("x") y = y_type("y") @@ -305,7 +331,12 @@ def with_linker_inplace(self, linker, op, type, rand_val): yv = rand_val(ysh) zv = xv + yv - f(xv, yv) + if exc is None: + f(xv, yv) + else: + with pytest.raises(exc.__class__, match=exc.args[0]): + f(xv, yv) + return assert (xv == zv).all() # test Elemwise.infer_shape @@ -776,33 +807,6 @@ def test_input_dimensions_overflow(self): g = pytensor.function([a, b, c, d, e, f], s, mode=Mode(linker="py")) g(*[np.zeros(2**11, config.floatX) for i in range(6)]) - def 
check_input_dimensions_match(self, mode): - """Make sure that our input validation works correctly and doesn't - throw erroneous broadcast-based errors. - """ - x_v = matrix("x") - m_v = vector("m") - - x = np.array([[-1.32720483], [0.23442016]]).astype(config.floatX) - m = np.array([0.0, 0.0]).astype(config.floatX) - - z_v = x_v - m_v - f = pytensor.function([x_v, m_v], z_v, mode=mode) - - res = f(x, m) - - assert np.array_equal(res, x - m) - - def test_input_dimensions_match_python(self): - self.check_input_dimensions_match(Mode(linker="py")) - - @pytest.mark.skipif( - not pytensor.config.cxx, - reason="G++ not available, so we need to skip this test.", - ) - def test_input_dimensions_match_c(self): - self.check_input_dimensions_match(Mode(linker="c")) - def test_str(self): op = Elemwise(aes.add, inplace_pattern=None, name=None) assert str(op) == "Elemwise{add}" @@ -828,7 +832,14 @@ def test_partial_static_shape_info(self): assert pytensor.get_scalar_constant_value(res_shape[0][0]) == 1 assert pytensor.get_scalar_constant_value(res_shape[0][1]) == 1 - def test_multi_output(self): + @pytest.mark.xfail(reason="Undefined API for incorrect subclassing of Elemwise") + def test_multi_output_bad_implementation(self): + """ + In the below implementation make_node does not follow the broadcasting assumptions + and the behaviour is somewhat undefined. Should we return propagated shapes [(1, 1), (1, 1)] + or we should instead raise an error because the output dimension no longer broadcasts? + """ + class CustomElemwise(Elemwise): def make_node(self, *args): res = super().make_node(*args) @@ -838,18 +849,42 @@ def make_node(self, *args): # Return two outputs [ TensorType(dtype="float64", shape=(None, None))() - for i in range(2) + for _ in range(2) ], ) - z_1, z_2 = CustomElemwise(aes.add)( + z_1, _ = CustomElemwise(aes.add)( + as_tensor_variable(np.eye(1)), as_tensor_variable(np.eye(1)) + ) + + in_1_shape = (aes.constant(1), aes.constant(1)) + + # should not fail, actually + out_1_shape, _ = z_1.owner.op.infer_shape( + None, z_1.owner, [in_1_shape, in_1_shape] + ) + assert isinstance(out_1_shape[0], TensorVariable) + assert out_1_shape[0][0].data == in_1_shape[0].data + + def test_multi_output_correct_implementation(self): + class CustomAdd(aes.add.__class__): + nout = 2 + + def out_prefs(*types): + t = pytensor.scalar.upcast_out(*types) + return t * 2 + + z_1, _ = Elemwise(CustomAdd(out_prefs, name="add2"))( as_tensor_variable(np.eye(1)), as_tensor_variable(np.eye(1)) ) in_1_shape = (aes.constant(1), aes.constant(1)) - with pytest.raises(ShapeError): - z_1.owner.op.infer_shape(None, z_1.owner, [in_1_shape, in_1_shape]) + out_1_shape, _ = z_1.owner.op.infer_shape( + None, z_1.owner, [in_1_shape, in_1_shape] + ) + assert isinstance(out_1_shape[0], TensorVariable) + assert out_1_shape[0].data == in_1_shape[0].data def test_shape_types(self): x = tensor(dtype=np.float64, shape=(None, 1)) @@ -893,10 +928,127 @@ def test_invalid_static_shape(self): y = tensor(dtype="float64", shape=(3,)) with pytest.raises( ValueError, - match=re.escape("Incompatible Elemwise input shapes [(2,), (3,)]"), + match=re.escape("Incompatible Elemwise input broadcasting pattern"), ): x + y + def test_broadcast_specification(self): + with pytest.raises( + ValueError, + match="Unknown dims can't be broadcastable or should be set explicitly to ones", + ): + x = tensor(dtype="float64", shape=(None, 2), broadcastable=(True, False)) + # respect user broadcastable restriction when shape is one and shape + x = tensor(dtype="float64", 
shape=(1, 2), broadcastable=(False, False)) + # respect user provided broadcastable + assert x.type.broadcastable == (False, False) + + def test_no_static_broadcast(self): + x = tensor(dtype="float64", shape=(1, 2), broadcastable=(False, False)) + y = tensor(dtype="float64", shape=(2, 1)) + with pytest.raises( + ValueError, + match=re.escape("Incompatible Elemwise input broadcasting pattern"), + ): + z = x + y + print(x.type.broadcastable, y.type.broadcastable, z.type.broadcastable) + + def test_broadcasting_not_promoted_with_elemwise_op(self): + """ + After an elemwise op is performed, broadcasting semantics should respect + input broadcasting patterns but not the shape of inputs + """ + x = tensor(dtype="float64", shape=(1, 1), broadcastable=(False, True)) + y = tensor(dtype="float64", shape=(1, 1)) + assert (x + y).type.broadcastable == (False, True) + + def test_broadcasted_gradients(self): + x_row = pytensor.tensor.row("x_row") + y = pytensor.tensor.matrix("y") + + x_row_grad = pytensor.grad(pytensor.tensor.sum(x_row + y), wrt=x_row) + + f_row = pytensor.function([x_row, y], x_row_grad, allow_input_downcast=True) + r_row = f_row(np.ones((1, 5)), np.ones((5, 5))) + np.testing.assert_allclose(r_row, np.full((1, 5), 5.0)) + # [[5. 5. 5. 5. 5.]] + + @pytest.mark.parametrize("s1,s2", [[(2,), (2, 1)], [(2, 1), (1, 2)], [(1,), (2,)]]) + def test_no_dynamic_broadcasting(self, s1, s2, mode): + # we set the broadcasting to False explicitly, all attempts to broadcast at runtime should be checked + x = pytensor.tensor.tensor("x", broadcastable=(False,) * len(s1)) + y = pytensor.tensor.tensor("y", broadcastable=(False,) * len(s2)) + f = pytensor.function([x, y], x + y, mode=mode, allow_input_downcast=True) + with pytest.raises(ValueError, match=".*broadcasting.*"): + # should fail as we do not support dynamic broadcasting + f(np.ones(s1), np.ones(s2)) + + +@pytest.mark.parametrize( + "shapes,broadcasting,expected_shape,expected_broadcasting,exc", + [ + [[], [], None, None, ValueError(".*Length of arguments.*")], + [ + [(1,), (2, 1)], + [(True,), (False, True)], + None, + None, + ValueError(".*Shapes length mismatch.*"), + ], + [ + [ + (1,), + ], + [(True,)], + (1,), + (True,), + None, + ], + [ + [ + (1,), + ], + [(False,)], + (1,), + (False,), + None, + ], + [[(1,), (1,)], [(False,), (True,)], (1,), (False,), None], + # unknown shapes propagate and kept unbroadcasted + [[(None,), (1,)], [(False,), (True,)], (None,), (False,), None], + # respect 1 that is not broadcasted + [ + [(2,), (1,)], + [(False,), (False,)], + None, + None, + ValueError(".*Incompatible Elemwise input broadcasting pattern.*"), + ], + # but should broadcast if another dim is also one, the result does not broadcast + [[(1,), (1,)], [(False,), (True,)], (1,), (False,), None], + # some special case with zero dim + [[(0,), (1,)], [(False,), (True,)], (0,), (False,), None], + # regular shape error we should raise in elemwise perform + [ + [(0,), (1,), (2,)], + [(False,), (True,), (False,)], + None, + None, + ValueError(".*Incompatible Elemwise input broadcasting pattern.*"), + ], + ], +) +def test_broadcast_shapes_function( + shapes, broadcasting, expected_shape, expected_broadcasting, exc +): + if exc is not None: + with pytest.raises(exc.__class__, match=exc.args[0]): + out_shape, out_broadcasting = broadcast_shapes(shapes, broadcasting) + else: + out_shape, out_broadcasting = broadcast_shapes(shapes, broadcasting) + assert out_shape == expected_shape + assert out_broadcasting == expected_broadcasting + def 
test_not_implemented_elemwise_grad(): # Regression test for unimplemented gradient in an Elemwise Op. diff --git a/tests/tensor/test_shape.py b/tests/tensor/test_shape.py index d05aa8a55a..daf4b9802d 100644 --- a/tests/tensor/test_shape.py +++ b/tests/tensor/test_shape.py @@ -19,7 +19,6 @@ Shape_i, SpecifyShape, Unbroadcast, - _specify_shape, reshape, shape, shape_i, @@ -350,13 +349,13 @@ def test_check_inputs(self): specify_shape([[1, 2, 3], [4, 5, 6]], (2.2, 3)) with pytest.raises(TypeError, match="must be integer types"): - _specify_shape([[1, 2, 3], [4, 5, 6]], *(2.2, 3)) + SpecifyShape([False, False])([[1, 2, 3], [4, 5, 6]], *(2.2, 3)) with pytest.raises(ValueError, match="will never match"): specify_shape(matrix(), [4]) with pytest.raises(ValueError, match="will never match"): - _specify_shape(matrix(), *[4]) + SpecifyShape([False, False])(matrix(), *[4]) with pytest.raises(ValueError, match="must have fixed dimensions"): specify_shape(matrix(), vector(dtype="int32")) diff --git a/tests/tensor/test_type.py b/tests/tensor/test_type.py index 8ef4ca2864..31f5c3668b 100644 --- a/tests/tensor/test_type.py +++ b/tests/tensor/test_type.py @@ -329,14 +329,12 @@ def test_fixed_shape_convert_variable(): assert res.type.shape == (3, 2) -def test_deprecated_kwargs(): - with pytest.warns(DeprecationWarning, match=".*broadcastable.*"): - res = TensorType("float64", broadcastable=(True, False)) +def test_broadcastable_kwargs(): + res = TensorType("float64", broadcastable=(True, False)) assert res.shape == (1, None) - with pytest.warns(DeprecationWarning, match=".*broadcastable.*"): - new_res = res.clone(broadcastable=(False, True)) + new_res = res.clone(broadcastable=(False, True)) assert new_res.shape == (None, 1) @@ -355,12 +353,9 @@ def test_tensor_creator_helper(): assert res.type == TensorType(dtype="int64", shape=(5, None)) assert res.name == "custom" - with pytest.warns( - DeprecationWarning, match="The `broadcastable` keyword is deprecated" - ): - res = tensor(dtype="int64", broadcastable=(True, False), name="custom") - assert res.type == TensorType("int64", shape=(1, None)) - assert res.name == "custom" + res = tensor(dtype="int64", broadcastable=(True, False), name="custom") + assert res.type == TensorType("int64", shape=(1, None)) + assert res.name == "custom" @pytest.mark.parametrize("dtype", ("floatX", "float64", bool, np.int64)) From d4c334278a79d0151543ba1eeb5582e8d4f9cf67 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 29 Dec 2022 17:58:26 +0300 Subject: [PATCH 2/3] Revert "Broadcast input matrices in Gemm" This reverts commit a3dc0a72632088da8e758cd8f5009e571698a2f4. 
--- pytensor/tensor/blas.py | 179 ++++++++++++++------------------------ tests/tensor/test_blas.py | 71 --------------- 2 files changed, 63 insertions(+), 187 deletions(-) diff --git a/pytensor/tensor/blas.py b/pytensor/tensor/blas.py index bca1e7e88c..7d772dcda2 100644 --- a/pytensor/tensor/blas.py +++ b/pytensor/tensor/blas.py @@ -137,6 +137,7 @@ except ImportError: pass +from functools import reduce from typing import Tuple import pytensor.scalar @@ -639,10 +640,8 @@ def c_header_dirs(self, **kwargs): {PyErr_SetString(PyExc_NotImplementedError, "type(b) is not double or float"); %(fail)s;} """ - # broadcast_xy = None - check_dims = """ - if (Nx[0] !=1 && Nz[0] != 1 && Nx[0] != Nz[0]) + if (Nx[0] != Nz[0]) { PyErr_Format(PyExc_ValueError, "Shape mismatch: x has %%ld rows but z has %%ld rows", @@ -656,7 +655,7 @@ def c_header_dirs(self, **kwargs): (long int)Nx[1], (long int)Nx[0], (long int)Ny[0], (long int)Ny[1]); %(fail)s; } - if (Ny[1] != 1 && Nz[1]!= 1 && Ny[1] != Nz[1]) + if (Ny[1] != Nz[1]) { PyErr_Format(PyExc_ValueError, "Shape mismatch: y has %%ld cols but z has %%ld cols", @@ -842,14 +841,14 @@ def build_gemm_call(self): else: setup_z_Nz_Sz = self.setup_z_Nz_Sz - return "".join( + return reduce( + str.__add__, ( self.declare_NS, self.check_xyz_rank2, setup_z_Nz_Sz, self.check_xyz_double_or_float, self.check_ab_double_or_float, - self.broadcast_xy, self.check_dims, self.check_strides, self.encode_strides_in_unit, @@ -862,7 +861,8 @@ def build_gemm_call(self): self.case_double_ab_constants, self.case_double_gemm, self.end_switch_typenum, - ) + ), + "", ) def build_gemm_version(self): @@ -992,11 +992,6 @@ def perform(self, node, inp, out, params): z.itemset(z * a + b * np.dot(x, y)) zout[0] = z else: - # Broadcast Z if needed - if (x.shape[0] > z.shape[0]) or (y.shape[1] > z.shape[1]): - z = np.broadcast_to( - z, (max(x.shape[0], z.shape[0]), max(y.shape[1], z.shape[1])) - ).copy() if b == 0.0: if a == 1.0: z[:] = np.dot(x, y) @@ -1017,135 +1012,88 @@ def perform(self, node, inp, out, params): zout[0] = z def infer_shape(self, fgraph, node, input_shapes): - z_shape, _, x_shape, y_shape, _ = input_shapes - return [ - ( - pytensor.scalar.scalar_maximum(z_shape[0], x_shape[0]), - pytensor.scalar.scalar_maximum(z_shape[1], y_shape[1]), - ) - ] + return [input_shapes[0]] setup_z_Nz_Sz_inplace = """ - // Needs broadcasting - if (PyArray_DIMS(%(_z)s)[0] < Nx[0] || PyArray_DIMS(%(_z)s)[1] < Ny[1]){ - - npy_intp dims[2]; - dims[0] = (PyArray_DIMS(%(_z)s)[0] >= Nx[0]) ? PyArray_DIMS(%(_z)s)[0] : Nx[0]; - dims[1] = (PyArray_DIMS(%(_z)s)[1] >= Ny[1]) ? 
PyArray_DIMS(%(_z)s)[1] : Ny[1]; - - // Check if we need to allocate new array - if((NULL == %(_zout)s) - || (PyArray_DIMS(%(_zout)s)[0] != dims[0]) - || (PyArray_DIMS(%(_zout)s)[1] != dims[1])) - { - // fprintf(stderr, "Gemm Allocating z output array with shape (%%i %%i)\\n", dims[0], dims[1]); - Py_XDECREF(%(_zout)s); - %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, PyArray_TYPE(%(_z)s)); - } - - // fprintf(stderr, "Gemm Broadcasting Z into shape (%%i %%i)\\n", dims[0], dims[1]); - if(PyArray_CopyInto(%(_zout)s, %(_z)s) == -1) - { - %(fail)s; - } - - } else { - if (%(_zout)s != %(_z)s) + if (%(_zout)s != %(_z)s) + { + if (%(_zout)s) { - Py_XDECREF(%(_zout)s); - %(_zout)s = %(_z)s; - Py_INCREF(%(_zout)s); + Py_DECREF(%(_zout)s); } + %(_zout)s = %(_z)s; + Py_INCREF(%(_zout)s); } - - Nz = PyArray_DIMS(%(_zout)s); - Sz = PyArray_STRIDES(%(_zout)s); + Nz = PyArray_DIMS(%(_z)s); + Sz = PyArray_STRIDES(%(_z)s); """ setup_z_Nz_Sz_outplace = """ - npy_intp dims[2]; - dims[0] = (PyArray_DIMS(%(_z)s)[0] >= Nx[0]) ? PyArray_DIMS(%(_z)s)[0] : Nx[0]; - dims[1] = (PyArray_DIMS(%(_z)s)[1] >= Ny[1]) ? PyArray_DIMS(%(_z)s)[1] : Ny[1]; - - // Check if we need to allocate new array if ((NULL == %(_zout)s) - || (PyArray_DIMS(%(_zout)s)[0] != dims[0]) - || (PyArray_DIMS(%(_zout)s)[1] != dims[1])) + || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_z)s)[0]) + || (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_z)s)[1]) + || (PyArray_STRIDES(%(_zout)s)[0] <= 0) + || (PyArray_STRIDES(%(_zout)s)[1] <= 0) + || (PyArray_STRIDES(%(_zout)s)[0] MOD type_size) + || (PyArray_STRIDES(%(_zout)s)[1] MOD type_size) + || ((PyArray_STRIDES(%(_zout)s)[0] != type_size) + && (PyArray_STRIDES(%(_zout)s)[1] != type_size))) { Py_XDECREF(%(_zout)s); - %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, PyArray_TYPE(%(_z)s)); - // fprintf(stderr, "Gemm Allocating z output array with shape (%%i %%i)\\n", dims[0], dims[1]); + npy_intp dims[2]; + dims[0] = PyArray_DIMS(%(_z)s)[0]; + dims[1] = PyArray_DIMS(%(_z)s)[1]; + %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, + PyArray_TYPE(%(_z)s)); + //fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]); if(!%(_zout)s) { PyErr_SetString(PyExc_MemoryError, "failed to alloc gemm_no_inplace output"); %(fail)s } } - - // fprintf(stderr, "Gemm Broadcasting Z into shape (%%i %%i)\\n", dims[0], dims[1]); - if(PyArray_CopyInto(%(_zout)s, %(_z)s) == -1) - { - %(fail)s - } - Nz = PyArray_DIMS(%(_zout)s); Sz = PyArray_STRIDES(%(_zout)s); - """ - broadcast_xy = """ - // Broadcast X if needed - if (Nz[0] > Nx[0]) + if (PyArray_DESCR(%(_zout)s)->type_num == NPY_FLOAT) { - npy_intp dims[2]; - dims[0] = Nz[0]; - dims[1] = Nx[1]; - // fprintf(stderr, "Gemm Broadcasting X into shape (%%i %%i)\\n", dims[0], dims[1]); - PyArrayObject *x_new = (PyArrayObject*)PyArray_SimpleNew(2, dims, PyArray_TYPE(%(_x)s)); - if(!x_new) { - PyErr_SetString(PyExc_MemoryError, - "failed to alloc gemm_inplace input"); - %(fail)s - } - - if(PyArray_MoveInto(x_new, %(_x)s) == -1) + float * zoutdata = (float*)PyArray_DATA(%(_zout)s); + int zoi = Sz[0] / sizeof(float); + int zoj = Sz[1] / sizeof(float); + const float * zdata = (float*)PyArray_DATA(%(_z)s); + int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(float); + int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(float); + for (int i = 0; i < Nz[0]; ++i) { - %(fail)s + for (int j = 0; j < Nz[1]; ++j) + { + zoutdata[zoi*i + zoj*j] = zdata[zi*i + zj*j]; + } } - - Py_DECREF(%(_x)s); - %(_x)s = x_new; - - Nx = PyArray_DIMS(%(_x)s); - Sx = PyArray_STRIDES(%(_x)s); } - - // 
Broadcast Y if needed - if (Nz[1] > Ny[1]) + else if (PyArray_DESCR(%(_zout)s)->type_num == NPY_DOUBLE) { - npy_intp dims[2]; - dims[0] = Ny[0]; - dims[1] = Nz[1]; - // fprintf(stderr, "Gemm Broadcasting Y into shape (%%i %%i)\\n", dims[0], dims[1]); - PyArrayObject *y_new = (PyArrayObject*)PyArray_SimpleNew(2, dims, PyArray_TYPE(%(_x)s)); - if(!y_new) { - PyErr_SetString(PyExc_MemoryError, - "failed to alloc gemm_inplace input"); - %(fail)s - } - - if(PyArray_MoveInto(y_new, %(_y)s) == -1) + double * zoutdata = (double*) PyArray_DATA(%(_zout)s); + int zoi = Sz[0] / sizeof(double); + int zoj = Sz[1] / sizeof(double); + const double * zdata = (double*)PyArray_DATA(%(_z)s); + int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(double); + int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(double); + for (int i = 0; i < Nz[0]; ++i) { - %(fail)s + for (int j = 0; j < Nz[1]; ++j) + { + zoutdata[zoi*i + zoj*j] = zdata[zi*i + zj*j]; + } } - - Py_DECREF(%(_y)s); - %(_y)s = y_new; - - Ny = PyArray_DIMS(%(_y)s); - Sy = PyArray_STRIDES(%(_y)s); } - - """ + else + { + PyErr_SetString(PyExc_AssertionError, + "neither float nor double dtype"); + %(fail)s + } + """ case_float_ab_constants = """ #define REAL float @@ -1179,7 +1127,7 @@ def c_code(self, node, name, inp, out, sub): def c_code_cache_version(self): gv = self.build_gemm_version() if gv: - return (7,) + gv + return (8,) + gv else: return gv @@ -1253,6 +1201,7 @@ def _beta_L_plus_alpha_M(fgraph, beta, L, alpha, M, recurse_flip=True): if M.owner and M.owner.op == _dot22: Ml, Mr = M.owner.inputs rval = [gemm_no_inplace(L, alpha, Ml, Mr, beta)] + # print 'GEMM 0', rval, beta, L, alpha, M return rval, M # it also might be the case that there is a dimshuffle between the + @@ -1719,7 +1668,6 @@ def infer_shape(self, fgraph, node, input_shapes): Sz = PyArray_STRIDES(%(_zout)s); """ - broadcast_xy = "" check_ab_double_or_float = "" case_float_ab_constants = """ float a = 1.0; @@ -2003,7 +1951,6 @@ def infer_shape(self, fgraph, node, input_shapes): return [[input_shapes[0][0], input_shapes[1][1]]] setup_z_Nz_Sz = Dot22.setup_z_Nz_Sz - broadcast_xy = "" check_ab_double_or_float = """ if ((PyArray_DESCR(%(_a)s)->type_num != NPY_DOUBLE) diff --git a/tests/tensor/test_blas.py b/tests/tensor/test_blas.py index 5aa77fea61..a6d21acf5f 100644 --- a/tests/tensor/test_blas.py +++ b/tests/tensor/test_blas.py @@ -1,6 +1,5 @@ from copy import copy from itertools import product -from random import shuffle import numpy as np import pytest @@ -68,7 +67,6 @@ matrix, row, scalar, - scalars, tensor, tensor3, tensor4, @@ -1044,41 +1042,6 @@ def test_inplace1(): assert [n.op for n in f.maker.fgraph.apply_nodes] == [gemm_no_inplace] -@pytest.mark.parametrize("linker", ("py", "cvm")) -@pytest.mark.parametrize("inplace", (False, True)) -def test_gemm_broadcasting(inplace, linker): - a, b = scalars("a", "b") - z, x, y = matrices("z", "x", "y") - - mode = Mode(linker=linker) - if inplace: - out = gemm_inplace(z, a, x, y, b) - f = pytensor.function([z, x, y, a, b], out, accept_inplace=True, mode=mode) - assert [node.op for node in f.maker.fgraph.toposort()] == [gemm_inplace] - else: - out = gemm_no_inplace(z, a, x, y, b) - f = pytensor.function([z, x, y, a, b], out, mode=mode) - assert [node.op for node in f.maker.fgraph.toposort()] == [gemm_no_inplace] - - shapes_z = [(5, 3), (1, 3), (5, 1), (1, 1)] - shapes_x = [(5, 4), (1, 4)] - shapes_y = [(4, 3), (4, 1)] - - rng = np.random.default_rng() - shuffle(shapes_z) - shuffle(shapes_x) - shuffle(shapes_y) - for shape_z, shape_x, shape_y in 
product(shapes_z, shapes_x, shapes_y): - z_v = rng.random(size=shape_z).astype(config.floatX) - x_v = rng.random(size=shape_x).astype(config.floatX) - y_v = rng.random(size=shape_y).astype(config.floatX) - # We have to copy for the inplace case - z_v_np = z_v.copy() - np.testing.assert_allclose( - f(z_v, x_v, y_v, 1, 1), z_v_np + np.dot(x_v, y_v), atol=2e-6 - ) - - def test_dot22(): for dtype1 in ["float32", "float64", "complex64", "complex128"]: a = matrix(dtype=dtype1) @@ -2513,40 +2476,6 @@ def test_gemm(self): Gemm, ) - def test_gemm_broadcast(self): - rng = np.random.default_rng(unittest_tools.fetch_seed()) - x, y, z = matrices("xyz") - a = scalar("a") - b = scalar("b") - - # Broadcast Z - self._compile_and_check( - [x, y, a, z, b], - [gemm(z, a, x, y, b)], - [ - rng.random((2, 3)).astype(config.floatX), - rng.random((3, 4)).astype(config.floatX), - np.asarray(0.5, dtype=config.floatX), - rng.random((1, 4)).astype(config.floatX), - np.asarray(0.5, dtype=config.floatX), - ], - Gemm, - ) - - # Broadcast dot(X, Y) - self._compile_and_check( - [x, y, a, z, b], - [gemm(z, a, x, y, b)], - [ - rng.random((1, 3)).astype(config.floatX), - rng.random((3, 4)).astype(config.floatX), - np.asarray(0.5, dtype=config.floatX), - rng.random((5, 4)).astype(config.floatX), - np.asarray(1, dtype=config.floatX), - ], - Gemm, - ) - def test_gemv(self): rng = np.random.default_rng(unittest_tools.fetch_seed()) A = matrix("A") From e5b5294a6aaf9a043e70a56e6964edf35bb60959 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 29 Dec 2022 18:10:18 +0300 Subject: [PATCH 3/3] Revert "Clean up some usage of the TensorType interface in Scan" This reverts commit 471657a5ac3e9868797bcd779a4e635915999c5d. --- pytensor/scan/op.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pytensor/scan/op.py b/pytensor/scan/op.py index 189b167c34..d7c5c2886a 100644 --- a/pytensor/scan/op.py +++ b/pytensor/scan/op.py @@ -156,9 +156,8 @@ def check_broadcast(v1, v2): which may wrongly be interpreted as broadcastable. """ - if not isinstance(v1.type, TensorType) and not isinstance(v2.type, TensorType): + if not hasattr(v1, "broadcastable") and not hasattr(v2, "broadcastable"): return - msg = ( "The broadcast pattern of the output of scan (%s) is " "inconsistent with the one provided in `output_info` " @@ -169,13 +168,13 @@ def check_broadcast(v1, v2): "them consistent, e.g. using pytensor.tensor." "{unbroadcast, specify_broadcastable}." ) - size = min(v1.type.ndim, v2.type.ndim) + size = min(len(v1.broadcastable), len(v2.broadcastable)) for n, (b1, b2) in enumerate( - zip(v1.type.broadcastable[-size:], v2.type.broadcastable[-size:]) + zip(v1.broadcastable[-size:], v2.broadcastable[-size:]) ): if b1 != b2: - a1 = n + size - v1.type.ndim + 1 - a2 = n + size - v2.type.ndim + 1 + a1 = n + size - len(v1.broadcastable) + 1 + a2 = n + size - len(v2.broadcastable) + 1 raise TypeError(msg % (v1.type, v2.type, a1, b1, b2, a2)) @@ -624,7 +623,6 @@ def validate_inner_graph(self): type_input = self.inner_inputs[inner_iidx].type type_output = self.inner_outputs[inner_oidx].type if ( - # TODO: Use the `Type` interface for this type_input.dtype != type_output.dtype or type_input.broadcastable != type_output.broadcastable ):
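
A minimal usage sketch of the behaviour the new Elemwise tests encode (not part of the patch itself; variable names and the commented error text are illustrative, adapted from the tests added to tests/tensor/test_elemwise.py):

    import numpy as np
    import pytensor
    from pytensor.tensor import tensor

    # Graph-construction check: a dimension only broadcasts if it is explicitly
    # marked broadcastable; a static shape of 1 alone is no longer enough.
    x = tensor(dtype="float64", shape=(1, 2), broadcastable=(False, False))
    y = tensor(dtype="float64", shape=(2, 1))  # broadcastable inferred as (False, True)
    # x + y  -> ValueError: Incompatible Elemwise input broadcasting pattern

    # Runtime check: with unknown shapes and broadcastable=False, implicit
    # broadcasting during execution is rejected instead of silently performed.
    a = tensor(dtype="float64", shape=(None,), broadcastable=(False,), name="a")
    b = tensor(dtype="float64", shape=(None,), broadcastable=(False,), name="b")
    f = pytensor.function([a, b], a + b)
    # f(np.ones(1), np.ones(2))  -> ValueError mentioning broadcasting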