
Commit 71c58f3

Deprecate AllocDiag Op in favor of equivalent PyTensor graph
1 parent: deea8dd

6 files changed (+52, -81 lines)

pytensor/link/jax/dispatch/tensor_basic.py

Lines changed: 0 additions & 11 deletions
@@ -8,7 +8,6 @@
 from pytensor.tensor import get_vector_length
 from pytensor.tensor.basic import (
     Alloc,
-    AllocDiag,
     AllocEmpty,
     ARange,
     ExtractDiag,
@@ -32,16 +31,6 @@
     """


-@jax_funcify.register(AllocDiag)
-def jax_funcify_AllocDiag(op, **kwargs):
-    offset = op.offset
-
-    def allocdiag(v, offset=offset):
-        return jnp.diag(v, k=offset)
-
-    return allocdiag
-
-
 @jax_funcify.register(AllocEmpty)
 def jax_funcify_AllocEmpty(op, **kwargs):
     def allocempty(*shape):
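
The removed dispatch lowered `AllocDiag` directly to `jnp.diag`. After this commit, the diagonal allocation is built from `zeros`, `arange`, and `set_subtensor`, ops the JAX backend already handles individually, so no dedicated dispatch is needed. A minimal sketch (not part of the commit) checking that the new graph matches what the old dispatch computed:

import numpy as np

import pytensor
import pytensor.tensor as at

v = at.vector("v")
# `at.alloc_diag` is the graph-building helper added in this commit
f = pytensor.function([v], at.alloc_diag(v, offset=1))

x = np.arange(3, dtype=v.dtype)
# Same values the removed JAX dispatch produced via jnp.diag(v, k=1)
assert np.array_equal(f(x), np.diag(x, k=1))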

pytensor/link/numba/dispatch/tensor_basic.py

Lines changed: 0 additions & 12 deletions
@@ -7,7 +7,6 @@
 from pytensor.link.utils import compile_function_src, unique_name_generator
 from pytensor.tensor.basic import (
     Alloc,
-    AllocDiag,
     AllocEmpty,
     ARange,
     ExtractDiag,
@@ -93,17 +92,6 @@ def alloc(val, {", ".join(shape_var_names)}):
     return numba_basic.numba_njit(alloc_fn)


-@numba_funcify.register(AllocDiag)
-def numba_funcify_AllocDiag(op, **kwargs):
-    offset = op.offset
-
-    @numba_basic.numba_njit(inline="always")
-    def allocdiag(v):
-        return np.diag(v, k=offset)
-
-    return allocdiag
-
-
 @numba_funcify.register(ARange)
 def numba_funcify_ARange(op, **kwargs):
     dtype = np.dtype(op.dtype)
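
Likewise, the Numba backend no longer needs a dedicated `AllocDiag` kernel: the replacement graph is composed of ops it already compiles. A minimal sketch (assuming Numba is installed so the "NUMBA" mode is usable):

import numpy as np

import pytensor
import pytensor.tensor as at

v = at.vector("v")
# `at.diag` on a vector now expands to the alloc_diag graph internally
f = pytensor.function([v], at.diag(v, k=-1), mode="NUMBA")

x = np.arange(4, dtype=v.dtype)
assert np.array_equal(f(x), np.diag(x, k=-1))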

pytensor/tensor/basic.py

Lines changed: 44 additions & 2 deletions
@@ -6,6 +6,7 @@
 """

 import builtins
+import warnings
 from functools import partial
 from numbers import Number
 from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union
@@ -3450,7 +3451,7 @@ def grad(self, inputs, gout):
         x_grad = zeros_like(moveaxis(x, (axis1, axis2), (0, 1)))

         # Fill zeros with output diagonal
-        xdiag = AllocDiag(offset=0, axis1=0, axis2=1)(gz)
+        xdiag = alloc_diag(gz, offset=0, axis1=0, axis2=1)
         z_len = xdiag.shape[0]
         if offset >= 0:
             diag_slices = (slice(None, z_len), slice(offset, offset + z_len))
@@ -3544,6 +3545,10 @@ def __init__(self, offset=0, axis1=0, axis2=1):
             Axis to be used as the second axis of the 2-D sub-arrays to which
             the diagonals will be allocated. Defaults to second axis (i.e. 1).
         """
+        warnings.warn(
+            "AllocDiag is deprecated. Use `alloc_diag` instead",
+            FutureWarning,
+        )
         self.offset = offset
         if axis1 < 0 or axis2 < 0:
             raise NotImplementedError("AllocDiag does not support negative axis")
@@ -3625,6 +3630,43 @@ def __setstate__(self, state):
         self.axis2 = 1


+def alloc_diag(diag, offset=0, axis1=0, axis2=1):
+    """Insert a vector on the diagonal of a zero-ed matrix.
+
+    diagonal(alloc_diag(x)) == x
+    """
+    from pytensor.tensor import set_subtensor
+
+    diag = as_tensor_variable(diag)
+    axis1, axis2 = normalize_axis_tuple((axis1, axis2), ndim=diag.type.ndim + 1)
+    if axis1 > axis2:
+        axis1, axis2 = axis2, axis1
+
+    # Create array with one extra dimension for resulting matrix
+    result_shape = tuple(diag.shape)[:-1] + (diag.shape[-1] + abs(offset),) * 2
+    result = zeros(result_shape, dtype=diag.dtype)
+
+    # Create slice for diagonal in final 2 axes
+    idxs = arange(diag.shape[-1])
+    diagonal_slice = (slice(None),) * (len(result_shape) - 2) + (
+        idxs + np.maximum(0, -offset),
+        idxs + np.maximum(0, offset),
+    )
+
+    # Fill in final 2 axes with diag
+    result = set_subtensor(result[diagonal_slice], diag)
+
+    if diag.type.ndim > 1:
+        # Re-order axes so they correspond to diagonals at axis1, axis2
+        axes = list(range(diag.type.ndim - 1))
+        last_idx = axes[-1]
+        axes = axes[:axis1] + [last_idx + 1] + axes[axis1:]
+        axes = axes[:axis2] + [last_idx + 2] + axes[axis2:]
+        result = result.transpose(axes)
+
+    return result
+
+
 def diag(v, k=0):
     """
     A helper function for two ops: `ExtractDiag` and
@@ -3650,7 +3692,7 @@ def diag(v, k=0):
     _v = as_tensor_variable(v)

     if _v.ndim == 1:
-        return AllocDiag(k)(_v)
+        return alloc_diag(_v, offset=k)
     elif _v.ndim == 2:
         return diagonal(_v, offset=k)
     else:
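
The new helper satisfies the identity stated in its docstring, `diagonal(alloc_diag(x)) == x`, including batched inputs where `axis1`/`axis2` choose where the two new diagonal axes land. A minimal usage sketch (not from the commit; the shapes are just an illustration):

import numpy as np

import pytensor
import pytensor.tensor as at

m = at.matrix("m")  # leading batch axis; last axis holds the diagonal values
out = at.alloc_diag(m, offset=2, axis1=0, axis2=2)
f = pytensor.function([m], out)

x = np.arange(6, dtype=m.dtype).reshape(2, 3)
res = f(x)
assert res.shape == (5, 2, 5)  # diagonal axes get size 3 + |offset| = 5
# Round trip: extracting the diagonal recovers the input
assert np.array_equal(np.diagonal(res, offset=2, axis1=0, axis2=2), x)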

tests/link/jax/test_slinalg.py

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ def test_jax_basic():
         ],
     )

-    out = at.diag(b)
+    out = at.diag(at.specify_shape(b, shape=(10,)))
     out_fg = FunctionGraph([b], [out])
     compare_jax_and_py(out_fg, [np.arange(10).astype(config.floatX)])
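
The test now pins the vector's length with `specify_shape`. A plausible reading (my interpretation, not stated in the commit): the `alloc_diag` graph contains `arange(v.shape[-1])`, and JAX tracing needs that length to be concrete, which `specify_shape` provides. A sketch, assuming JAX is installed:

import numpy as np

import pytensor
import pytensor.tensor as at

b = at.vector("b")
# Without a static shape, arange(b.shape[0]) has no concrete length for JAX
out = at.diag(at.specify_shape(b, shape=(10,)))
f = pytensor.function([b], out, mode="JAX")

x = np.arange(10, dtype=b.dtype)
assert np.array_equal(f(x), np.diag(x))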

tests/link/numba/test_tensor_basic.py

Lines changed: 0 additions & 22 deletions
@@ -57,28 +57,6 @@ def test_AllocEmpty():
     compare_numba_and_py(x_fg, [], assert_fn=compare_shape_dtype)


-@pytest.mark.parametrize(
-    "v, offset",
-    [
-        (set_test_value(at.vector(), np.arange(10, dtype=config.floatX)), 0),
-        (set_test_value(at.vector(), np.arange(10, dtype=config.floatX)), 1),
-        (set_test_value(at.vector(), np.arange(10, dtype=config.floatX)), -1),
-    ],
-)
-def test_AllocDiag(v, offset):
-    g = atb.AllocDiag(offset=offset)(v)
-    g_fg = FunctionGraph(outputs=[g])
-
-    compare_numba_and_py(
-        g_fg,
-        [
-            i.tag.test_value
-            for i in g_fg.inputs
-            if not isinstance(i, (SharedVariable, Constant))
-        ],
-    )
-
-
 @pytest.mark.parametrize(
     "v", [set_test_value(aes.float64(), np.array(1.0, dtype="float64"))]
 )

tests/tensor/test_basic.py

Lines changed: 7 additions & 33 deletions
@@ -23,7 +23,6 @@
 from pytensor.tensor import NoneConst
 from pytensor.tensor.basic import (
     Alloc,
-    AllocDiag,
     AllocEmpty,
     ARange,
     Choose,
@@ -92,7 +91,7 @@
 from pytensor.tensor.exceptions import NotScalarConstantError
 from pytensor.tensor.math import dense_dot
 from pytensor.tensor.math import sum as at_sum
-from pytensor.tensor.shape import Reshape, Shape, Shape_i, shape_padright, specify_shape
+from pytensor.tensor.shape import Reshape, Shape_i, shape_padright, specify_shape
 from pytensor.tensor.type import (
     TensorType,
     bscalar,
@@ -3571,7 +3570,6 @@ def test_diag(self):
         # test vector input
         x = vector()
         g = diag(x)
-        assert isinstance(g.owner.op, AllocDiag)
         f = pytensor.function([x], g)
         for shp in [5, 0, 1]:
             m = rng.random(shp).astype(self.floatX)
@@ -3654,10 +3652,6 @@ def test_grad_3d(self, offset, axis1, axis2):
 class TestAllocDiag:
     # TODO: Separate perform, grad and infer_shape tests

-    def setup_method(self):
-        self.alloc_diag = AllocDiag
-        self.mode = pytensor.compile.mode.get_default_mode()
-
     def _generator(self):
         dims = 4
         shape = (5,) * dims
@@ -3690,34 +3684,28 @@ def test_alloc_diag_values(self):
             # Test perform
             if np.maximum(axis1, axis2) > len(test_val.shape):
                 continue
-            adiag_op = self.alloc_diag(offset=offset, axis1=axis1, axis2=axis2)
-            f = pytensor.function([x], adiag_op(x))
-            # AllocDiag and extract the diagonal again
-            # to check
+            diag_x = at.alloc_diag(x, offset=offset, axis1=axis1, axis2=axis2)
+            f = pytensor.function([x], diag_x)
+            # alloc_diag and extract the diagonal again to check for correctness
             diag_arr = f(test_val)
             rediag = np.diagonal(diag_arr, offset=offset, axis1=axis1, axis2=axis2)
             assert np.all(rediag == test_val)

             # Test infer_shape
-            f_shape = pytensor.function([x], adiag_op(x).shape, mode="FAST_RUN")
+            f_shape = pytensor.function([x], diag_x.shape, mode="FAST_RUN")

             output_shape = f_shape(test_val)
-            assert not any(
-                isinstance(node.op, self.alloc_diag)
-                for node in f_shape.maker.fgraph.toposort()
-            )
             rediag_shape = np.diagonal(
                 np.ones(output_shape), offset=offset, axis1=axis1, axis2=axis2
             ).shape
             assert np.all(rediag_shape == test_val.shape)

             # Test grad
-            diag_x = adiag_op(x)
             sum_diag_x = at_sum(diag_x)
             grad_x = pytensor.grad(sum_diag_x, x)
             grad_diag_x = pytensor.grad(sum_diag_x, diag_x)
-            f_grad_x = pytensor.function([x], grad_x, mode=self.mode)
-            f_grad_diag_x = pytensor.function([x], grad_diag_x, mode=self.mode)
+            f_grad_x = pytensor.function([x], grad_x)
+            f_grad_diag_x = pytensor.function([x], grad_diag_x)
             grad_input = f_grad_x(test_val)
             grad_diag_input = f_grad_diag_x(test_val)
             true_grad_input = np.diagonal(
@@ -3894,20 +3882,6 @@ def test_ExtractDiag(self):
         atens3_diag = ExtractDiag(1, 2, 0)(atens3)
         self._compile_and_check([atens3], [atens3_diag], [atens3_val], ExtractDiag)

-    def test_AllocDiag(self):
-        advec = dvector()
-        advec_val = random(4)
-        self._compile_and_check([advec], [AllocDiag()(advec)], [advec_val], AllocDiag)
-
-        # Shape
-        # 'opt.Makevector' precludes optimizer from disentangling
-        # elements of shape
-        adtens = tensor3()
-        adtens_val = random(4, 5, 3)
-        self._compile_and_check(
-            [adtens], [Shape()(adtens)], [adtens_val], (MakeVector, Shape)
-        )
-
     def test_Split(self):
         aiscal = iscalar()
         aivec = ivector()
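
The gradient block above now differentiates through the `alloc_diag` graph rather than a dedicated Op's `grad` method. Each element of `x` lands exactly once in `alloc_diag(x)`, so the gradient of the sum is a vector of ones; a minimal sketch of that behaviour (not from the commit):

import numpy as np

import pytensor
import pytensor.tensor as at

x = at.vector("x")
diag_x = at.alloc_diag(x, offset=1)
grad_x = pytensor.grad(at.sum(diag_x), x)
f = pytensor.function([x], grad_x)

assert np.array_equal(f(np.arange(4, dtype=x.dtype)), np.ones(4))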
