Commit 08152d0

Cleanup Rop tests and fix Max Rop implementation
1 parent ea1da5d commit 08152d0
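
(Context: pytensor.gradient.Rop(f, wrt, eval_points) computes the R-operator, i.e. the Jacobian-vector product of f with respect to wrt applied to eval_points, while Lop computes the transposed vector-Jacobian product. The Max Op's R_op fixed in this commit implements that product for a max reduction over one axis of a matrix.)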

5 files changed: +121 −85 lines changed

pytensor/tensor/math.py

Lines changed: 15 additions & 10 deletions
@@ -431,20 +431,25 @@ def L_op(self, inputs, outputs, grads):
         return (g_x,)
 
     def R_op(self, inputs, eval_points):
+        [x] = inputs
         if eval_points[0] is None:
-            return [None, None]
-        if len(self.axis) != 1:
-            raise ValueError("R_op supported for max only for one axis!")
-        if self.axis[0] > 1:
-            raise ValueError("R_op supported for max only when axis is 0 or 1")
+            return [None]
+        axis = tuple(range(x.ndim) if self.axis is None else self.axis)
+        if isinstance(axis, int):
+            axis = [axis]
+        if len(axis) != 1:
+            raise NotImplementedError("R_op supported for max only for one axis!")
+        if axis[0] > 1:
+            raise NotImplementedError("R_op supported for max only when axis is 0 or 1")
         if inputs[0].ndim != 2:
-            raise ValueError("R_op supported for max only when input is a matrix")
-        max_pos = Argmax(self.axis).make_node(*inputs).outputs
-        # print(eval_points[0].eval())
+            raise NotImplementedError(
+                "R_op supported for max only when input is a matrix"
+            )
+        max_pos = Argmax(self.axis)(*inputs)
         if self.axis[0] == 0:
-            return [eval_points[0][max_pos, arange(eval_points[0].shape[1])], None]
+            return [eval_points[0][max_pos, arange(eval_points[0].shape[1])]]
         else:
-            return [eval_points[0][arange(eval_points[0].shape[0]), max_pos], None]
+            return [eval_points[0][arange(eval_points[0].shape[0]), max_pos]]
 
 
 class Min(NonZeroDimsCAReduce):
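
As a side note, here is a minimal sketch (not part of this diff) of exercising the fixed Max R_op through the public Rop API; the shapes, seed, and variable names below are purely illustrative:

    import numpy as np
    import pytensor
    import pytensor.tensor as pt
    from pytensor.gradient import Rop

    x = pt.matrix("x")
    v = pt.matrix("v")  # perturbation, same shape as x
    y = pt.max(x, axis=0)  # column-wise max of a matrix
    jv = Rop(y, x, v)  # Jacobian-vector product of max w.r.t. x, applied to v

    f = pytensor.function([x, v], jv)
    rng = np.random.default_rng(0)
    vx = rng.normal(size=(3, 4)).astype(pytensor.config.floatX)
    vv = rng.normal(size=(3, 4)).astype(pytensor.config.floatX)
    # The R_op of max picks v at the row where each column of x attains its maximum.
    expected = vv[vx.argmax(axis=0), np.arange(vx.shape[1])]
    np.testing.assert_allclose(f(vx, vv), expected, rtol=1e-5)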

tests/scan/test_basic.py

Lines changed: 11 additions & 13 deletions
@@ -1992,9 +1992,9 @@ def rnn_fn(_u, _y, _W):
         vnu, vnh0, vnW = fn_rop(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
         tnu, tnh0, tnW = fn_test(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
 
-        utt.assert_allclose(vnu, tnu, atol=1e-6)
-        utt.assert_allclose(vnh0, tnh0, atol=1e-6)
-        utt.assert_allclose(vnW, tnW, atol=1e-6)
+        np.testing.assert_allclose(vnu, tnu, atol=1e-6)
+        np.testing.assert_allclose(vnh0, tnh0, atol=1e-6)
+        np.testing.assert_allclose(vnW, tnW, atol=1e-6)
 
     @pytest.mark.slow
     def test_R_op_2(self):
@@ -2074,9 +2074,9 @@ def rnn_fn(_u, _y, _W):
         )
 
         tnu, tnh0, tnW, tno = fn_test(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
-        utt.assert_allclose(vnu, tnu, atol=1e-6)
-        utt.assert_allclose(vnh0, tnh0, atol=1e-6)
-        utt.assert_allclose(vnW, tnW, atol=2e-6)
+        np.testing.assert_allclose(vnu, tnu, atol=1e-6)
+        np.testing.assert_allclose(vnh0, tnh0, atol=1e-6)
+        np.testing.assert_allclose(vnW, tnW, atol=2e-6)
 
     def test_R_op_mitmot(self):
         # this test is a copy paste from the script given by Justin Bayer to
@@ -2094,13 +2094,10 @@ def test_R_op_mitmot(self):
         W1 = pars[:3].reshape(W1shape)
         W2 = pars[3:].reshape(W2shape)
 
-        # Define recurrent model. We are using a model where each input is a
-        # tensor
-        # of shape (T, B, D) where T is the number of timesteps, B is the
-        # number of
-        # sequences iterated over in parallel and D is the dimensionality of
-        # each
-        # item at a timestep.
+        # Define recurrent model. We are using a model where each input
+        # is a tensor of shape (T, B, D) where T is the number of timesteps,
+        # B is the number of sequences iterated over in parallel and
+        # D is the dimensionality of each item at a timestep.
 
         inpt = tensor3("inpt")
         target = tensor3("target")
@@ -2128,6 +2125,7 @@ def test_R_op_mitmot(self):
         d_cost_wrt_pars = grad(cost, pars)
 
         p = dvector()
+        # TODO: We should test something about the Rop!
         Rop(d_cost_wrt_pars, pars, p)
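
The assertion change above is mechanical: np.testing.assert_allclose takes the same pair of arrays plus explicit tolerances, so it serves as a drop-in replacement for the old utt helper here. A tiny standalone illustration (values are arbitrary):

    import numpy as np

    a = np.array([1.0, 2.0, 3.0])
    b = a + 5e-7  # still within the atol=1e-6 used in the tests above
    np.testing.assert_allclose(a, b, atol=1e-6)  # passes; raises AssertionError beyond the tolerance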

tests/tensor/rewriting/test_linalg.py

Lines changed: 9 additions & 18 deletions
@@ -14,7 +14,7 @@
 from pytensor.tensor import swapaxes
 from pytensor.tensor.blockwise import Blockwise
 from pytensor.tensor.elemwise import DimShuffle
-from pytensor.tensor.math import _allclose, dot, matmul
+from pytensor.tensor.math import dot, matmul
 from pytensor.tensor.nlinalg import (
     SVD,
     Det,
@@ -42,7 +42,8 @@
 from tests.test_rop import break_op
 
 
-def test_rop_lop():
+def test_matrix_inverse_rop_lop():
+    rtol = 1e-7 if config.floatX == "float64" else 1e-5
     mx = matrix("mx")
     mv = matrix("mv")
     v = vector("v")
@@ -62,23 +63,13 @@ def test_rop_lop():
     vx = np.asarray(rng.standard_normal((4, 4)), pytensor.config.floatX)
     vv = np.asarray(rng.standard_normal((4, 4)), pytensor.config.floatX)
 
-    v1 = rop_f(vx, vv)
-    v2 = scan_f(vx, vv)
+    v_ref = scan_f(vx, vv)
+    np.testing.assert_allclose(rop_f(vx, vv), v_ref, rtol=rtol)
 
-    assert _allclose(v1, v2), f"ROP mismatch: {v1} {v2}"
-
-    raised = False
-    try:
+    with pytest.raises(ValueError):
         pytensor.gradient.Rop(
             pytensor.clone_replace(y, replace={mx: break_op(mx)}), mx, mv
         )
-    except ValueError:
-        raised = True
-    if not raised:
-        raise Exception(
-            "Op did not raised an error even though the function"
-            " is not differentiable"
-        )
 
     vv = np.asarray(rng.uniform(size=(4,)), pytensor.config.floatX)
     yv = pytensor.gradient.Lop(y, mx, v)
@@ -87,9 +78,9 @@ def test_rop_lop():
     sy = pytensor.gradient.grad((v * y).sum(), mx)
     scan_f = function([mx, v], sy)
 
-    v1 = lop_f(vx, vv)
-    v2 = scan_f(vx, vv)
-    assert _allclose(v1, v2), f"LOP mismatch: {v1} {v2}"
+    v_ref = scan_f(vx, vv)
+    v = lop_f(vx, vv)
+    np.testing.assert_allclose(v, v_ref, rtol=rtol)
 
 
 def test_transinv_to_invtrans():
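
The raised-flag bookkeeping removed above is replaced by pytest's context manager, which fails the test when the expected exception is not raised. A minimal standalone sketch of the pattern (the helper function is hypothetical):

    import pytest

    def might_raise(x):
        # Hypothetical stand-in for the non-differentiable Rop call in the test above.
        if x < 0:
            raise ValueError("negative input")
        return x

    def test_expected_error():
        # The block fails automatically if ValueError is NOT raised,
        # replacing the manual raised/try/except/Exception bookkeeping.
        with pytest.raises(ValueError):
            might_raise(-1)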

tests/tensor/test_shape.py

Lines changed: 1 addition & 1 deletion
@@ -603,7 +603,7 @@ def test_validation(self):
 
 class TestRopLop(RopLopChecker):
     def test_shape(self):
-        self.check_nondiff_rop(self.x.shape[0])
+        self.check_nondiff_rop(self.x.shape[0], self.x, self.v)
 
     def test_specifyshape(self):
         self.check_rop_lop(specify_shape(self.x, self.in_shape), self.in_shape)
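
check_nondiff_rop now receives the wrt variable and evaluation point explicitly instead of always using self.x and self.v. What the updated test asserts, roughly, is that Rop refuses a non-differentiable output such as a shape; a minimal sketch with vector inputs (mirroring the checker's setup):

    import pytest
    import pytensor.tensor as pt
    from pytensor.gradient import Rop

    x = pt.vector("x")
    v = pt.vector("v")

    # The shape of x does not depend differentiably on the values of x,
    # so Rop is expected to raise ValueError here.
    with pytest.raises(ValueError):
        Rop(x.shape[0], x, v)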

tests/test_rop.py

Lines changed: 85 additions & 43 deletions
@@ -16,8 +16,14 @@
 
 import pytensor
 import pytensor.tensor as pt
-from pytensor import function
-from pytensor.gradient import Lop, Rop, grad, grad_undefined
+from pytensor import config, function
+from pytensor.gradient import (
+    Lop,
+    NullTypeGradError,
+    Rop,
+    grad,
+    grad_undefined,
+)
 from pytensor.graph.basic import Apply
 from pytensor.graph.op import Op
 from pytensor.tensor.math import argmax, dot
@@ -61,6 +67,10 @@ class RopLopChecker:
     Rop to class that inherit from it.
     """
 
+    @staticmethod
+    def rtol():
+        return 1e-7 if config.floatX == "float64" else 1e-5
+
     def setup_method(self):
        # Using vectors make things a lot simpler for generating the same
        # computations using scan
@@ -72,13 +82,13 @@ def setup_method(self):
         self.mv = matrix("mv")
         self.mat_in_shape = (5 + self.rng.integers(3), 5 + self.rng.integers(3))
 
-    def check_nondiff_rop(self, y):
+    def check_nondiff_rop(self, y, x, v):
         """
         If your op is not differentiable(so you can't define Rop)
         test that an error is raised.
         """
         with pytest.raises(ValueError):
-            Rop(y, self.x, self.v)
+            Rop(y, x, v)
 
     def check_mat_rop_lop(self, y, out_shape):
         """
@@ -115,13 +125,13 @@ def check_mat_rop_lop(self, y, out_shape):
         )
         scan_f = function([self.mx, self.mv], sy, on_unused_input="ignore")
 
-        v1 = rop_f(vx, vv)
-        v2 = scan_f(vx, vv)
-
-        assert np.allclose(v1, v2), f"ROP mismatch: {v1} {v2}"
+        v_ref = scan_f(vx, vv)
+        np.testing.assert_allclose(rop_f(vx, vv), v_ref)
 
         self.check_nondiff_rop(
-            pytensor.clone_replace(y, replace={self.mx: break_op(self.mx)})
+            pytensor.clone_replace(y, replace={self.mx: break_op(self.mx)}),
+            self.mx,
+            self.mv,
         )
 
         vv = np.asarray(self.rng.uniform(size=out_shape), pytensor.config.floatX)
@@ -131,15 +141,17 @@ def check_mat_rop_lop(self, y, out_shape):
         sy = grad((self.v * y).sum(), self.mx)
         scan_f = function([self.mx, self.v], sy)
 
-        v1 = lop_f(vx, vv)
-        v2 = scan_f(vx, vv)
-        assert np.allclose(v1, v2), f"LOP mismatch: {v1} {v2}"
+        v = lop_f(vx, vv)
+        v_ref = scan_f(vx, vv)
+        np.testing.assert_allclose(v, v_ref)
 
-    def check_rop_lop(self, y, out_shape):
+    def check_rop_lop(self, y, out_shape, check_nondiff_rop: bool = True):
         """
         As check_mat_rop_lop, except the input is self.x which is a
         vector. The output is still a vector.
         """
+        rtol = self.rtol()
+
         # TEST ROP
         vx = np.asarray(self.rng.uniform(size=self.in_shape), pytensor.config.floatX)
         vv = np.asarray(self.rng.uniform(size=self.in_shape), pytensor.config.floatX)
@@ -152,24 +164,17 @@ def check_rop_lop(self, y, out_shape):
             non_sequences=[y, self.x],
         )
         sy = dot(J, self.v)
-
         scan_f = function([self.x, self.v], sy, on_unused_input="ignore")
 
-        v1 = rop_f(vx, vv)
-        v2 = scan_f(vx, vv)
-        assert np.allclose(v1, v2), f"ROP mismatch: {v1} {v2}"
+        v_ref = scan_f(vx, vv)
+        np.testing.assert_allclose(rop_f(vx, vv), v_ref, rtol=rtol)
 
-        try:
-            Rop(
+        if check_nondiff_rop:
+            self.check_nondiff_rop(
                 pytensor.clone_replace(y, replace={self.x: break_op(self.x)}),
                 self.x,
                 self.v,
             )
-        except ValueError:
-            pytest.skip(
-                "Rop does not handle non-differentiable inputs "
-                "correctly. Bug exposed by fixing Add.grad method."
-            )
 
         vx = np.asarray(self.rng.uniform(size=self.in_shape), pytensor.config.floatX)
         vv = np.asarray(self.rng.uniform(size=out_shape), pytensor.config.floatX)
@@ -182,22 +187,20 @@ def check_rop_lop(self, y, out_shape):
             non_sequences=[y, self.x],
         )
         sy = dot(self.v, J)
-
         scan_f = function([self.x, self.v], sy)
 
-        v1 = lop_f(vx, vv)
-        v2 = scan_f(vx, vv)
-        assert np.allclose(v1, v2), f"LOP mismatch: {v1} {v2}"
+        v = lop_f(vx, vv)
+        v_ref = scan_f(vx, vv)
+        np.testing.assert_allclose(v, v_ref, rtol=rtol)
 
 
 class TestRopLop(RopLopChecker):
     def test_max(self):
-        # self.check_mat_rop_lop(pt_max(self.mx, axis=[0,1])[0], ())
         self.check_mat_rop_lop(pt_max(self.mx, axis=0), (self.mat_in_shape[1],))
         self.check_mat_rop_lop(pt_max(self.mx, axis=1), (self.mat_in_shape[0],))
 
     def test_argmax(self):
-        self.check_nondiff_rop(argmax(self.mx, axis=1))
+        self.check_nondiff_rop(argmax(self.mx, axis=1), self.mx, self.mv)
 
     def test_subtensor(self):
         self.check_rop_lop(self.x[:4], (4,))
@@ -252,10 +255,14 @@ def test_dot(self):
         insh = self.in_shape[0]
         vW = np.asarray(self.rng.uniform(size=(insh, insh)), pytensor.config.floatX)
         W = pytensor.shared(vW)
-        self.check_rop_lop(dot(self.x, W), self.in_shape)
+        # check_nondiff_rop reveals an error in how Rop handles non-differentiable paths
+        # See: test_Rop_partially_differentiable_paths
+        self.check_rop_lop(dot(self.x, W), self.in_shape, check_nondiff_rop=False)
 
     def test_elemwise0(self):
-        self.check_rop_lop((self.x + 1) ** 2, self.in_shape)
+        # check_nondiff_rop reveals an error in how Rop handles non-differentiable paths
+        # See: test_Rop_partially_differentiable_paths
+        self.check_rop_lop((self.x + 1) ** 2, self.in_shape, check_nondiff_rop=False)
 
     def test_elemwise1(self):
         self.check_rop_lop(self.x + pt.cast(self.x, "int32"), self.in_shape)
@@ -288,15 +295,8 @@ def test_alloc(self):
         )
 
     def test_invalid_input(self):
-        success = False
-
-        try:
+        with pytest.raises(ValueError):
             Rop(0.0, [matrix()], [vector()])
-            success = True
-        except ValueError:
-            pass
-
-        assert not success
 
     def test_multiple_outputs(self):
         m = matrix("m")
@@ -322,12 +322,54 @@ def test_multiple_outputs(self):
         f = pytensor.function([m, v, m_, v_], all_outs)
         f(mval, vval, m_val, v_val)
 
-    def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
+    @pytest.mark.xfail()
+    def test_Rop_partially_differentiable_paths(self):
         # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
         # 2013. The bug consists when through a dot operation there is only
         # one differentiable path (i.e. there is no gradient wrt to one of
         # the inputs).
         x = pt.arange(20.0).reshape([1, 20])
-        v = pytensor.shared(np.ones([20]))
+        v = pytensor.shared(np.ones([20]), name="v")
         d = dot(x, v).sum()
-        Rop(grad(d, v), v, v)
+
+        Rop(
+            grad(d, v),
+            v,
+            v,
+            disconnected_outputs="raise",
+        )
+
+        # 2025: Here is an unambiguous test for the original commented issue:
+        x = pt.matrix("x")
+        y = pt.matrix("y")
+        out = dot(x, break_op(y)).sum()
+        # Should not raise an error
+        Rop(
+            out,
+            [x],
+            [x.type()],
+            disconnected_outputs="raise",
+        )
+
+        # More extensive testing shows that the legacy Rop implementation FAILS to raise when
+        # the cost is linked through strictly non-differentiable paths.
+        # This is not Dot specific, we would observe the same with any operation where the gradient
+        # with respect to one of the inputs does not depend on the original input (such as `mul`, `add`, ...)
+        out = dot(break_op(x), y).sum()
+        with pytest.raises((ValueError, NullTypeGradError)):
+            Rop(
+                out,
+                [x],
+                [x.type()],
+                disconnected_outputs="raise",
+            )
+
+        # Only when both paths are non-differentiable is an error correctly raised again.
+        out = dot(break_op(x), break_op(y)).sum()
+        with pytest.raises((ValueError, NullTypeGradError)):
+            Rop(
+                out,
+                [x],
+                [x.type()],
+                disconnected_outputs="raise",
+            )
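
The new RopLopChecker.rtol() helper simply loosens the comparison tolerance when running under float32. A standalone sketch of the same pattern (the arrays are illustrative):

    import numpy as np
    from pytensor import config

    def rtol() -> float:
        # Tighter tolerance under float64, looser under float32.
        return 1e-7 if config.floatX == "float64" else 1e-5

    a = np.ones(3, dtype=config.floatX)
    b = a * (1 + 1e-8)  # well within either tolerance
    np.testing.assert_allclose(a, b, rtol=rtol())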
