added rewrites for inv(diag(x)) and inv(orthonormal(x))

tanish1729 · tanish1729 · commit 182cb961c6ad · 2024-07-19T11:41:26.000+05:30
diff --git a/pytensor/tensor/rewriting/linalg.py b/pytensor/tensor/rewriting/linalg.py
@@ -569,3 +569,95 @@ def svd_uv_merge(fgraph, node):
                     or len(fgraph.clients[cl.outputs[2]]) > 0
                 ):
                     return [cl.outputs[1]]
+
+
+@register_canonicalize
+@register_stabilize
+@node_rewriter([Blockwise])
+def rewrite_inv_for_diag_eye_mul(fgraph, node):
+    """
+    Rewrite the inverse of a diagonal matrix built as ``eye * x`` into ``eye / x``.
+
+    The inverse of a diagonal matrix is the diagonal matrix whose entries are the
+    reciprocals of the original diagonal entries. This rewrite handles diagonal
+    matrices arising from the multiplication of ``eye`` with a single
+    scalar/vector/matrix.
+
+    Parameters
+    ----------
+    fgraph: FunctionGraph
+        Function graph being optimized
+    node: Apply
+        Node of the function graph to be optimized
+
+    Returns
+    -------
+    list of Variable, optional
+        List of optimized variables, or None if no optimization was performed
+    """
+    # Only inverse-like core ops are candidates: Inv, Pinv
+    valid_inverses = (MatrixInverse, MatrixPinv)
+    core_op = node.op.core_op
+    if not isinstance(core_op, valid_inverses):
+        return None
+
+    # The helper returns None when the input is not recognised as an eye
+    # product; otherwise it yields the eye input(s) and the remaining factors.
+    eye_non_eye_inputs = _find_diag_from_eye_mul(node.inputs[0])
+    if eye_non_eye_inputs is None:
+        return None
+    eye_input, non_eye_inputs = eye_non_eye_inputs
+
+    # Dealing with only one other input
+    if len(non_eye_inputs) != 1:
+        return None
+
+    useful_eye, useful_non_eye = eye_input[0], non_eye_inputs[0]
+
+    if useful_non_eye.type.broadcastable[-2:] == (False, False):
+        # Matrix factor: only its diagonal survives the product with eye, and
+        # dividing by the full matrix would divide by its off-diagonal entries.
+        # The diagonal has shape (..., n); re-insert the row axis so that it
+        # lines up with the columns of eye even when batch dimensions exist.
+        non_eye_diag = useful_non_eye.diagonal(axis1=-1, axis2=-2)
+        useful_non_eye = non_eye_diag[..., None, :]
+    # Scalar/vector factors already broadcast correctly against eye.
+
+    return [useful_eye / useful_non_eye]
+
+
+def rewrite_inv_for_diag_ptdiag(fgraph, node):
+    """
+    Placeholder rewrite: performs no rewrite and always returns None.
+
+    It carries no registration decorators here, so it is not applied to any
+    graph. NOTE(review): the name suggests it is meant for diagonal matrices
+    built with ``pt.diag`` -- confirm before implementing.
+    """
+    return None
+
+
+@register_canonicalize
+@register_stabilize
+@node_rewriter([Blockwise])
+def rewrite_inv_for_orthonormal(fgraph, node):
+    """
+    Rewrite the inverse of an orthonormal matrix into its transpose.
+
+    For a real orthonormal matrix the inverse is simply the transpose. This
+    rewrite currently recognises only orthonormal matrices produced by an SVD
+    decomposition computed with ``compute_uv=True`` (the U and Vh outputs).
+
+    Parameters
+    ----------
+    fgraph: FunctionGraph
+        Function graph being optimized
+    node: Apply
+        Node of the function graph to be optimized
+
+    Returns
+    -------
+    list of Variable, optional
+        List of optimized variables, or None if no optimization was performed
+    """
+    # The rewriter tracks every Blockwise node, so make sure this one really is
+    # an inverse before replacing it with a transpose.
+    if not isinstance(node.op.core_op, (MatrixInverse, MatrixPinv)):
+        return None
+
+    # Check that the input to the inverse comes from SVD with compute_uv = True
+    input_to_inv = node.inputs[0]
+    svd_node = input_to_inv.owner
+    if not (
+        svd_node
+        and isinstance(svd_node.op, Blockwise)
+        and isinstance(svd_node.op.core_op, SVD)
+        and svd_node.op.core_op.compute_uv is True
+    ):
+        return None
+
+    # Output order is U, S, Vh: S (index 1) holds the singular values and is
+    # not orthonormal, so it must not be rewritten.
+    if input_to_inv is svd_node.outputs[1]:
+        return None
+
+    # For complex matrices the inverse is the conjugate transpose, which a
+    # plain transpose does not provide; leave those graphs untouched.
+    if input_to_inv.type.dtype.startswith("complex"):
+        return None
+
+    # Swap only the two core (matrix) axes so batch dimensions keep their order.
+    return [input_to_inv.swapaxes(-1, -2)]
diff --git a/tests/tensor/rewriting/test_linalg.py b/tests/tensor/rewriting/test_linalg.py
@@ -554,3 +554,68 @@ def test_svd_uv_merge():
             assert node.op.compute_uv
             svd_counter += 1
     assert svd_counter == 1
+
+
+@pytest.mark.parametrize(
+    "shape",
+    [(), (7,), (7, 7)],
+    ids=["scalar", "vector", "matrix"],
+)
+def test_inv_diag_from_eye_mul(shape):
+    """Check that inv(eye * x) is rewritten and still numerically correct."""
+    # Initializing x based on scalar/vector/matrix
+    x = pt.tensor("x", shape=shape)
+    x_diag = pt.eye(7) * x
+    # Calculating inverse using pt.linalg.inv
+    x_inv = pt.linalg.inv(x_diag)
+
+    # REWRITE TEST
+    f_rewritten = function([x], x_inv, mode="FAST_RUN")
+    nodes = f_rewritten.maker.fgraph.apply_nodes
+
+    # Look through a wrapping op's ``core_op`` (when present) so an inverse
+    # that is still wrapped is detected as well.
+    valid_inverses = (MatrixInverse, MatrixPinv)
+    assert not any(
+        isinstance(getattr(node.op, "core_op", node.op), valid_inverses)
+        for node in nodes
+    )
+
+    # NUMERIC VALUE TEST
+    # A seeded generator keeps failures reproducible; np.asarray covers the
+    # scalar case, where an empty shape may not yield an ndarray.
+    rng = np.random.default_rng(1729)
+    x_test = np.asarray(rng.random(shape)).astype(config.floatX)
+    x_test_matrix = np.eye(7) * x_test
+    inverse_matrix = np.linalg.inv(x_test_matrix)
+    rewritten_inverse = f_rewritten(x_test)
+
+    assert_allclose(
+        inverse_matrix,
+        rewritten_inverse,
+        atol=1e-3 if config.floatX == "float32" else 1e-8,
+        rtol=1e-3 if config.floatX == "float32" else 1e-8,
+    )
+
+
+def test_inv_orthonormal():
+    """Check that inv(U), with U from an SVD, is rewritten and still correct."""
+    x = pt.dmatrix("x")
+    u, _, _ = pt.linalg.svd(x)
+    # Calculating inverse using pt.linalg.inv
+    u_inv = pt.linalg.inv(u)
+
+    # REWRITE TEST
+    f_rewritten = function([x], u_inv, mode="FAST_RUN")
+    nodes = f_rewritten.maker.fgraph.apply_nodes
+
+    # Look through a wrapping op's ``core_op`` (when present) so an inverse
+    # that is still wrapped is detected as well.
+    valid_inverses = (MatrixInverse, MatrixPinv)
+    assert not any(
+        isinstance(getattr(node.op, "core_op", node.op), valid_inverses)
+        for node in nodes
+    )
+
+    # NUMERIC VALUE TEST
+    # x is a dmatrix, so keep the test data in float64: the NumPy reference
+    # then decomposes exactly the values the compiled function receives.
+    rng = np.random.default_rng(1729)
+    x_test = rng.random((7, 7))
+    u_test, _, _ = np.linalg.svd(x_test)
+    inverse_matrix = np.linalg.inv(u_test)
+    rewritten_inverse = f_rewritten(x_test)
+
+    assert_allclose(
+        inverse_matrix,
+        rewritten_inverse,
+        atol=1e-3 if config.floatX == "float32" else 1e-8,
+        rtol=1e-3 if config.floatX == "float32" else 1e-8,
+    )