From 2c3d8e9655aad131928b4c55d11f6ba0141d221a Mon Sep 17 00:00:00 2001
From: Baio0
Date: Wed, 31 Jan 2024 21:53:07 -0500
Subject: [PATCH 1/3] new file: ot/partial_gw.py

---
 ot/partial_gw.py | 250 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 ot/partial_gw.py

diff --git a/ot/partial_gw.py b/ot/partial_gw.py
new file mode 100644
index 000000000..8b7ce35c2
--- /dev/null
+++ b/ot/partial_gw.py
@@ -0,0 +1,250 @@
+# Author: Yikun Bai
+
+import numpy as np
+from .lp import emd
+from .backend import get_backend
+from .utils import list_to_array
+
+
+def gwgrad_partial(C1, C2, T):
+    """Compute the GW gradient. Note: we cannot use the trick of
+    :ref:`[12] <references-gwgrad-partial>` as the marginals may not sum to 1.
+
+    Parameters
+    ----------
+    C1: array of shape (n_p, n_p)
+        intra-source (P) cost matrix
+
+    C2: array of shape (n_u, n_u)
+        intra-target (U) cost matrix
+
+    T : array of shape (n_p+nb_dummies, n_u)
+        Transport matrix
+
+    Returns
+    -------
+    numpy.array of shape (n_p+nb_dummies, n_u)
+        gradient
+
+
+    .. _references-gwgrad-partial:
+    References
+    ----------
+    .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+        "Gromov-Wasserstein averaging of kernel and distance matrices."
+        International Conference on Machine Learning (ICML). 2016.
+    """
+    # the constant terms are recomputed at each call since the marginals of T
+    # need not sum to 1 in the partial setting
+    cC1 = np.dot(C1 ** 2 / 2, np.dot(T, np.ones(C2.shape[0]).reshape(-1, 1)))
+    cC2 = np.dot(np.dot(np.ones(C1.shape[0]).reshape(1, -1), T), C2 ** 2 / 2)
+    constC = cC1 + cC2
+    A = -np.dot(C1, T).dot(C2.T)
+    tens = constC + A
+    return tens * 2
+
+
+def gwloss_partial(C1, C2, T):
+    """Compute the GW loss.
+
+    Parameters
+    ----------
+    C1: array of shape (n_p, n_p)
+        intra-source (P) cost matrix
+
+    C2: array of shape (n_u, n_u)
+        intra-target (U) cost matrix
+
+    T : array of shape (n_p+nb_dummies, n_u)
+        Transport matrix
+
+    Returns
+    -------
+    float
+        GW loss
+    """
+    g = gwgrad_partial(C1, C2, T) * 0.5
+    return np.sum(g * T)
+
+
+def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
+                               thres=1, numItermax=1000, tol=1e-7,
+                               log=False, verbose=False, **kwargs):
+    r"""
+    Solves the partial Gromov-Wasserstein problem
+    and returns the OT plan
+
+    The function considers the following problem:
+
+    .. math::
+        \gamma = \mathop{\arg \min}_{\gamma} \quad \sum_{i,j,k,l}
+            L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \cdot \gamma_{i,j} \cdot \gamma_{k,l}
+
+    .. math::
+        s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
+
+             \gamma^T \mathbf{1} &\leq \mathbf{b}
+
+             \gamma &\geq 0
+
+             \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
+
+    where :
+
+    - :math:`\mathbf{C_1}` and :math:`\mathbf{C_2}` are the intra-domain cost matrices
+    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
+    - `m` is the amount of mass to be transported
+
+    The formulation of the problem has been proposed in
+    :ref:`[29] <references-partial-gromov-wasserstein>`
+
+
+    Parameters
+    ----------
+    C1 : ndarray, shape (ns, ns)
+        Metric cost matrix in the source space
+    C2 : ndarray, shape (nt, nt)
+        Metric cost matrix in the target space
+    p : ndarray, shape (ns,)
+        Distribution in the source space
+    q : ndarray, shape (nt,)
+        Distribution in the target space
+    m : float, optional
+        Amount of mass to be transported
+        (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
+    nb_dummies : int, optional
+        Number of dummy points to add (to avoid numerical instabilities in the
+        EMD solver)
+    G0 : ndarray, shape (ns, nt), optional
+        Initialization of the transportation matrix
+    thres : float, optional
+        quantile of the gradient matrix used to populate the cost matrix
+        (default: 1)
+    numItermax : int, optional
+        Max number of iterations
+    tol : float, optional
+        Tolerance for stopping iterations
+    log : bool, optional
+        Return a log dictionary if True
+    verbose : bool, optional
+        Print information along iterations
+    **kwargs : dict
+        Parameters passed directly to the emd solver
+
+
+    Returns
+    -------
+    gamma : (dim_a, dim_b) ndarray
+        Optimal transportation matrix for the given parameters
+    log : dict
+        log dictionary returned only if `log` is `True`
+
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import scipy as sp
+    >>> import ot
+    >>> a = np.array([0.25] * 4)
+    >>> b = np.array([0.25] * 4)
+    >>> x = np.array([1,2,100,200]).reshape((-1,1))
+    >>> y = np.array([3,2,98,199]).reshape((-1,1))
+    >>> C1 = sp.spatial.distance.cdist(x, x)
+    >>> C2 = sp.spatial.distance.cdist(y, y)
+    >>> np.round(partial_gromov_wasserstein(C1, C2, a, b),2)
+    array([[0.  , 0.25, 0.  , 0.  ],
+           [0.25, 0.  , 0.  , 0.  ],
+           [0.  , 0.  , 0.25, 0.  ],
+           [0.  , 0.  , 0.  , 0.25]])
+    >>> np.round(partial_gromov_wasserstein(C1, C2, a, b, m=0.25),2)
+    array([[0.  , 0.  , 0.  , 0.  ],
+           [0.  , 0.  , 0.  , 0.  ],
+           [0.  , 0.  , 0.25, 0.  ],
+           [0.  , 0.  , 0.  , 0.  ]])
+
+
+    .. _references-partial-gromov-wasserstein:
+    References
+    ----------
+    .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
+        Transport with Applications on Positive-Unlabeled Learning".
+        NeurIPS.
+
+    """
+
+    if m is None:
+        m = np.min((np.sum(p), np.sum(q)))
+    elif m < 0:
+        raise ValueError("Problem infeasible. Parameter m should be greater"
+                         " than 0.")
+    elif m > np.min((np.sum(p), np.sum(q))):
+        raise ValueError("Problem infeasible. Parameter m should be lower than"
+                         " or equal to min(|a|_1, |b|_1).")
+
+    if G0 is None:
+        # scaled outer product: ensures |G0| = m, G0 1 <= p and G0^T 1 <= q
+        G0 = np.outer(p, q) * m / (np.sum(p) * np.sum(q))
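+        # Sanity check for this choice (assuming m <= min(sum(p), sum(q)),
+        # as enforced above): the row sums equal p * m / np.sum(p) <= p, the
+        # column sums equal q * m / np.sum(q) <= q, and np.sum(G0) == m, so
+        # G0 lies in the interior of the set of partial transport plans.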
+
+    dim_G_extended = (len(p) + nb_dummies, len(q) + nb_dummies)
+    q_extended = np.append(q, [(np.sum(p) - m) / nb_dummies] * nb_dummies)
+    p_extended = np.append(p, [(np.sum(q) - m) / nb_dummies] * nb_dummies)
+
+    cpt = 0
+    err = 1
+
+    if log:
+        log = {'err': []}
+
+    while (err > tol and cpt < numItermax):
+
+        Gprev = np.copy(G0)
+
+        M = gwgrad_partial(C1, C2, G0)
+
+        M_emd = np.zeros(dim_G_extended)
+        M_emd[:len(p), :len(q)] = M
+        M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e2
+        M_emd = np.asarray(M_emd, dtype=np.float64)
+        # Yikun: I do not understand the line above (np.max(M) * 1e2).
+        # Based on Eq. (1) in [29] and ot.partial.partial_wasserstein, something
+        # like the following should work:
+        # M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) + 1.0, or np.max(M) * 2
+
+        Gc, logemd = emd(p_extended, q_extended, M_emd, log=True, **kwargs)
+
+        if logemd['warning'] is not None:
+            raise ValueError("Error in the EMD resolution: try to increase the"
+                             " number of dummy points")
+
+        G0 = Gc[:len(p), :len(q)]
+
+        if cpt % 10 == 0:  # to speed up the computations
+            err = np.linalg.norm(G0 - Gprev)
+            if log:
+                log['err'].append(err)
+            if verbose:
+                if cpt % 200 == 0:
+                    print('{:5s}|{:12s}|{:12s}'.format(
+                        'It.', 'Err', 'Loss') + '\n' + '-' * 31)
+                print('{:5d}|{:8e}|{:8e}'.format(cpt, err,
+                                                 gwloss_partial(C1, C2, G0)))
+
+        deltaG = G0 - Gprev
+        # Exact line search (for symmetric C1, C2): along Gprev + gamma * deltaG
+        # the loss is the quadratic a * gamma**2 + b * gamma (up to a constant);
+        # a and b are twice the true coefficients, which leaves the minimizer
+        # -b / (2 * a) unchanged.
+        a = 2 * gwloss_partial(C1, C2, deltaG)  # quadratic coefficient
+        b = 2 * np.sum(M * deltaG)  # linear coefficient
+        if a > 0:  # convex along the segment
+            if b >= 0:  # no descent possible: stop (due to numerical precision)
+                gamma = 0
+                cpt = numItermax
+            else:
+                gamma = min(1, np.divide(-b, 2.0 * a))
+        else:  # concave or linear along the segment: optimum at an endpoint
+            if (a + b) < 0:
+                gamma = 1
+            else:
+                gamma = 0
+                cpt = numItermax
+
+        G0 = Gprev + gamma * deltaG
+        cpt += 1
+
+    if log:
+        log['partial_gw_dist'] = gwloss_partial(C1, C2, G0)
+        return G0[:len(p), :len(q)], log
+    else:
+        return G0[:len(p), :len(q)]
\ No newline at end of file

From 0e4eda4b1d87146838f2e83a497167a33cbd9b77 Mon Sep 17 00:00:00 2001
From: Cédric Vincent-Cuaz
Date: Fri, 21 Jun 2024 11:31:38 +0200
Subject: [PATCH 2/3] remove partial_gw.py to update existing file partial.py

---
 RELEASES.md      |   1 +
 ot/partial.py    |  16 +--
 ot/partial_gw.py | 250 -----------------------------------------------
 3 files changed, 10 insertions(+), 257 deletions(-)
 delete mode 100644 ot/partial_gw.py

diff --git a/RELEASES.md b/RELEASES.md
index e6e6ff4d4..77863b640 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -19,6 +19,7 @@
 - Fix same sign error for sr(F)GW conditional gradient solvers (PR #611)
 - Split `test/test_gromov.py` into `test/gromov/` (PR #619)
 - Fix (F)GW barycenter functions to support computing barycenter on 1 input +
   deprecate structures as lists (PR #628)
+- Fix line-search in partial GW and change default init to the interior of partial transport plans (PR #602)
 
 ## 0.9.3
 *January 2024*

diff --git a/ot/partial.py b/ot/partial.py
index 85635c9ba..00d3449ea 100755
--- a/ot/partial.py
+++ b/ot/partial.py
@@ -4,12 +4,14 @@
 """
 # Author: Laetitia Chapel
-# License: MIT License
-
-import numpy as np
-from .lp import emd
-from .backend import get_backend
 from .utils import list_to_array
+from .backend import get_backend
+from .lp import emd
+import numpy as np
+Yikun Bai < yikun.bai @ vanderbilt.edu >
+Cédric Vincent - Cuaz < cedvincentcuaz @ gmail.com >
+
+# License: MIT License
 
 
 def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
@@ -581,7 +583,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
                          " equal than min(|a|_1, |b|_1).")
 
     if G0 is None:
-        G0 = np.outer(p, q)
+        G0 = np.outer(p, q) * m / (np.sum(p) * np.sum(q))  # ensure |G0| = m, G0 1 <= p, G0^T 1 <= q
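+        # Sanity check for this choice (assuming m <= min(sum(p), sum(q)),
+        # as enforced above): the row sums equal p * m / np.sum(p) <= p, the
+        # column sums equal q * m / np.sum(q) <= q, and np.sum(G0) == m, so
+        # G0 lies in the interior of the set of partial transport plans.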
 
         dim_G_extended = (len(p) + nb_dummies, len(q) + nb_dummies)
         q_extended = np.append(q, [(np.sum(p) - m) / nb_dummies] * nb_dummies)
         p_extended = np.append(p, [(np.sum(q) - m) / nb_dummies] * nb_dummies)
@@ -597,7 +599,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
 
         Gprev = np.copy(G0)
 
-        M = gwgrad_partial(C1, C2, G0)
+        M = 0.5 * gwgrad_partial(C1, C2, G0)  # rescale the gradient by 0.5 for the line search; Gc is unchanged since a positive rescaling of the cost does not change the emd solution
         M_emd = np.zeros(dim_G_extended)
         M_emd[:len(p), :len(q)] = M
         M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e2
diff --git a/ot/partial_gw.py b/ot/partial_gw.py
deleted file mode 100644
index 8b7ce35c2..000000000
--- a/ot/partial_gw.py
+++ /dev/null
@@ -1,250 +0,0 @@
-# Author: Yikun Bai
-
-import numpy as np
-from .lp import emd
-from .backend import get_backend
-from .utils import list_to_array
-
-
-def gwgrad_partial(C1, C2, T):
-    """Compute the GW gradient. Note: we cannot use the trick of
-    :ref:`[12] <references-gwgrad-partial>` as the marginals may not sum to 1.
-
-    Parameters
-    ----------
-    C1: array of shape (n_p, n_p)
-        intra-source (P) cost matrix
-
-    C2: array of shape (n_u, n_u)
-        intra-target (U) cost matrix
-
-    T : array of shape (n_p+nb_dummies, n_u)
-        Transport matrix
-
-    Returns
-    -------
-    numpy.array of shape (n_p+nb_dummies, n_u)
-        gradient
-
-
-    .. _references-gwgrad-partial:
-    References
-    ----------
-    .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
-        "Gromov-Wasserstein averaging of kernel and distance matrices."
-        International Conference on Machine Learning (ICML). 2016.
-    """
-    # the constant terms are recomputed at each call since the marginals of T
-    # need not sum to 1 in the partial setting
-    cC1 = np.dot(C1 ** 2 / 2, np.dot(T, np.ones(C2.shape[0]).reshape(-1, 1)))
-    cC2 = np.dot(np.dot(np.ones(C1.shape[0]).reshape(1, -1), T), C2 ** 2 / 2)
-    constC = cC1 + cC2
-    A = -np.dot(C1, T).dot(C2.T)
-    tens = constC + A
-    return tens * 2
-
-
-def gwloss_partial(C1, C2, T):
-    """Compute the GW loss.
-
-    Parameters
-    ----------
-    C1: array of shape (n_p, n_p)
-        intra-source (P) cost matrix
-
-    C2: array of shape (n_u, n_u)
-        intra-target (U) cost matrix
-
-    T : array of shape (n_p+nb_dummies, n_u)
-        Transport matrix
-
-    Returns
-    -------
-    float
-        GW loss
-    """
-    g = gwgrad_partial(C1, C2, T) * 0.5
-    return np.sum(g * T)
-
-
-def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
-                               thres=1, numItermax=1000, tol=1e-7,
-                               log=False, verbose=False, **kwargs):
-    r"""
-    Solves the partial Gromov-Wasserstein problem
-    and returns the OT plan
-
-    The function considers the following problem:
-
-    .. math::
-        \gamma = \mathop{\arg \min}_{\gamma} \quad \sum_{i,j,k,l}
-            L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \cdot \gamma_{i,j} \cdot \gamma_{k,l}
-
-    .. math::
-        s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
-
-             \gamma^T \mathbf{1} &\leq \mathbf{b}
-
-             \gamma &\geq 0
-
-             \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
-
-    where :
-
-    - :math:`\mathbf{C_1}` and :math:`\mathbf{C_2}` are the intra-domain cost matrices
-    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
-    - `m` is the amount of mass to be transported
-
-    The formulation of the problem has been proposed in
-    :ref:`[29] <references-partial-gromov-wasserstein>`
-
-
-    Parameters
-    ----------
-    C1 : ndarray, shape (ns, ns)
-        Metric cost matrix in the source space
-    C2 : ndarray, shape (nt, nt)
-        Metric cost matrix in the target space
-    p : ndarray, shape (ns,)
-        Distribution in the source space
-    q : ndarray, shape (nt,)
-        Distribution in the target space
-    m : float, optional
-        Amount of mass to be transported
-        (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
-    nb_dummies : int, optional
-        Number of dummy points to add (to avoid numerical instabilities in the
-        EMD solver)
-    G0 : ndarray, shape (ns, nt), optional
-        Initialization of the transportation matrix
-    thres : float, optional
-        quantile of the gradient matrix used to populate the cost matrix
-        (default: 1)
-    numItermax : int, optional
-        Max number of iterations
-    tol : float, optional
-        Tolerance for stopping iterations
-    log : bool, optional
-        Return a log dictionary if True
-    verbose : bool, optional
-        Print information along iterations
-    **kwargs : dict
-        Parameters passed directly to the emd solver
-
-
-    Returns
-    -------
-    gamma : (dim_a, dim_b) ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary returned only if `log` is `True`
-
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> import scipy as sp
-    >>> import ot
-    >>> a = np.array([0.25] * 4)
-    >>> b = np.array([0.25] * 4)
-    >>> x = np.array([1,2,100,200]).reshape((-1,1))
-    >>> y = np.array([3,2,98,199]).reshape((-1,1))
-    >>> C1 = sp.spatial.distance.cdist(x, x)
-    >>> C2 = sp.spatial.distance.cdist(y, y)
-    >>> np.round(partial_gromov_wasserstein(C1, C2, a, b),2)
-    array([[0.  , 0.25, 0.  , 0.  ],
-           [0.25, 0.  , 0.  , 0.  ],
-           [0.  , 0.  , 0.25, 0.  ],
-           [0.  , 0.  , 0.  , 0.25]])
-    >>> np.round(partial_gromov_wasserstein(C1, C2, a, b, m=0.25),2)
-    array([[0.  , 0.  , 0.  , 0.  ],
-           [0.  , 0.  , 0.  , 0.  ],
-           [0.  , 0.  , 0.25, 0.  ],
-           [0.  , 0.  , 0.  , 0.  ]])
-
-
-    .. _references-partial-gromov-wasserstein:
-    References
-    ----------
-    .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
-        Transport with Applications on Positive-Unlabeled Learning".
-        NeurIPS.
-
-    """
-
-    if m is None:
-        m = np.min((np.sum(p), np.sum(q)))
-    elif m < 0:
-        raise ValueError("Problem infeasible. Parameter m should be greater"
-                         " than 0.")
-    elif m > np.min((np.sum(p), np.sum(q))):
-        raise ValueError("Problem infeasible. Parameter m should be lower than"
-                         " or equal to min(|a|_1, |b|_1).")
-
-    if G0 is None:
-        # scaled outer product: ensures |G0| = m, G0 1 <= p and G0^T 1 <= q
-        G0 = np.outer(p, q) * m / (np.sum(p) * np.sum(q))
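-        # Sanity check for this choice (assuming m <= min(sum(p), sum(q)),
-        # as enforced above): the row sums equal p * m / np.sum(p) <= p, the
-        # column sums equal q * m / np.sum(q) <= q, and np.sum(G0) == m, so
-        # G0 lies in the interior of the set of partial transport plans.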
-
-    dim_G_extended = (len(p) + nb_dummies, len(q) + nb_dummies)
-    q_extended = np.append(q, [(np.sum(p) - m) / nb_dummies] * nb_dummies)
-    p_extended = np.append(p, [(np.sum(q) - m) / nb_dummies] * nb_dummies)
-
-    cpt = 0
-    err = 1
-
-    if log:
-        log = {'err': []}
-
-    while (err > tol and cpt < numItermax):
-
-        Gprev = np.copy(G0)
-
-        M = gwgrad_partial(C1, C2, G0)
-
-        M_emd = np.zeros(dim_G_extended)
-        M_emd[:len(p), :len(q)] = M
-        M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e2
-        M_emd = np.asarray(M_emd, dtype=np.float64)
-        # Yikun: I do not understand the line above (np.max(M) * 1e2).
-        # Based on Eq. (1) in [29] and ot.partial.partial_wasserstein, something
-        # like the following should work:
-        # M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) + 1.0, or np.max(M) * 2
-
-        Gc, logemd = emd(p_extended, q_extended, M_emd, log=True, **kwargs)
-
-        if logemd['warning'] is not None:
-            raise ValueError("Error in the EMD resolution: try to increase the"
-                             " number of dummy points")
-
-        G0 = Gc[:len(p), :len(q)]
-
-        if cpt % 10 == 0:  # to speed up the computations
-            err = np.linalg.norm(G0 - Gprev)
-            if log:
-                log['err'].append(err)
-            if verbose:
-                if cpt % 200 == 0:
-                    print('{:5s}|{:12s}|{:12s}'.format(
-                        'It.', 'Err', 'Loss') + '\n' + '-' * 31)
-                print('{:5d}|{:8e}|{:8e}'.format(cpt, err,
-                                                 gwloss_partial(C1, C2, G0)))
-
-        deltaG = G0 - Gprev
-        # Exact line search (for symmetric C1, C2): along Gprev + gamma * deltaG
-        # the loss is the quadratic a * gamma**2 + b * gamma (up to a constant);
-        # a and b are twice the true coefficients, which leaves the minimizer
-        # -b / (2 * a) unchanged.
-        a = 2 * gwloss_partial(C1, C2, deltaG)  # quadratic coefficient
-        b = 2 * np.sum(M * deltaG)  # linear coefficient
-        if a > 0:  # convex along the segment
-            if b >= 0:  # no descent possible: stop (due to numerical precision)
-                gamma = 0
-                cpt = numItermax
-            else:
-                gamma = min(1, np.divide(-b, 2.0 * a))
-        else:  # concave or linear along the segment: optimum at an endpoint
-            if (a + b) < 0:
-                gamma = 1
-            else:
-                gamma = 0
-                cpt = numItermax
-
-        G0 = Gprev + gamma * deltaG
-        cpt += 1
-
-    if log:
-        log['partial_gw_dist'] = gwloss_partial(C1, C2, G0)
-        return G0[:len(p), :len(q)], log
-    else:
-        return G0[:len(p), :len(q)]
\ No newline at end of file

From b8622485962d66bdf5a28bc2f87ac9bc2df40d64 Mon Sep 17 00:00:00 2001
From: Cédric Vincent-Cuaz
Date: Fri, 21 Jun 2024 11:34:26 +0200
Subject: [PATCH 3/3] fix pep8

---
 ot/partial.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ot/partial.py b/ot/partial.py
index 00d3449ea..a3b25a856 100755
--- a/ot/partial.py
+++ b/ot/partial.py
@@ -4,12 +4,13 @@
 """
 # Author: Laetitia Chapel
+#         Yikun Bai < yikun.bai@vanderbilt.edu >
+#         Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
+
 from .utils import list_to_array
 from .backend import get_backend
 from .lp import emd
 import numpy as np
-Yikun Bai < yikun.bai @ vanderbilt.edu >
-Cédric Vincent - Cuaz < cedvincentcuaz @ gmail.com >
 
 # License: MIT License
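---
Usage sketch (not part of the patches): a minimal end-to-end check of the
patched solver. It assumes the import path ot.partial.partial_gromov_wasserstein
from the diffs above; the point clouds and the choice m=0.5 are illustrative only.

    import numpy as np
    import ot
    from scipy.spatial.distance import cdist

    x = np.array([1, 2, 100, 200]).reshape((-1, 1))
    y = np.array([3, 2, 98, 199]).reshape((-1, 1))
    C1 = cdist(x, x)  # intra-source cost matrix
    C2 = cdist(y, y)  # intra-target cost matrix
    p = np.array([0.25] * 4)
    q = np.array([0.25] * 4)

    # transport half of the total mass
    G, log = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=0.5,
                                                   log=True)
    assert np.isclose(G.sum(), 0.5)            # |G| = m
    assert np.all(G.sum(axis=1) <= p + 1e-10)  # partial marginal constraints
    assert np.all(G.sum(axis=0) <= q + 1e-10)
    print(log['partial_gw_dist'])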