
Commit 218c54a

merge master + doc for lowrank
2 parents 55c8d2b + 659cde8


10 files changed: 1387 additions & 84 deletions


README.md

Lines changed: 5 additions & 1 deletion
@@ -344,4 +344,8 @@ distances between Gaussian distributions](https://hal.science/hal-03197398v2/fil
 
 [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty (2023). [Interpolating between Clustering and Dimensionality Reduction with Gromov-Wasserstein](https://arxiv.org/pdf/2310.03398.pdf). NeurIPS 2023 Workshop Optimal Transport and Machine Learning.
 
-[63] Scetbon, M., Cuturi, M., & Peyré, G. (2021). [Low-Rank Sinkhorn Factorization](https://arxiv.org/pdf/2103.04737.pdf).
+[63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. (2022). [A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein in Graph Data](https://openreview.net/pdf?id=0jxPyVWmiiF). In The Eleventh International Conference on Learning Representations.
+
+[64] Ma, X., Chu, X., Wang, Y., Lin, Y., Zhao, J., Ma, L., & Zhu, W. (2023). [Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications](https://openreview.net/pdf?id=uqkUguNu40). In Thirty-seventh Conference on Neural Information Processing Systems.
+
+[65] Scetbon, M., Cuturi, M., & Peyré, G. (2021). [Low-Rank Sinkhorn Factorization](https://arxiv.org/pdf/2103.04737.pdf).
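
For context on the renumbered [65] entry (the commit message mentions documentation for the low-rank solver), Scetbon et al. restrict the transport plan to a low-rank factorization. A sketch of the constrained problem in LaTeX, paraphrased from the paper rather than from the POT documentation:

\min_{Q,\,R,\,g} \ \langle C,\ Q\,\mathrm{diag}(1/g)\,R^\top \rangle
\quad \text{s.t.} \quad Q\mathbf{1}_r = a,\ \ R\mathbf{1}_r = b,\ \ Q^\top\mathbf{1}_n = R^\top\mathbf{1}_m = g \in \Delta_r,

so the coupling $P = Q\,\mathrm{diag}(1/g)\,R^\top$ has rank at most $r$, and [65] solves the problem with mirror-descent, Sinkhorn-like updates on $(Q, R, g)$.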

RELEASES.md

Lines changed: 3 additions & 0 deletions
@@ -20,8 +20,11 @@
 + Wrapper for `geomloss` solver on empirical samples (PR #571)
 + Add `stop_criterion` feature to (un)regularized (f)gw barycenter solvers (PR #578)
 + Add `fixed_structure` and `fixed_features` to entropic fgw barycenter solver (PR #578)
++ Add new BAPG solvers with KL projections for GW and FGW (PR #581)
++ Add Bures-Wasserstein barycenter in `ot.gaussian` and example (PR #582, PR #584)
 + Added support for [Low-Rank Sinkhorn Factorization](https://arxiv.org/pdf/2103.04737.pdf) (PR #568)
 
+
 #### Closed issues
 - Fix line search evaluating cost outside of the interpolation range (Issue #502, PR #504)
 - Lazily instantiate backends to avoid unnecessary GPU memory pre-allocations on package import (Issue #516, PR #520)
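
As a quick illustration of the new `ot.gaussian` entry above (PR #582, PR #584), here is a minimal sketch of the barycenter call, using only the signature that appears in the example added by this commit (stacked means, stacked covariances, barycentric weights); no other arguments are assumed:

# Minimal sketch of ot.gaussian.bures_wasserstein_barycenter (signature as used
# in the example below; assumed to return the barycenter mean and covariance).
import numpy as np
import ot

m = np.stack((np.array([0., 0.]), np.array([4., 4.])))       # means, shape (k, d)
C = np.stack((np.eye(2), np.array([[1., 0.3], [0.3, 1.]])))  # covariances, shape (k, d, d)
weights = np.array([0.5, 0.5])                               # barycentric weights summing to 1

mb, Cb = ot.gaussian.bures_wasserstein_barycenter(m, C, weights)
print(mb)  # barycenter mean
print(Cb)  # barycenter covariance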
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""
========================================================
Gaussian Bures-Wasserstein barycenters
========================================================

Illustration of Gaussian Bures-Wasserstein barycenters.

"""

# Authors: Rémi Flamary <remi.flamary@polytechnique.edu>
#
# License: MIT License

# sphinx_gallery_thumbnail_number = 2
# %%
from matplotlib import colors
from matplotlib.patches import Ellipse
import numpy as np
import matplotlib.pylab as pl
import ot


# %%
# Define Gaussian Covariances and distributions
# ---------------------------------------------

C1 = np.array([[0.5, -0.4], [-0.4, 0.5]])
C2 = np.array([[1, 0.3], [0.3, 1]])
C3 = np.array([[1.5, 0], [0, 0.5]])
C4 = np.array([[0.5, 0], [0, 1.5]])

C = np.stack((C1, C2, C3, C4))

m1 = np.array([0, 0])
m2 = np.array([0, 4])
m3 = np.array([4, 0])
m4 = np.array([4, 4])

m = np.stack((m1, m2, m3, m4))

# %%
# Plot the distributions
# ----------------------


def draw_cov(mu, C, color=None, label=None, nstd=1):

    def eigsorted(cov):
        vals, vecs = np.linalg.eigh(cov)
        order = vals.argsort()[::-1]
        return vals[order], vecs[:, order]

    vals, vecs = eigsorted(C)
    theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))
    w, h = 2 * nstd * np.sqrt(vals)
    ell = Ellipse(xy=(mu[0], mu[1]),
                  width=w, height=h, alpha=0.5,
                  angle=theta, facecolor=color, edgecolor=color, label=label, fill=True)
    pl.gca().add_artist(ell)
    # pl.scatter(mu[0], mu[1], color=color, marker='x')


axis = [-1.5, 5.5, -1.5, 5.5]

pl.figure(1, (8, 2))
pl.clf()

pl.subplot(1, 4, 1)
draw_cov(m1, C1, color='C0')
pl.axis(axis)
pl.title(r'$\mathcal{N}(m_1,\Sigma_1)$')

pl.subplot(1, 4, 2)
draw_cov(m2, C2, color='C1')
pl.axis(axis)
pl.title(r'$\mathcal{N}(m_2,\Sigma_2)$')

pl.subplot(1, 4, 3)
draw_cov(m3, C3, color='C2')
pl.axis(axis)
pl.title(r'$\mathcal{N}(m_3,\Sigma_3)$')

pl.subplot(1, 4, 4)
draw_cov(m4, C4, color='C3')
pl.axis(axis)
pl.title(r'$\mathcal{N}(m_4,\Sigma_4)$')

# %%
# Compute Bures-Wasserstein barycenters and plot them
# ---------------------------------------------------

# basis for bilinear interpolation
v1 = np.array((1, 0, 0, 0))
v2 = np.array((0, 1, 0, 0))
v3 = np.array((0, 0, 1, 0))
v4 = np.array((0, 0, 0, 1))


colors = np.stack((colors.to_rgb('C0'),
                   colors.to_rgb('C1'),
                   colors.to_rgb('C2'),
                   colors.to_rgb('C3')))

pl.figure(2, (8, 8))

nb_interp = 6

for i in range(nb_interp):
    for j in range(nb_interp):
        tx = float(i) / (nb_interp - 1)
        ty = float(j) / (nb_interp - 1)

        # weights are constructed by bilinear interpolation
        tmp1 = (1 - tx) * v1 + tx * v2
        tmp2 = (1 - tx) * v3 + tx * v4
        weights = (1 - ty) * tmp1 + ty * tmp2

        color = np.dot(colors.T, weights)

        mb, Cb = ot.gaussian.bures_wasserstein_barycenter(m, C, weights)

        draw_cov(mb, Cb, color=color, label=None, nstd=0.3)

pl.axis(axis)
pl.axis('off')
pl.tight_layout()
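
For reference, the barycenter plotted above has a closed characterization in the Gaussian case. A sketch in LaTeX of the classical fixed-point form (stated here for context, not quoted from the POT documentation): with weights $w_k$ summing to one,

m_b = \sum_k w_k\, m_k, \qquad
\Sigma_b = \sum_k w_k \left( \Sigma_b^{1/2}\, \Sigma_k\, \Sigma_b^{1/2} \right)^{1/2},

where $\Sigma_b$ is the positive semi-definite solution of the fixed-point equation; solvers typically iterate this map until convergence.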

examples/gromov/plot_fgw_solvers.py

Lines changed: 89 additions & 44 deletions
@@ -5,8 +5,9 @@
 ==============================
 
 This example illustrates the computation of FGW for attributed graphs
-using 3 different solvers to estimate the distance based on Conditional
-Gradient [24] or Sinkhorn projections [12, 51].
+using 4 different solvers to estimate the distance based on Conditional
+Gradient [24], Sinkhorn projections [12, 51] and alternated Bregman
+projections [63, 64].
 
 We generate two graphs following Stochastic Block Models further endowed with
 node features and compute their FGW matchings.
@@ -23,6 +24,16 @@
 [51] Xu, H., Luo, D., Zha, H., & Duke, L. C. (2019).
 "Gromov-wasserstein learning for graph matching and node embedding".
 In International Conference on Machine Learning (ICML), 2019.
+
+[63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J.
+"A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein in
+Graph Data". International Conference on Learning Representations (ICLR), 2023.
+
+[64] Ma, X., Chu, X., Wang, Y., Lin, Y., Zhao, J., Ma, L., & Zhu, W.
+"Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications".
+In Thirty-seventh Conference on Neural Information Processing Systems
+(NeurIPS), 2023.
+
 """
 
 # Author: Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
@@ -33,9 +44,12 @@
 
 import numpy as np
 import matplotlib.pylab as pl
-from ot.gromov import fused_gromov_wasserstein, entropic_fused_gromov_wasserstein
+from ot.gromov import (fused_gromov_wasserstein,
+                       entropic_fused_gromov_wasserstein,
+                       BAPG_fused_gromov_wasserstein)
 import networkx
 from networkx.generators.community import stochastic_block_model as sbm
+from time import time
 
 #############################################################################
 #
@@ -85,34 +99,59 @@
 
 
 # Conditional Gradient algorithm
-fgw0, log0 = fused_gromov_wasserstein(
-    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, verbose=True, log=True)
+print('Conditional Gradient \n')
+start_cg = time()
+T_cg, log_cg = fused_gromov_wasserstein(
+    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, tol_rel=1e-9,
+    verbose=True, log=True)
+end_cg = time()
+time_cg = 1000 * (end_cg - start_cg)
 
 # Proximal Point algorithm with Kullback-Leibler as proximal operator
-fgw, log = entropic_fused_gromov_wasserstein(
-    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=1., solver='PPA',
-    log=True, verbose=True, warmstart=False, numItermax=10)
+print('Proximal Point Algorithm \n')
+start_ppa = time()
+T_ppa, log_ppa = entropic_fused_gromov_wasserstein(
+    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=1., solver='PPA',
+    tol=1e-9, log=True, verbose=True, warmstart=False, numItermax=10)
+end_ppa = time()
+time_ppa = 1000 * (end_ppa - start_ppa)
 
 # Projected Gradient algorithm with entropic regularization
-fgwe, loge = entropic_fused_gromov_wasserstein(
-    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=0.01, solver='PGD',
-    log=True, verbose=True, warmstart=False, numItermax=10)
-
-print('Fused Gromov-Wasserstein distance estimated with Conditional Gradient solver: ' + str(log0['fgw_dist']))
-print('Fused Gromov-Wasserstein distance estimated with Proximal Point solver: ' + str(log['fgw_dist']))
-print('Entropic Fused Gromov-Wasserstein distance estimated with Projected Gradient solver: ' + str(loge['fgw_dist']))
+print('Projected Gradient Descent \n')
+start_pgd = time()
+T_pgd, log_pgd = entropic_fused_gromov_wasserstein(
+    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=0.01, solver='PGD',
+    tol=1e-9, log=True, verbose=True, warmstart=False, numItermax=10)
+end_pgd = time()
+time_pgd = 1000 * (end_pgd - start_pgd)
+
+# Alternated Bregman Projected Gradient algorithm with Kullback-Leibler as proximal operator
+print('Bregman Alternated Projected Gradient \n')
+start_bapg = time()
+T_bapg, log_bapg = BAPG_fused_gromov_wasserstein(
+    M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=1.,
+    tol=1e-9, marginal_loss=True, verbose=True, log=True)
+end_bapg = time()
+time_bapg = 1000 * (end_bapg - start_bapg)
+
+print('Fused Gromov-Wasserstein distance estimated with Conditional Gradient solver: ' + str(log_cg['fgw_dist']))
+print('Fused Gromov-Wasserstein distance estimated with Proximal Point solver: ' + str(log_ppa['fgw_dist']))
+print('Entropic Fused Gromov-Wasserstein distance estimated with Projected Gradient solver: ' + str(log_pgd['fgw_dist']))
+print('Fused Gromov-Wasserstein distance estimated with Bregman Alternated Projected Gradient solver: ' + str(log_bapg['fgw_dist']))
 
 # compute OT sparsity level
-fgw0_sparsity = 100 * (fgw0 == 0.).astype(np.float64).sum() / (N2 * N3)
-fgw_sparsity = 100 * (fgw == 0.).astype(np.float64).sum() / (N2 * N3)
-fgwe_sparsity = 100 * (fgwe == 0.).astype(np.float64).sum() / (N2 * N3)
+T_cg_sparsity = 100 * (T_cg == 0.).astype(np.float64).sum() / (N2 * N3)
+T_ppa_sparsity = 100 * (T_ppa == 0.).astype(np.float64).sum() / (N2 * N3)
+T_pgd_sparsity = 100 * (T_pgd == 0.).astype(np.float64).sum() / (N2 * N3)
+T_bapg_sparsity = 100 * (T_bapg == 0.).astype(np.float64).sum() / (N2 * N3)
 
-# Methods using Sinkhorn projections tend to produce feasibility errors on the
+# Methods using Sinkhorn/Bregman projections tend to produce feasibility errors on the
 # marginal constraints
 
-err0 = np.linalg.norm(fgw0.sum(1) - h2) + np.linalg.norm(fgw0.sum(0) - h3)
-err = np.linalg.norm(fgw.sum(1) - h2) + np.linalg.norm(fgw.sum(0) - h3)
-erre = np.linalg.norm(fgwe.sum(1) - h2) + np.linalg.norm(fgwe.sum(0) - h3)
+err_cg = np.linalg.norm(T_cg.sum(1) - h2) + np.linalg.norm(T_cg.sum(0) - h3)
+err_ppa = np.linalg.norm(T_ppa.sum(1) - h2) + np.linalg.norm(T_ppa.sum(0) - h3)
+err_pgd = np.linalg.norm(T_pgd.sum(1) - h2) + np.linalg.norm(T_pgd.sum(0) - h3)
+err_bapg = np.linalg.norm(T_bapg.sum(1) - h2) + np.linalg.norm(T_bapg.sum(0) - h3)
 
 #############################################################################
 #
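
For readers comparing the four solvers, the diagnostics computed in the hunk above are, restated in LaTeX (a paraphrase of the code, with h2 and h3 the node weight vectors of the two graphs):

\mathrm{sparsity}(T) = 100 \cdot \frac{\#\{(i,j) : T_{ij} = 0\}}{N_2 N_3},
\qquad
\mathrm{err}(T) = \|T\mathbf{1} - h_2\|_2 + \|T^\top\mathbf{1} - h_3\|_2 .

The conditional-gradient plan is assembled from exact linear OT subproblems, so it satisfies the marginals exactly and is typically much sparser, while the Sinkhorn/Bregman-projection plans are dense and, as the comment in the diff notes, tend to satisfy the marginals only approximately.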
@@ -242,46 +281,52 @@ def draw_transp_colored_GW(G1, C1, G2, C2, part_G1, p1, p2, T,
 seed_G2 = 0
 seed_G3 = 4
 
-pl.figure(2, figsize=(12, 3.5))
+pl.figure(2, figsize=(15, 3.5))
 pl.clf()
-pl.subplot(131)
+pl.subplot(141)
 pl.axis('off')
-pl.axis
-pl.title('(CG algo) FGW=%s \n \n OT sparsity = %s \n feasibility error = %s' % (
-    np.round(log0['fgw_dist'], 3), str(np.round(fgw0_sparsity, 2)) + ' %',
-    np.round(err0, 4)), fontsize=fontsize)
 
-p0, q0 = fgw0.sum(1), fgw0.sum(0)  # check marginals
+pl.title('(CG) FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % (
+    np.round(log_cg['fgw_dist'], 3), str(np.round(T_cg_sparsity, 2)) + ' %',
+    np.round(err_cg, 4), str(np.round(time_cg, 2)) + ' ms'), fontsize=fontsize)
 
 pos1, pos2 = draw_transp_colored_GW(
-    weightedG2, C2, weightedG3, C3, part_G2, p1=p0, p2=q0, T=fgw0,
-    shiftx=1.5, node_size=node_size, seed_G1=seed_G2, seed_G2=seed_G3)
+    weightedG2, C2, weightedG3, C3, part_G2, p1=T_cg.sum(1), p2=T_cg.sum(0),
+    T=T_cg, shiftx=1.5, node_size=node_size, seed_G1=seed_G2, seed_G2=seed_G3)
 
-pl.subplot(132)
+pl.subplot(142)
 pl.axis('off')
 
-p, q = fgw.sum(1), fgw.sum(0)  # check marginals
-
-pl.title('(PP algo) FGW=%s\n \n OT sparsity = %s \n feasibility error = %s' % (
-    np.round(log['fgw_dist'], 3), str(np.round(fgw_sparsity, 2)) + ' %',
-    np.round(err, 4)), fontsize=fontsize)
+pl.title('(PPA) FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % (
+    np.round(log_ppa['fgw_dist'], 3), str(np.round(T_ppa_sparsity, 2)) + ' %',
+    np.round(err_ppa, 4), str(np.round(time_ppa, 2)) + ' ms'), fontsize=fontsize)
 
 pos1, pos2 = draw_transp_colored_GW(
-    weightedG2, C2, weightedG3, C3, part_G2, p1=p, p2=q, T=fgw,
-    pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0)
+    weightedG2, C2, weightedG3, C3, part_G2, p1=T_ppa.sum(1), p2=T_ppa.sum(0),
+    T=T_ppa, pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0)
 
-pl.subplot(133)
+pl.subplot(143)
 pl.axis('off')
 
-pe, qe = fgwe.sum(1), fgwe.sum(0)  # check marginals
+pl.title('(PGD) Entropic FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % (
+    np.round(log_pgd['fgw_dist'], 3), str(np.round(T_pgd_sparsity, 2)) + ' %',
+    np.round(err_pgd, 4), str(np.round(time_pgd, 2)) + ' ms'), fontsize=fontsize)
+
+pos1, pos2 = draw_transp_colored_GW(
+    weightedG2, C2, weightedG3, C3, part_G2, p1=T_pgd.sum(1), p2=T_pgd.sum(0),
+    T=T_pgd, pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0)
+
+
+pl.subplot(144)
+pl.axis('off')
 
-pl.title('Entropic FGW=%s\n \n OT sparsity = %s \n feasibility error = %s' % (
-    np.round(loge['fgw_dist'], 3), str(np.round(fgwe_sparsity, 2)) + ' %',
-    np.round(erre, 4)), fontsize=fontsize)
+pl.title('(BAPG) FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % (
+    np.round(log_bapg['fgw_dist'], 3), str(np.round(T_bapg_sparsity, 2)) + ' %',
+    np.round(err_bapg, 4), str(np.round(time_bapg, 2)) + ' ms'), fontsize=fontsize)
 
 pos1, pos2 = draw_transp_colored_GW(
-    weightedG2, C2, weightedG3, C3, part_G2, p1=pe, p2=qe, T=fgwe,
-    pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0)
+    weightedG2, C2, weightedG3, C3, part_G2, p1=T_bapg.sum(1), p2=T_bapg.sum(0),
+    T=T_bapg, pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0)
 
 pl.tight_layout()
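
To make the new BAPG entry point easy to try in isolation, here is a minimal, self-contained sketch using only the function and keyword arguments that appear in the diff above; the toy graphs and the `alpha`, `epsilon` and `tol` values are illustrative assumptions, not recommendations:

# Minimal sketch of the new BAPG FGW solver on toy data (arguments mirror the example above).
import numpy as np
from ot.gromov import BAPG_fused_gromov_wasserstein

rng = np.random.RandomState(0)
n1, n2, d = 10, 12, 3

# symmetric toy structure matrices and random node features
C1 = rng.rand(n1, n1)
C1 = (C1 + C1.T) / 2
C2 = rng.rand(n2, n2)
C2 = (C2 + C2.T) / 2
F1, F2 = rng.rand(n1, d), rng.rand(n2, d)

# pairwise feature cost and uniform node weights
M = np.linalg.norm(F1[:, None, :] - F2[None, :, :], axis=2)
h1, h2 = np.ones(n1) / n1, np.ones(n2) / n2

T, log = BAPG_fused_gromov_wasserstein(
    M, C1, C2, h1, h2, 'square_loss', alpha=0.5, epsilon=1.,
    tol=1e-9, marginal_loss=True, verbose=False, log=True)

print('FGW value:', log['fgw_dist'])
print('plan shape:', T.shape)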