
Commit f9f983c: Merge changes from main
2 parents 76288e5 + a450c83

25 files changed, +340 −210 lines

RELEASES.md
Lines changed: 5 additions & 2 deletions

@@ -1,9 +1,12 @@
 # Releases

-## 0.9.2
+## 0.9.2dev

 #### New features
-- Callbacks for generalized conditional gradient in `ot.da.sinkhorn_l1l2_gl` are now vectorized to improve performance (PR #507)
++ Tweaked `get_backend` to ignore `None` inputs (PR # 525)
++ Callbacks for generalized conditional gradient in `ot.da.sinkhorn_l1l2_gl` are now vectorized to improve performance (PR #507)
+
+#### Closed issues


 ## 0.9.1

ot/backend.py
Lines changed: 5 additions & 1 deletion

@@ -157,11 +157,15 @@ def _check_args_backend(backend, args):
 def get_backend(*args):
     """Returns the proper backend for a list of input arrays

+    Accepts None entries in the arguments, and ignores them
+
     Also raises TypeError if all arrays are not from the same backend
     """
+    args = [arg for arg in args if arg is not None]  # exclude None entries
+
     # check that some arrays given
     if not len(args) > 0:
-        raise ValueError(" The function takes at least one parameter")
+        raise ValueError(" The function takes at least one (non-None) parameter")

     for backend in _BACKENDS:
         if _check_args_backend(backend, args):
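For context, a minimal sketch of the new behaviour (the toy arrays below are illustrative, not part of the commit):

    import numpy as np
    from ot.backend import get_backend

    a = np.ones(3)
    nx = get_backend(a, None)   # the None entry is now ignored
    print(nx.__name__)          # 'numpy'

    try:
        get_backend(None, None)  # nothing left after filtering out None
    except ValueError as err:
        print(err)               # " The function takes at least one (non-None) parameter"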

ot/da.py
Lines changed: 2 additions & 2 deletions

@@ -1390,8 +1390,8 @@ class LinearGWTransport(LinearTransport):
     References
     ----------
     .. [57] Delon, J., Desolneux, A., & Salmona, A. (2022). Gromov–Wasserstein
-       distances between Gaussian distributions. Journal of Applied Probability,
-       59(4), 1178-1198.
+        distances between Gaussian distributions. Journal of Applied Probability,
+        59(4), 1178-1198.

     """

ot/datasets.py
Lines changed: 1 addition & 1 deletion

@@ -155,7 +155,7 @@ def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwa
     elif dataset.lower() == '2gauss_prop':

         y = np.concatenate((np.ones(int(p * n)), np.zeros(int((1 - p) * n))))
-        x = np.hstack((0 * y[:, None] - 0, 1 - 2 * y[:, None])) + nz * np.random.randn(len(y), 2)
+        x = np.hstack((0 * y[:, None] - 0, 1 - 2 * y[:, None])) + nz * generator.randn(len(y), 2)

         if ('bias' not in kwargs) and ('b' not in kwargs):
             kwargs['bias'] = np.array([0, 2])
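Since the noise is now drawn from the seeded generator rather than the global `np.random` state, passing `random_state` makes the '2gauss_prop' dataset reproducible; a short sketch of the expected behaviour:

    import numpy as np
    from ot.datasets import make_data_classif

    # same seed, same draw, now that generator.randn replaces np.random.randn
    x1, y1 = make_data_classif('2gauss_prop', n=100, random_state=42)
    x2, y2 = make_data_classif('2gauss_prop', n=100, random_state=42)
    assert np.allclose(x1, x2)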

ot/dr.py
Lines changed: 7 additions & 3 deletions

@@ -25,7 +25,7 @@
 import pymanopt.optimizers

 from .bregman import sinkhorn as sinkhorn_bregman
-from .utils import dist as dist_utils
+from .utils import dist as dist_utils, check_random_state


 def dist(x1, x2):
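`check_random_state` is the existing helper in `ot.utils` (mirroring the scikit-learn utility of the same name); a sketch of its contract, under that assumption:

    import numpy as np
    from ot.utils import check_random_state

    rng = check_random_state(42)        # int -> a freshly seeded RandomState
    rng2 = check_random_state(rng)      # RandomState instance -> passed through
    rng3 = check_random_state(None)     # None -> NumPy's global RandomState
    assert rng is rng2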
@@ -267,7 +267,7 @@ def proj(X):
     return Popt.point, proj


-def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0):
+def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0, random_state=None):
     r"""
     Projection Robust Wasserstein Distance :ref:`[32] <references-projection-robust-wasserstein>`

@@ -303,6 +303,9 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh
         Stop threshold on error (>0)
     verbose : int, optional
         Print information along iterations.
+    random_state : int, RandomState instance or None, default=None
+        Determines random number generation for initial value of projection
+        operator when U0 is not given.

     Returns
     -------

@@ -332,7 +335,8 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh
     assert d > k

     if U0 is None:
-        U = np.random.randn(d, k)
+        rng = check_random_state(random_state)
+        U = rng.randn(d, k)
         U, _ = np.linalg.qr(U)
     else:
         U = U0
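With the new argument, the random initialization of the projection `U` can be seeded, so repeated calls match. A hedged sketch (the toy data and the `tau`/`k` values are illustrative; `ot.dr` also needs the optional `pymanopt`/`autograd` dependencies):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    X = rng.randn(20, 4)           # source samples
    Y = rng.randn(30, 4) + 1       # target samples
    a, b = ot.unif(20), ot.unif(30)

    pi1, U1 = ot.dr.projection_robust_wasserstein(X, Y, a, b, tau=0.002, k=2, random_state=42)
    pi2, U2 = ot.dr.projection_robust_wasserstein(X, Y, a, b, tau=0.002, k=2, random_state=42)
    assert np.allclose(U1, U2)     # seeded init, identical projections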

ot/gromov/_bregman.py
Lines changed: 27 additions & 18 deletions

@@ -81,7 +81,7 @@ def entropic_gromov_wasserstein(
     q : array-like, shape (nt,), optional
         Distribution in the target space.
         If let to its default value None, uniform distribution is taken.
-    loss_fun : string, optional
+    loss_fun : string, optional (default='square_loss')
         Loss function used for the solver either 'square_loss' or 'kl_loss'
     epsilon : float, optional
         Regularization term >0

@@ -92,8 +92,8 @@
     G0: array-like, shape (ns,nt), optional
         If None the initial transport plan of the solver is pq^T.
         Otherwise G0 will be used as initial transport of the solver. G0 is not
-        required to satisfy marginal constraints but we strongly recommand it
-        to correcly estimate the GW distance.
+        required to satisfy marginal constraints but we strongly recommend it
+        to correctly estimate the GW distance.
     max_iter : int, optional
         Max number of iterations
     tol : float, optional

@@ -135,6 +135,9 @@
     if solver not in ['PGD', 'PPA']:
         raise ValueError("Unknown solver '%s'. Pick one in ['PGD', 'PPA']." % solver)

+    if loss_fun not in ('square_loss', 'kl_loss'):
+        raise ValueError(f"Unknown `loss_fun='{loss_fun}'`. Use one of: {'square_loss', 'kl_loss'}.")
+
     C1, C2 = list_to_array(C1, C2)
     arr = [C1, C2]
     if p is not None:
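The new guard turns a typo in `loss_fun` into an immediate, explicit error instead of a silent fall-through; sketched below with tiny made-up cost matrices:

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    C1 = rng.rand(5, 5); C1 = (C1 + C1.T) / 2   # toy symmetric structure matrices
    C2 = rng.rand(6, 6); C2 = (C2 + C2.T) / 2

    try:
        ot.gromov.entropic_gromov_wasserstein(C1, C2, loss_fun='absolute_loss')
    except ValueError as err:
        print(err)  # Unknown `loss_fun='absolute_loss'`. Use one of: ...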
@@ -280,7 +283,7 @@ def entropic_gromov_wasserstein2(
     q : array-like, shape (nt,), optional
         Distribution in the target space.
         If let to its default value None, uniform distribution is taken.
-    loss_fun : string, optional
+    loss_fun : string, optional (default='square_loss')
         Loss function used for the solver either 'square_loss' or 'kl_loss'
     epsilon : float, optional
         Regularization term >0

@@ -373,8 +376,8 @@ def entropic_gromov_barycenters(
     lambdas : list of float, optional
         List of the `S` spaces' weights.
         If let to its default value None, uniform weights are taken.
-    loss_fun : callable, optional
-        tensor-matrix multiplication function based on specific loss function
+    loss_fun : string, optional (default='square_loss')
+        Loss function used for the solver either 'square_loss' or 'kl_loss'
     epsilon : float, optional
         Regularization term >0
     symmetric : bool, optional.

@@ -411,6 +414,9 @@
         "Gromov-Wasserstein averaging of kernel and distance matrices."
         International Conference on Machine Learning (ICML). 2016.
     """
+    if loss_fun not in ('square_loss', 'kl_loss'):
+        raise ValueError(f"Unknown `loss_fun='{loss_fun}'`. Use one of: {'square_loss', 'kl_loss'}.")
+
     Cs = list_to_array(*Cs)
     arr = [*Cs]
     if ps is not None:

@@ -459,7 +465,6 @@

         if loss_fun == 'square_loss':
             C = update_square_loss(p, lambdas, T, Cs)
-
         elif loss_fun == 'kl_loss':
             C = update_kl_loss(p, lambdas, T, Cs)

@@ -550,21 +555,21 @@ def entropic_fused_gromov_wasserstein(
     q : array-like, shape (nt,), optional
         Distribution in the target space.
         If let to its default value None, uniform distribution is taken.
-    loss_fun : string, optional
+    loss_fun : string, optional (default='square_loss')
         Loss function used for the solver either 'square_loss' or 'kl_loss'
     epsilon : float, optional
         Regularization term >0
     symmetric : bool, optional
         Either C1 and C2 are to be assumed symmetric or not.
         If let to its default None value, a symmetry test will be conducted.
-        Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric).
+        Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric).
     alpha : float, optional
         Trade-off parameter (0 < alpha < 1)
     G0: array-like, shape (ns,nt), optional
         If None the initial transport plan of the solver is pq^T.
         Otherwise G0 will be used as initial transport of the solver. G0 is not
-        required to satisfy marginal constraints but we strongly recommand it
-        to correcly estimate the GW distance.
+        required to satisfy marginal constraints but we strongly recommend it
+        to correctly estimate the GW distance.
     max_iter : int, optional
         Max number of iterations
     tol : float, optional

@@ -611,6 +616,9 @@ def entropic_fused_gromov_wasserstein(
     if solver not in ['PGD', 'PPA']:
         raise ValueError("Unknown solver '%s'. Pick one in ['PGD', 'PPA']." % solver)

+    if loss_fun not in ('square_loss', 'kl_loss'):
+        raise ValueError(f"Unknown `loss_fun='{loss_fun}'`. Use one of: {'square_loss', 'kl_loss'}.")
+
     M, C1, C2 = list_to_array(M, C1, C2)
     arr = [M, C1, C2]
     if p is not None:

@@ -762,7 +770,7 @@ def entropic_fused_gromov_wasserstein2(
     q : array-like, shape (nt,), optional
         Distribution in the target space.
         If let to its default value None, uniform distribution is taken.
-    loss_fun : string, optional
+    loss_fun : string, optional (default='square_loss')
         Loss function used for the solver either 'square_loss' or 'kl_loss'
     epsilon : float, optional
         Regularization term >0

@@ -775,8 +783,8 @@ def entropic_fused_gromov_wasserstein2(
     G0: array-like, shape (ns,nt), optional
         If None the initial transport plan of the solver is pq^T.
         Otherwise G0 will be used as initial transport of the solver. G0 is not
-        required to satisfy marginal constraints but we strongly recommand it
-        to correcly estimate the GW distance.
+        required to satisfy marginal constraints but we strongly recommend it
+        to correctly estimate the GW distance.
     max_iter : int, optional
         Max number of iterations
     tol : float, optional

@@ -857,8 +865,8 @@ def entropic_fused_gromov_barycenters(
     lambdas : list of float, optional
         List of the `S` spaces' weights.
         If let to its default value None, uniform weights are taken.
-    loss_fun : callable, optional
-        tensor-matrix multiplication function based on specific loss function
+    loss_fun : string, optional (default='square_loss')
+        Loss function used for the solver either 'square_loss' or 'kl_loss'
     epsilon : float, optional
         Regularization term >0
     symmetric : bool, optional.

@@ -907,6 +915,9 @@ def entropic_fused_gromov_barycenters(
         "Optimal Transport for structured data with application on graphs"
         International Conference on Machine Learning (ICML). 2019.
     """
+    if loss_fun not in ('square_loss', 'kl_loss'):
+        raise ValueError(f"Unknown `loss_fun='{loss_fun}'`. Use one of: {'square_loss', 'kl_loss'}.")
+
     Cs = list_to_array(*Cs)
     Ys = list_to_array(*Ys)
     arr = [*Cs, *Ys]

@@ -977,7 +988,6 @@ def entropic_fused_gromov_barycenters(

         if loss_fun == 'square_loss':
             C = update_square_loss(p, lambdas, T, Cs)
-
         elif loss_fun == 'kl_loss':
             C = update_kl_loss(p, lambdas, T, Cs)

@@ -1004,7 +1014,6 @@
                 print('{:5d}|{:8e}|'.format(cpt, err_feature))

         cpt += 1
-    print('Y type:', type(Y))
     if log:
         log_['T'] = T  # from target to Ys
         log_['p'] = p
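Per the corrected docstrings, the barycenter solvers take `loss_fun` as a string rather than a callable. A sketch under that reading (the inputs, `N`, and solver settings are illustrative assumptions):

    import numpy as np
    import ot

    rng = np.random.RandomState(1)
    Cs = []
    for _ in range(3):
        M = rng.rand(8, 8)
        Cs.append((M + M.T) / 2)   # toy symmetric structure matrices

    C = ot.gromov.entropic_gromov_barycenters(
        N=5, Cs=Cs, loss_fun='square_loss', epsilon=0.05, max_iter=100)
    print(C.shape)  # (5, 5)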

ot/gromov/_dictionary.py
Lines changed: 19 additions & 8 deletions

@@ -11,13 +11,13 @@
 import numpy as np


-from ..utils import unif
+from ..utils import unif, check_random_state
 from ..backend import get_backend
 from ._gw import gromov_wasserstein, fused_gromov_wasserstein


 def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, epochs=20, batch_size=32, learning_rate=1., Cdict_init=None, projection='nonnegative_symmetric', use_log=True,
-                                           tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, **kwargs):
+                                           tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, random_state=None, **kwargs):
     r"""
     Infer Gromov-Wasserstein linear dictionary :math:`\{ (\mathbf{C_{dict}[d]}, q) \}_{d \in [D]}` from the list of structures :math:`\{ (\mathbf{C_s},\mathbf{p_s}) \}_s`

@@ -81,6 +81,9 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e
         Maximum number of iterations for the Conjugate Gradient. Default is 200.
     verbose : bool, optional
         Print the reconstruction loss every epoch. Default is False.
+    random_state : int, RandomState instance or None, default=None
+        Determines random number generation. Pass an int for reproducible
+        output across multiple function calls.

     Returns
     -------

@@ -90,6 +93,7 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e
         The dictionary leading to the best loss over an epoch is saved and returned.
     log: dict
         If use_log is True, contains loss evolutions by batches and epochs.
+
     References
     -------
     .. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, Online

@@ -110,10 +114,11 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e
         q = unif(nt)
     else:
         q = nx.to_numpy(q)
+    rng = check_random_state(random_state)
     if Cdict_init is None:
         # Initialize randomly structures of dictionary atoms based on samples
         dataset_means = [C.mean() for C in Cs]
-        Cdict = np.random.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt))
+        Cdict = rng.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt))
     else:
         Cdict = nx.to_numpy(Cdict_init).copy()
         assert Cdict.shape == (D, nt, nt)

@@ -141,7 +146,7 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e

         for _ in range(iter_by_epoch):
             # batch sampling
-            batch = np.random.choice(range(dataset_size), size=batch_size, replace=False)
+            batch = rng.choice(range(dataset_size), size=batch_size, replace=False)
             cumulated_loss_over_batch = 0.
             unmixings = np.zeros((batch_size, D))
             Cs_embedded = np.zeros((batch_size, nt, nt))

@@ -469,7 +474,8 @@ def _linesearch_gromov_wasserstein_unmixing(w, grad_w, x, Cdict, Cembedded, cons

 def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., ps=None, q=None, epochs=20, batch_size=32, learning_rate_C=1., learning_rate_Y=1.,
                                                  Cdict_init=None, Ydict_init=None, projection='nonnegative_symmetric', use_log=False,
-                                                 tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, **kwargs):
+                                                 tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False,
+                                                 random_state=None, **kwargs):
     r"""
     Infer Fused Gromov-Wasserstein linear dictionary :math:`\{ (\mathbf{C_{dict}[d]}, \mathbf{Y_{dict}[d]}, \mathbf{q}) \}_{d \in [D]}` from the list of S attributed structures :math:`\{ (\mathbf{C_s}, \mathbf{Y_s},\mathbf{p_s}) \}_s`

@@ -548,6 +554,9 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p
         Maximum number of iterations for the Conjugate Gradient. Default is 200.
     verbose : bool, optional
         Print the reconstruction loss every epoch. Default is False.
+    random_state : int, RandomState instance or None, default=None
+        Determines random number generation. Pass an int for reproducible
+        output across multiple function calls.

     Returns
     -------

@@ -560,6 +569,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p
         The dictionary leading to the best loss over an epoch is saved and returned.
     log: dict
         If use_log is True, contains loss evolutions by batches and epochs.
+
     References
     -------
     .. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, Online

@@ -583,17 +593,18 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p
     else:
         q = nx.to_numpy(q)

+    rng = check_random_state(random_state)
     if Cdict_init is None:
         # Initialize randomly structures of dictionary atoms based on samples
         dataset_means = [C.mean() for C in Cs]
-        Cdict = np.random.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt))
+        Cdict = rng.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt))
     else:
         Cdict = nx.to_numpy(Cdict_init).copy()
         assert Cdict.shape == (D, nt, nt)
     if Ydict_init is None:
         # Initialize randomly features of dictionary atoms based on samples distribution by feature component
         dataset_feature_means = np.stack([F.mean(axis=0) for F in Ys])
-        Ydict = np.random.normal(loc=dataset_feature_means.mean(axis=0), scale=dataset_feature_means.std(axis=0), size=(D, nt, d))
+        Ydict = rng.normal(loc=dataset_feature_means.mean(axis=0), scale=dataset_feature_means.std(axis=0), size=(D, nt, d))
     else:
         Ydict = nx.to_numpy(Ydict_init).copy()
         assert Ydict.shape == (D, nt, d)

@@ -626,7 +637,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p
         for _ in range(iter_by_epoch):

             # Batch iterations
-            batch = np.random.choice(range(dataset_size), size=batch_size, replace=False)
+            batch = rng.choice(range(dataset_size), size=batch_size, replace=False)
             cumulated_loss_over_batch = 0.
             unmixings = np.zeros((batch_size, D))
             Cs_embedded = np.zeros((batch_size, nt, nt))
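With `random_state` threaded through both the dictionary initialization and the batch sampling, two seeded runs should coincide. A reproducibility sketch (the sizes and epoch counts are illustrative):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    Cs = []
    for _ in range(6):
        M = rng.rand(10, 10)
        Cs.append((M + M.T) / 2)   # toy structure matrices to factorize

    Cdict1, _ = ot.gromov.gromov_wasserstein_dictionary_learning(
        Cs, D=3, nt=6, epochs=2, batch_size=3, random_state=0)
    Cdict2, _ = ot.gromov.gromov_wasserstein_dictionary_learning(
        Cs, D=3, nt=6, epochs=2, batch_size=3, random_state=0)
    assert np.allclose(Cdict1, Cdict2)  # seeded init and batches match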
