From 62bc7e874b68e5e77c8e25bcccb45fb55c3f4e58 Mon Sep 17 00:00:00 2001 From: Oleksii Kachaiev Date: Thu, 24 Aug 2023 16:42:33 +0200 Subject: [PATCH] Move from random seeding to local generators --- ot/dr.py | 10 ++- ot/gromov/_dictionary.py | 27 ++++--- ot/stochastic.py | 28 +++++--- test/test_1d_solver.py | 4 +- test/test_bregman.py | 19 +++-- test/test_coot.py | 13 ++-- test/test_da.py | 8 +-- test/test_dmmot.py | 1 - test/test_dr.py | 45 ++++++------ test/test_gromov.py | 147 ++++++++++++++++++++------------------- test/test_optim.py | 2 - test/test_ot.py | 2 - test/test_partial.py | 31 +++++---- test/test_plot.py | 7 +- test/test_regpath.py | 12 ++-- test/test_sliced.py | 2 +- test/test_unbalanced.py | 8 +-- test/test_utils.py | 9 +-- 18 files changed, 206 insertions(+), 169 deletions(-) diff --git a/ot/dr.py b/ot/dr.py index cb5768fec..c56170209 100644 --- a/ot/dr.py +++ b/ot/dr.py @@ -25,7 +25,7 @@ import pymanopt.optimizers from .bregman import sinkhorn as sinkhorn_bregman -from .utils import dist as dist_utils +from .utils import dist as dist_utils, check_random_state def dist(x1, x2): @@ -267,7 +267,7 @@ def proj(X): return Popt.point, proj -def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0): +def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0, random_state=None): r""" Projection Robust Wasserstein Distance :ref:`[32] ` @@ -303,6 +303,9 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh Stop threshold on error (>0) verbose : int, optional Print information along iterations. + random_state : int, RandomState instance or None, default=None + Determines random number generation for initial value of projection + operator when U0 is not given. Returns ------- @@ -332,7 +335,8 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh assert d > k if U0 is None: - U = np.random.randn(d, k) + rng = check_random_state(random_state) + U = rng.randn(d, k) U, _ = np.linalg.qr(U) else: U = U0 diff --git a/ot/gromov/_dictionary.py b/ot/gromov/_dictionary.py index 0d618d19e..fbecb706a 100644 --- a/ot/gromov/_dictionary.py +++ b/ot/gromov/_dictionary.py @@ -11,13 +11,13 @@ import numpy as np -from ..utils import unif +from ..utils import unif, check_random_state from ..backend import get_backend from ._gw import gromov_wasserstein, fused_gromov_wasserstein def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, epochs=20, batch_size=32, learning_rate=1., Cdict_init=None, projection='nonnegative_symmetric', use_log=True, - tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, **kwargs): + tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, random_state=None, **kwargs): r""" Infer Gromov-Wasserstein linear dictionary :math:`\{ (\mathbf{C_{dict}[d]}, q) \}_{d \in [D]}` from the list of structures :math:`\{ (\mathbf{C_s},\mathbf{p_s}) \}_s` @@ -81,6 +81,9 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e Maximum number of iterations for the Conjugate Gradient. Default is 200. verbose : bool, optional Print the reconstruction loss every epoch. Default is False. + random_state : int, RandomState instance or None, default=None + Determines random number generation. Pass an int for reproducible + output across multiple function calls. 
Returns ------- @@ -90,6 +93,7 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e The dictionary leading to the best loss over an epoch is saved and returned. log: dict If use_log is True, contains loss evolutions by batches and epochs. + References ------- .. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, Online @@ -110,10 +114,11 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e q = unif(nt) else: q = nx.to_numpy(q) + rng = check_random_state(random_state) if Cdict_init is None: # Initialize randomly structures of dictionary atoms based on samples dataset_means = [C.mean() for C in Cs] - Cdict = np.random.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt)) + Cdict = rng.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt)) else: Cdict = nx.to_numpy(Cdict_init).copy() assert Cdict.shape == (D, nt, nt) @@ -141,7 +146,7 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e for _ in range(iter_by_epoch): # batch sampling - batch = np.random.choice(range(dataset_size), size=batch_size, replace=False) + batch = rng.choice(range(dataset_size), size=batch_size, replace=False) cumulated_loss_over_batch = 0. unmixings = np.zeros((batch_size, D)) Cs_embedded = np.zeros((batch_size, nt, nt)) @@ -469,7 +474,8 @@ def _linesearch_gromov_wasserstein_unmixing(w, grad_w, x, Cdict, Cembedded, cons def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., ps=None, q=None, epochs=20, batch_size=32, learning_rate_C=1., learning_rate_Y=1., Cdict_init=None, Ydict_init=None, projection='nonnegative_symmetric', use_log=False, - tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, **kwargs): + tol_outer=10**(-5), tol_inner=10**(-5), max_iter_outer=20, max_iter_inner=200, use_adam_optimizer=True, verbose=False, + random_state=None, **kwargs): r""" Infer Fused Gromov-Wasserstein linear dictionary :math:`\{ (\mathbf{C_{dict}[d]}, \mathbf{Y_{dict}[d]}, \mathbf{q}) \}_{d \in [D]}` from the list of S attributed structures :math:`\{ (\mathbf{C_s}, \mathbf{Y_s},\mathbf{p_s}) \}_s` @@ -548,6 +554,9 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p Maximum number of iterations for the Conjugate Gradient. Default is 200. verbose : bool, optional Print the reconstruction loss every epoch. Default is False. + random_state : int, RandomState instance or None, default=None + Determines random number generation. Pass an int for reproducible + output across multiple function calls. Returns ------- @@ -560,6 +569,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p The dictionary leading to the best loss over an epoch is saved and returned. log: dict If use_log is True, contains loss evolutions by batches and epochs. + References ------- .. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. 
Courty, Online @@ -583,17 +593,18 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p else: q = nx.to_numpy(q) + rng = check_random_state(random_state) if Cdict_init is None: # Initialize randomly structures of dictionary atoms based on samples dataset_means = [C.mean() for C in Cs] - Cdict = np.random.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt)) + Cdict = rng.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(D, nt, nt)) else: Cdict = nx.to_numpy(Cdict_init).copy() assert Cdict.shape == (D, nt, nt) if Ydict_init is None: # Initialize randomly features of dictionary atoms based on samples distribution by feature component dataset_feature_means = np.stack([F.mean(axis=0) for F in Ys]) - Ydict = np.random.normal(loc=dataset_feature_means.mean(axis=0), scale=dataset_feature_means.std(axis=0), size=(D, nt, d)) + Ydict = rng.normal(loc=dataset_feature_means.mean(axis=0), scale=dataset_feature_means.std(axis=0), size=(D, nt, d)) else: Ydict = nx.to_numpy(Ydict_init).copy() assert Ydict.shape == (D, nt, d) @@ -626,7 +637,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p for _ in range(iter_by_epoch): # Batch iterations - batch = np.random.choice(range(dataset_size), size=batch_size, replace=False) + batch = rng.choice(range(dataset_size), size=batch_size, replace=False) cumulated_loss_over_batch = 0. unmixings = np.zeros((batch_size, D)) Cs_embedded = np.zeros((batch_size, nt, nt)) diff --git a/ot/stochastic.py b/ot/stochastic.py index 79d971bfd..fec512ccc 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -10,7 +10,7 @@ # License: MIT License import numpy as np -from .utils import dist +from .utils import dist, check_random_state from .backend import get_backend ############################################################################## @@ -69,7 +69,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i): return b - khi -def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): +def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None, random_state=None): r""" Compute the SAG algorithm to solve the regularized discrete measures optimal transport max problem @@ -110,6 +110,9 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): Number of iteration. lr : float Learning rate. + random_state : int, RandomState instance or None, default=None + Determines random number generation. Pass an int for reproducible + output across multiple function calls. 
Returns ------- @@ -129,8 +132,9 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): cur_beta = np.zeros(n_target) stored_gradient = np.zeros((n_source, n_target)) sum_stored_gradient = np.zeros(n_target) + rng = check_random_state(random_state) for _ in range(numItermax): - i = np.random.randint(n_source) + i = rng.randint(n_source) cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg, cur_beta, i) sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) @@ -139,7 +143,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): return cur_beta -def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): +def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None, random_state=None): r''' Compute the ASGD algorithm to solve the regularized semi continous measures optimal transport max problem @@ -177,6 +181,9 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): Number of iteration. lr : float Learning rate. + random_state : int, RandomState instance or None, default=None + Determines random number generation. Pass an int for reproducible + output across multiple function calls. Returns ------- @@ -195,9 +202,10 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) ave_beta = np.zeros(n_target) + rng = check_random_state(random_state) for cur_iter in range(numItermax): k = cur_iter + 1 - i = np.random.randint(n_source) + i = rng.randint(n_source) cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i) cur_beta += (lr / np.sqrt(k)) * cur_coord_grad ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta @@ -422,7 +430,7 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, return grad_alpha, grad_beta -def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): +def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr, random_state=None): r''' Compute the sgd algorithm to solve the regularized discrete measures optimal transport dual problem @@ -460,6 +468,9 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): number of iteration lr : float learning rate + random_state : int, RandomState instance or None, default=None + Determines random number generation. Pass an int for reproducible + output across multiple function calls. 
Returns ------- @@ -477,10 +488,11 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): n_target = np.shape(M)[1] cur_alpha = np.zeros(n_source) cur_beta = np.zeros(n_target) + rng = check_random_state(random_state) for cur_iter in range(numItermax): k = np.sqrt(cur_iter + 1) - batch_alpha = np.random.choice(n_source, batch_size, replace=False) - batch_beta = np.random.choice(n_target, batch_size, replace=False) + batch_alpha = rng.choice(n_source, batch_size, replace=False) + batch_beta = rng.choice(n_target, batch_size, replace=False) update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha, cur_beta, batch_size, batch_alpha, batch_beta) diff --git a/test/test_1d_solver.py b/test/test_1d_solver.py index 075a4156d..265fab5b1 100644 --- a/test/test_1d_solver.py +++ b/test/test_1d_solver.py @@ -163,8 +163,8 @@ def test_emd_1d_emd2_1d(): np.testing.assert_allclose(G, G_1d, atol=1e-15) # check AssertionError is raised if called on non 1d arrays - u = np.random.randn(n, 2) - v = np.random.randn(m, 2) + u = rng.randn(n, 2) + v = rng.randn(m, 2) with pytest.raises(AssertionError): ot.emd_1d(u, v, [], []) diff --git a/test/test_bregman.py b/test/test_bregman.py index f01bb144f..8355cda95 100644 --- a/test/test_bregman.py +++ b/test/test_bregman.py @@ -298,7 +298,8 @@ def test_sinkhorn_variants(nx): def test_sinkhorn_variants_dtype_device(nx, method): n = 100 - x = np.random.randn(n, 2) + rng = np.random.RandomState(42) + x = rng.randn(n, 2) u = ot.utils.unif(n) M = ot.dist(x, x) @@ -317,7 +318,8 @@ def test_sinkhorn_variants_dtype_device(nx, method): def test_sinkhorn2_variants_dtype_device(nx, method): n = 100 - x = np.random.randn(n, 2) + rng = np.random.RandomState(42) + x = rng.randn(n, 2) u = ot.utils.unif(n) M = ot.dist(x, x) @@ -337,7 +339,8 @@ def test_sinkhorn2_variants_dtype_device(nx, method): def test_sinkhorn2_variants_device_tf(method): nx = ot.backend.TensorflowBackend() n = 100 - x = np.random.randn(n, 2) + rng = np.random.RandomState(42) + x = rng.randn(n, 2) u = ot.utils.unif(n) M = ot.dist(x, x) @@ -690,11 +693,12 @@ def test_barycenter_stabilization(nx): @pytest.mark.parametrize("method", ["sinkhorn", "sinkhorn_log"]) def test_wasserstein_bary_2d(nx, method): + rng = np.random.RandomState(42) size = 20 # size of a square image - a1 = np.random.rand(size, size) + a1 = rng.rand(size, size) a1 += a1.min() a1 = a1 / np.sum(a1) - a2 = np.random.rand(size, size) + a2 = rng.rand(size, size) a2 += a2.min() a2 = a2 / np.sum(a2) # creating matrix A containing all distributions @@ -724,11 +728,12 @@ def test_wasserstein_bary_2d(nx, method): @pytest.mark.parametrize("method", ["sinkhorn", "sinkhorn_log"]) def test_wasserstein_bary_2d_debiased(nx, method): + rng = np.random.RandomState(42) size = 20 # size of a square image - a1 = np.random.rand(size, size) + a1 = rng.rand(size, size) a1 += a1.min() a1 = a1 / np.sum(a1) - a2 = np.random.rand(size, size) + a2 = rng.rand(size, size) a2 += a2.min() a2 = a2 / np.sum(a2) # creating matrix A containing all distributions diff --git a/test/test_coot.py b/test/test_coot.py index ef68a9bcd..853ca793c 100644 --- a/test/test_coot.py +++ b/test/test_coot.py @@ -223,21 +223,22 @@ def test_coot_warmstart(nx): xt_nx = nx.from_numpy(xt) # initialize warmstart - init_pi_sample = np.random.rand(n_samples, n_samples) + rng = np.random.RandomState(42) + init_pi_sample = rng.rand(n_samples, n_samples) init_pi_sample = init_pi_sample / np.sum(init_pi_sample) init_pi_sample_nx = nx.from_numpy(init_pi_sample) - init_pi_feature = 
np.random.rand(2, 2) + init_pi_feature = rng.rand(2, 2) init_pi_feature /= init_pi_feature / np.sum(init_pi_feature) init_pi_feature_nx = nx.from_numpy(init_pi_feature) - init_duals_sample = (np.random.random(n_samples) * 2 - 1, - np.random.random(n_samples) * 2 - 1) + init_duals_sample = (rng.random(n_samples) * 2 - 1, + rng.random(n_samples) * 2 - 1) init_duals_sample_nx = (nx.from_numpy(init_duals_sample[0]), nx.from_numpy(init_duals_sample[1])) - init_duals_feature = (np.random.random(2) * 2 - 1, - np.random.random(2) * 2 - 1) + init_duals_feature = (rng.random(2) * 2 - 1, + rng.random(2) * 2 - 1) init_duals_feature_nx = (nx.from_numpy(init_duals_feature[0]), nx.from_numpy(init_duals_feature[1])) diff --git a/test/test_da.py b/test/test_da.py index c95d48850..dd7d1e0c8 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -567,12 +567,11 @@ def test_mapping_transport_class_specific_seed(nx): # check that it does not crash when derphi is very close to 0 ns = 20 nt = 30 - np.random.seed(39) - Xs, ys = make_data_classif('3gauss', ns) - Xt, yt = make_data_classif('3gauss2', nt) + rng = np.random.RandomState(39) + Xs, ys = make_data_classif('3gauss', ns, random_state=rng) + Xt, yt = make_data_classif('3gauss2', nt, random_state=rng) otda = ot.da.MappingTransport(kernel="gaussian", bias=False) otda.fit(Xs=nx.from_numpy(Xs), Xt=nx.from_numpy(Xt)) - np.random.seed(None) @pytest.skip_backend("jax") @@ -712,7 +711,6 @@ def test_jcpot_barycenter(nx): nt = 50 sigma = 0.1 - np.random.seed(1985) ps1 = .2 ps2 = .9 diff --git a/test/test_dmmot.py b/test/test_dmmot.py index fa8dc6b89..dcc313755 100644 --- a/test/test_dmmot.py +++ b/test/test_dmmot.py @@ -10,7 +10,6 @@ def create_test_data(nx): - np.random.seed(1234) n = 4 a1 = ot.datasets.make_1D_gauss(n, m=20, s=5) a2 = ot.datasets.make_1D_gauss(n, m=60, s=8) diff --git a/test/test_dr.py b/test/test_dr.py index 4f0d937ef..3680547db 100644 --- a/test/test_dr.py +++ b/test/test_dr.py @@ -21,14 +21,14 @@ def test_fda(): n_samples = 90 # nb samples in source and target datasets - np.random.seed(0) + rng = np.random.RandomState(0) # generate gaussian dataset - xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples) + xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples, random_state=rng) n_features_noise = 8 - xs = np.hstack((xs, np.random.randn(n_samples, n_features_noise))) + xs = np.hstack((xs, rng.randn(n_samples, n_features_noise))) p = 1 @@ -43,14 +43,14 @@ def test_fda(): def test_wda(): n_samples = 100 # nb samples in source and target datasets - np.random.seed(0) + rng = np.random.RandomState(0) # generate gaussian dataset - xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples) + xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples, random_state=rng) n_features_noise = 8 - xs = np.hstack((xs, np.random.randn(n_samples, n_features_noise))) + xs = np.hstack((xs, rng.randn(n_samples, n_features_noise))) p = 2 @@ -65,14 +65,14 @@ def test_wda(): def test_wda_low_reg(): n_samples = 100 # nb samples in source and target datasets - np.random.seed(0) + rng = np.random.RandomState(0) # generate gaussian dataset - xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples) + xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples, random_state=rng) n_features_noise = 8 - xs = np.hstack((xs, np.random.randn(n_samples, n_features_noise))) + xs = np.hstack((xs, rng.randn(n_samples, n_features_noise))) p = 2 @@ -87,18 +87,18 @@ def test_wda_low_reg(): def test_wda_normalized(): n_samples = 100 # nb samples in source and 
target datasets - np.random.seed(0) + rng = np.random.RandomState(0) # generate gaussian dataset - xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples) + xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples, random_state=rng) n_features_noise = 8 - xs = np.hstack((xs, np.random.randn(n_samples, n_features_noise))) + xs = np.hstack((xs, rng.randn(n_samples, n_features_noise))) p = 2 - P0 = np.random.randn(10, p) + P0 = rng.randn(10, p) P0 /= P0.sum(0, keepdims=True) Pwda, projwda = ot.dr.wda(xs, ys, p, maxiter=10, P0=P0, normalize=True) @@ -115,7 +115,7 @@ def test_prw(): k = 3 # Subspace dimension dim = 3 - def fragmented_hypercube(n, d, dim): + def fragmented_hypercube(n, d, dim, rng): assert dim <= d assert dim >= 1 assert dim == int(dim) @@ -124,21 +124,22 @@ def fragmented_hypercube(n, d, dim): b = (1. / n) * np.ones(n) # First measure : uniform on the hypercube - X = np.random.uniform(-1, 1, size=(n, d)) + X = rng.uniform(-1, 1, size=(n, d)) # Second measure : fragmentation - tmp_y = np.random.uniform(-1, 1, size=(n, d)) + tmp_y = rng.uniform(-1, 1, size=(n, d)) Y = tmp_y + 2 * np.sign(tmp_y) * np.array(dim * [1] + (d - dim) * [0]) return a, b, X, Y - a, b, X, Y = fragmented_hypercube(n, d, dim) + rng = np.random.RandomState(42) + a, b, X, Y = fragmented_hypercube(n, d, dim, rng) tau = 0.002 reg = 0.2 pi, U = ot.dr.projection_robust_wasserstein(X, Y, a, b, tau, reg=reg, k=k, maxiter=1000, verbose=1) - U0 = np.random.randn(d, k) + U0 = rng.randn(d, k) U0, _ = np.linalg.qr(U0) pi, U = ot.dr.projection_robust_wasserstein(X, Y, a, b, tau, U0=U0, reg=reg, k=k, maxiter=1000, verbose=1) @@ -150,15 +151,15 @@ def test_ewca(): d = 5 n_samples = 50 k = 3 - np.random.seed(0) + rng = np.random.RandomState(0) # generate gaussian dataset - A = np.random.normal(size=(d, d)) + A = rng.normal(size=(d, d)) Q, _ = np.linalg.qr(A) - D = np.random.normal(size=d) + D = rng.normal(size=d) D = (D / np.linalg.norm(D)) ** 4 cov = Q @ np.diag(D) @ Q.T - X = np.random.multivariate_normal(np.zeros(d), cov, size=n_samples) + X = rng.multivariate_normal(np.zeros(d), cov, size=n_samples) X = X - X.mean(0, keepdims=True) assert X.shape == (n_samples, d) diff --git a/test/test_gromov.py b/test/test_gromov.py index 13ff3fe99..be4f659fe 100644 --- a/test/test_gromov.py +++ b/test/test_gromov.py @@ -79,10 +79,10 @@ def test_gromov(nx): def test_asymmetric_gromov(nx): n_samples = 30 # nb samples - np.random.seed(0) - C1 = np.random.uniform(low=0., high=10, size=(n_samples, n_samples)) + rng = np.random.RandomState(0) + C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples)) idx = np.arange(n_samples) - np.random.shuffle(idx) + rng.shuffle(idx) C2 = C1[idx, :][:, idx] p = ot.unif(n_samples) @@ -412,10 +412,10 @@ def test_entropic_proximal_gromov(nx): @pytest.skip_backend("tf", reason="test very slow with tf backend") def test_asymmetric_entropic_gromov(nx): n_samples = 10 # nb samples - np.random.seed(0) - C1 = np.random.uniform(low=0., high=10, size=(n_samples, n_samples)) + rng = np.random.RandomState(0) + C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples)) idx = np.arange(n_samples) - np.random.shuffle(idx) + rng.shuffle(idx) C2 = C1[idx, :][:, idx] p = ot.unif(n_samples) @@ -501,7 +501,8 @@ def test_entropic_fgw(nx): xt = xs[::-1].copy() - ys = np.random.randn(xs.shape[0], 2) + rng = np.random.RandomState(42) + ys = rng.randn(xs.shape[0], 2) yt = ys[::-1].copy() p = ot.unif(n_samples) @@ -565,7 +566,8 @@ def test_entropic_proximal_fgw(nx): xt = xs[::-1].copy() - ys = 
np.random.randn(xs.shape[0], 2) + rng = np.random.RandomState(42) + ys = rng.randn(xs.shape[0], 2) yt = ys[::-1].copy() p = ot.unif(n_samples) @@ -621,13 +623,13 @@ def test_entropic_proximal_fgw(nx): def test_asymmetric_entropic_fgw(nx): n_samples = 10 # nb samples - np.random.seed(0) - C1 = np.random.uniform(low=0., high=10, size=(n_samples, n_samples)) + rng = np.random.RandomState(0) + C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples)) idx = np.arange(n_samples) - np.random.shuffle(idx) + rng.shuffle(idx) C2 = C1[idx, :][:, idx] - ys = np.random.randn(n_samples, 2) + ys = rng.randn(n_samples, 2) yt = ys[idx, :] M = ot.dist(ys, yt) @@ -671,11 +673,12 @@ def test_entropic_fgw_dtype_device(nx): mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) - xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=42) + rng = np.random.RandomState(42) + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=rng) xt = xs[::-1].copy() - ys = np.random.randn(xs.shape[0], 2) + ys = rng.randn(xs.shape[0], 2) yt = ys[::-1].copy() p = ot.unif(n_samples) @@ -711,11 +714,12 @@ def test_entropic_fgw_barycenter(nx): ns = 5 nt = 10 + rng = np.random.RandomState(42) Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42) Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42) - ys = np.random.randn(Xs.shape[0], 2) - yt = np.random.randn(Xt.shape[0], 2) + ys = rng.randn(Xs.shape[0], 2) + yt = rng.randn(Xt.shape[0], 2) C1 = ot.dist(Xs) C2 = ot.dist(Xt) @@ -1031,7 +1035,8 @@ def test_fgw(nx): xt = xs[::-1].copy() - ys = np.random.randn(xs.shape[0], 2) + rng = np.random.RandomState(42) + ys = rng.randn(xs.shape[0], 2) yt = ys[::-1].copy() p = ot.unif(n_samples) @@ -1085,14 +1090,14 @@ def test_fgw(nx): def test_asymmetric_fgw(nx): n_samples = 50 # nb samples - np.random.seed(0) - C1 = np.random.uniform(low=0., high=10, size=(n_samples, n_samples)) + rng = np.random.RandomState(0) + C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples)) idx = np.arange(n_samples) - np.random.shuffle(idx) + rng.shuffle(idx) C2 = C1[idx, :][:, idx] # add features - F1 = np.random.uniform(low=0., high=10, size=(n_samples, 1)) + F1 = rng.uniform(low=0., high=10, size=(n_samples, 1)) F2 = F1[idx, :] p = ot.unif(n_samples) q = ot.unif(n_samples) @@ -1228,10 +1233,11 @@ def test_fgw_helper_backend(nx): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 1]]) + rng = np.random.RandomState(42) xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=0) - ys = np.random.randn(xs.shape[0], 2) + ys = rng.randn(xs.shape[0], 2) xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=1) - yt = np.random.randn(xt.shape[0], 2) + yt = rng.randn(xt.shape[0], 2) p = ot.unif(n_samples) q = ot.unif(n_samples) @@ -1274,16 +1280,15 @@ def line_search(cost, G, deltaG, Mi, cost_G): def test_fgw_barycenter(nx): - np.random.seed(42) - ns = 10 nt = 20 Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42) Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42) - ys = np.random.randn(Xs.shape[0], 2) - yt = np.random.randn(Xt.shape[0], 2) + rng = np.random.RandomState(42) + ys = rng.randn(Xs.shape[0], 2) + yt = rng.randn(Xt.shape[0], 2) C1 = ot.dist(Xs) C2 = ot.dist(Xt) @@ -1301,7 +1306,7 @@ def test_fgw_barycenter(nx): fixed_features=False, p=pb, loss_fun='square_loss', max_iter=100, tol=1e-3, random_state=12345 ) - xalea = np.random.randn(n_samples, 2) + xalea = rng.randn(n_samples, 2) init_C = ot.dist(xalea, 
xalea) init_C /= init_C.max() init_Cb = nx.from_numpy(init_C) @@ -1315,7 +1320,7 @@ def test_fgw_barycenter(nx): np.testing.assert_allclose(Cb.shape, (n_samples, n_samples)) np.testing.assert_allclose(Xb.shape, (n_samples, ys.shape[1])) - init_X = np.random.randn(n_samples, ys.shape[1]) + init_X = rng.randn(n_samples, ys.shape[1]) init_Xb = nx.from_numpy(init_X) Xb, Cb, logb = ot.gromov.fgw_barycenters( @@ -1438,8 +1443,8 @@ def test_gromov_wasserstein_dictionary_learning(nx): # Provide initialization for the graph dictionary of shape (n_atoms, shape, shape) # following the same procedure than implemented in gromov_wasserstein_dictionary_learning. dataset_means = [C.mean() for C in Cs] - np.random.seed(0) - Cdict_init = np.random.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(n_atoms, shape, shape)) + rng = np.random.RandomState(0) + Cdict_init = rng.normal(loc=np.mean(dataset_means), scale=np.std(dataset_means), size=(n_atoms, shape, shape)) if projection == 'nonnegative_symmetric': Cdict_init = 0.5 * (Cdict_init + Cdict_init.transpose((0, 2, 1))) @@ -1507,12 +1512,12 @@ def test_gromov_wasserstein_dictionary_learning(nx): # Test: Perform same comparison without providing the initial dictionary being an optional input # knowing than the initialization scheme is the same than implemented to set the benchmarked initialization. - np.random.seed(0) Cdict_bis, log = ot.gromov.gromov_wasserstein_dictionary_learning( Cs, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose, + random_state=0 ) # > Compute reconstruction of samples on learned dictionary total_reconstruction_bis = 0 @@ -1526,12 +1531,12 @@ def test_gromov_wasserstein_dictionary_learning(nx): np.testing.assert_allclose(total_reconstruction_bis, total_reconstruction, atol=1e-05) # Test: Same after going through backend - np.random.seed(0) Cdictb_bis, log = ot.gromov.gromov_wasserstein_dictionary_learning( Csb, D=n_atoms, nt=shape, ps=psb, q=qb, Cdict_init=None, epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, + verbose=verbose, random_state=0 ) # > Compute reconstruction of samples on learned dictionary total_reconstruction_b_bis = 0 @@ -1553,12 +1558,12 @@ def test_gromov_wasserstein_dictionary_learning(nx): verbose = True use_log = True - np.random.seed(0) Cdict_bis2, log = ot.gromov.gromov_wasserstein_dictionary_learning( Cs, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict, epochs=epochs, batch_size=n_samples, learning_rate=10., reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, + verbose=verbose, random_state=0, ) # > Compute reconstruction of samples on learned dictionary total_reconstruction_bis2 = 0 @@ -1572,12 +1577,12 @@ def test_gromov_wasserstein_dictionary_learning(nx): np.testing.assert_array_less(total_reconstruction_bis2, 
total_reconstruction) # Test: Same after going through backend - np.random.seed(0) Cdictb_bis2, log = ot.gromov.gromov_wasserstein_dictionary_learning( Csb, D=n_atoms, nt=shape, ps=psb, q=qb, Cdict_init=Cdictb, epochs=epochs, batch_size=n_samples, learning_rate=10., reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, + verbose=verbose, random_state=0, ) # > Compute reconstruction of samples on learned dictionary total_reconstruction_b_bis2 = 0 @@ -1701,13 +1706,13 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): # Provide initialization for the graph dictionary of shape (n_atoms, shape, shape) # following the same procedure than implemented in gromov_wasserstein_dictionary_learning. dataset_structure_means = [C.mean() for C in Cs] - np.random.seed(0) - Cdict_init = np.random.normal(loc=np.mean(dataset_structure_means), scale=np.std(dataset_structure_means), size=(n_atoms, shape, shape)) + rng = np.random.RandomState(0) + Cdict_init = rng.normal(loc=np.mean(dataset_structure_means), scale=np.std(dataset_structure_means), size=(n_atoms, shape, shape)) if projection == 'nonnegative_symmetric': Cdict_init = 0.5 * (Cdict_init + Cdict_init.transpose((0, 2, 1))) Cdict_init[Cdict_init < 0.] = 0. dataset_feature_means = np.stack([Y.mean(axis=0) for Y in Ys]) - Ydict_init = np.random.normal(loc=dataset_feature_means.mean(axis=0), scale=dataset_feature_means.std(axis=0), size=(n_atoms, shape, 2)) + Ydict_init = rng.normal(loc=dataset_feature_means.mean(axis=0), scale=dataset_feature_means.std(axis=0), size=(n_atoms, shape, 2)) Csb = nx.from_numpy(*Cs) Ysb = nx.from_numpy(*Ys) @@ -1749,12 +1754,12 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): np.testing.assert_array_less(total_reconstruction, initial_total_reconstruction) # Test: Perform same experiments after going through backend - Cdictb, Ydictb, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdict_initb, Ydict_init=Ydict_initb, epochs=epochs, batch_size=2 * n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose, + random_state=0 ) # > Compute reconstruction of samples on learned dictionary total_reconstruction_b = 0 @@ -1772,12 +1777,12 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): np.testing.assert_allclose(Ydict, nx.to_numpy(Ydictb), atol=1e-03) # Test: Perform similar experiment without providing the initial dictionary being an optional input - np.random.seed(0) Cdict_bis, Ydict_bis, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Cs, Ys, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, Ydict_init=None, epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose, + random_state=0 ) # > Compute reconstruction of samples on learned dictionary 
total_reconstruction_bis = 0 @@ -1791,12 +1796,12 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): np.testing.assert_allclose(total_reconstruction_bis, total_reconstruction, atol=1e-05) # > Same after going through backend - np.random.seed(0) Cdictb_bis, Ydictb_bis, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, Ydict_init=None, epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose, + random_state=0, ) # > Compute reconstruction of samples on learned dictionary @@ -1817,12 +1822,12 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): use_log = True # > Experiment providing previously estimated dictionary to speed up the test compared to providing initial random init. - np.random.seed(0) Cdict_bis2, Ydict_bis2, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Cs, Ys, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict, Ydict_init=Ydict, epochs=epochs, batch_size=n_samples, learning_rate_C=10., learning_rate_Y=10., alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, + verbose=verbose, random_state=0, ) # > Compute reconstruction of samples on learned dictionary total_reconstruction_bis2 = 0 @@ -1836,12 +1841,12 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): np.testing.assert_array_less(total_reconstruction_bis2, total_reconstruction) # > Same after going through backend - np.random.seed(0) Cdictb_bis2, Ydictb_bis2, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdictb, Ydict_init=Ydictb, epochs=epochs, batch_size=n_samples, learning_rate_C=10., learning_rate_Y=10., alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, - projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose + projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose, + random_state=0, ) # > Compute reconstruction of samples on learned dictionary @@ -1859,7 +1864,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): def test_semirelaxed_gromov(nx): - np.random.seed(0) + rng = np.random.RandomState(0) # unbalanced proportions list_n = [30, 15] nt = 2 @@ -1871,7 +1876,7 @@ def test_semirelaxed_gromov(nx): for i in range(nt): for j in range(nt): ni, nj = list_n[i], list_n[j] - xij = np.random.binomial(size=(ni, nj), n=1, p=C2[i, j]) + xij = rng.binomial(size=(ni, nj), n=1, p=C2[i, j]) C1[i * ni: (i + 1) * ni, j * nj: (j + 1) * nj] = xij p = ot.unif(ns, type_as=C1) q0 = ot.unif(C2.shape[0], type_as=C1) @@ -2022,7 +2027,7 @@ def line_search(cost, G, deltaG, Mi, cost_G): def test_semirelaxed_fgw(nx): - np.random.seed(0) + rng = np.random.RandomState(0) list_n = [16, 8] nt = 2 ns = 24 @@ -2033,11 +2038,11 @@ def test_semirelaxed_fgw(nx): for i in range(nt): for j in range(nt): ni, nj = list_n[i], list_n[j] - xij = np.random.binomial(size=(ni, nj), n=1, p=C2[i, j]) + xij = rng.binomial(size=(ni, nj), n=1, 
p=C2[i, j]) C1[i * ni: (i + 1) * ni, j * nj: (j + 1) * nj] = xij F1 = np.zeros((ns, 1)) - F1[:16] = np.random.normal(loc=0., scale=0.01, size=(16, 1)) - F1[16:] = np.random.normal(loc=1., scale=0.01, size=(8, 1)) + F1[:16] = rng.normal(loc=0., scale=0.01, size=(16, 1)) + F1[16:] = rng.normal(loc=1., scale=0.01, size=(8, 1)) F2 = np.zeros((2, 1)) F2[1, :] = 1. M = (F1 ** 2).dot(np.ones((1, nt))) + np.ones((ns, 1)).dot((F2 ** 2).T) - 2 * F1.dot(F2.T) @@ -2163,10 +2168,11 @@ def test_srfgw_helper_backend(nx): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 1]]) + rng = np.random.RandomState(42) xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=0) - ys = np.random.randn(xs.shape[0], 2) + ys = rng.randn(xs.shape[0], 2) xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=1) - yt = np.random.randn(xt.shape[0], 2) + yt = rng.randn(xt.shape[0], 2) p = ot.unif(n_samples) q = ot.unif(n_samples) @@ -2209,7 +2215,6 @@ def line_search(cost, G, deltaG, Mi, cost_G): def test_entropic_semirelaxed_gromov(nx): - np.random.seed(0) # unbalanced proportions list_n = [30, 15] nt = 2 @@ -2218,10 +2223,11 @@ def test_entropic_semirelaxed_gromov(nx): C1 = np.zeros((ns, ns), dtype=np.float64) C2 = np.array([[0.8, 0.05], [0.05, 1.]], dtype=np.float64) + rng = np.random.RandomState(0) for i in range(nt): for j in range(nt): ni, nj = list_n[i], list_n[j] - xij = np.random.binomial(size=(ni, nj), n=1, p=C2[i, j]) + xij = rng.binomial(size=(ni, nj), n=1, p=C2[i, j]) C1[i * ni: (i + 1) * ni, j * nj: (j + 1) * nj] = xij p = ot.unif(ns, type_as=C1) q0 = ot.unif(C2.shape[0], type_as=C1) @@ -2318,7 +2324,7 @@ def test_entropic_semirelaxed_gromov_dtype_device(nx): def test_entropic_semirelaxed_fgw(nx): - np.random.seed(0) + rng = np.random.RandomState(0) list_n = [16, 8] nt = 2 ns = 24 @@ -2329,11 +2335,11 @@ def test_entropic_semirelaxed_fgw(nx): for i in range(nt): for j in range(nt): ni, nj = list_n[i], list_n[j] - xij = np.random.binomial(size=(ni, nj), n=1, p=C2[i, j]) + xij = rng.binomial(size=(ni, nj), n=1, p=C2[i, j]) C1[i * ni: (i + 1) * ni, j * nj: (j + 1) * nj] = xij F1 = np.zeros((ns, 1)) - F1[:16] = np.random.normal(loc=0., scale=0.01, size=(16, 1)) - F1[16:] = np.random.normal(loc=1., scale=0.01, size=(8, 1)) + F1[:16] = rng.normal(loc=0., scale=0.01, size=(16, 1)) + F1[16:] = rng.normal(loc=1., scale=0.01, size=(8, 1)) F2 = np.zeros((2, 1)) F2[1, :] = 1. 
M = (F1 ** 2).dot(np.ones((1, nt))) + np.ones((ns, 1)).dot((F2 ** 2).T) - 2 * F1.dot(F2.T) @@ -2407,7 +2413,8 @@ def test_entropic_semirelaxed_fgw_dtype_device(nx): xt = xs[::-1].copy() - ys = np.random.randn(xs.shape[0], 2) + rng = np.random.RandomState(42) + ys = rng.randn(xs.shape[0], 2) yt = ys[::-1].copy() p = ot.unif(n_samples) @@ -2441,10 +2448,10 @@ def test_not_implemented_solver(): mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) - xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=42) + rng = np.random.RandomState(42) + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=rng) xt = xs[::-1].copy() - - ys = np.random.randn(xs.shape[0], 2) + ys = rng.randn(xs.shape[0], 2) yt = ys[::-1].copy() p = ot.unif(n_samples) diff --git a/test/test_optim.py b/test/test_optim.py index a43e704c3..cf90350d5 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -11,7 +11,6 @@ def test_conditional_gradient(nx): n_bins = 100 # nb bins - np.random.seed(0) # bin positions x = np.arange(n_bins, dtype=np.float64) @@ -90,7 +89,6 @@ def fb(G): def test_generalized_conditional_gradient(nx): n_bins = 100 # nb bins - np.random.seed(0) # bin positions x = np.arange(n_bins, dtype=np.float64) diff --git a/test/test_ot.py b/test/test_ot.py index 068080baa..cbb63185a 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -340,7 +340,6 @@ def test_free_support_barycenter_backends(nx): def test_generalised_free_support_barycenter(): - np.random.seed(42) # random inits X = [np.array([-1., -1.]).reshape((1, 2)), np.array([1., 1.]).reshape((1, 2))] # two 2D points bar is obviously 0 a = [np.array([1.]), np.array([1.])] @@ -361,7 +360,6 @@ def test_generalised_free_support_barycenter(): def test_generalised_free_support_barycenter_backends(nx): - np.random.seed(42) X = [np.array([-1.]).reshape((1, 1)), np.array([1.]).reshape((1, 1))] a = [np.array([1.]), np.array([1.])] P = [np.array([1.]).reshape((1, 1)), np.array([1.]).reshape((1, 1))] diff --git a/test/test_partial.py b/test/test_partial.py index 86f9e6241..0b49b2892 100755 --- a/test/test_partial.py +++ b/test/test_partial.py @@ -20,10 +20,11 @@ def test_raise_errors(): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 2]]) - xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov) - xs = np.append(xs, (np.random.rand(n_noise, 2) + 1) * 4).reshape((-1, 2)) - xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov) - xt = np.append(xt, (np.random.rand(n_noise, 2) + 1) * -3).reshape((-1, 2)) + rng = np.random.RandomState(42) + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=rng) + xs = np.append(xs, (rng.rand(n_noise, 2) + 1) * 4).reshape((-1, 2)) + xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=rng) + xt = np.append(xt, (rng.rand(n_noise, 2) + 1) * -3).reshape((-1, 2)) M = ot.dist(xs, xt) @@ -68,10 +69,11 @@ def test_partial_wasserstein_lagrange(): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 2]]) - xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov) - xs = np.append(xs, (np.random.rand(n_noise, 2) + 1) * 4).reshape((-1, 2)) - xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov) - xt = np.append(xt, (np.random.rand(n_noise, 2) + 1) * -3).reshape((-1, 2)) + rng = np.random.RandomState(42) + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=rng) + xs = np.append(xs, (rng.rand(n_noise, 2) + 1) * 4).reshape((-1, 2)) + xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=rng) + xt = np.append(xt, 
(rng.rand(n_noise, 2) + 1) * -3).reshape((-1, 2)) M = ot.dist(xs, xt) @@ -91,10 +93,11 @@ def test_partial_wasserstein(nx): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 2]]) - xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov) - xs = np.append(xs, (np.random.rand(n_noise, 2) + 1) * 4).reshape((-1, 2)) - xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov) - xt = np.append(xt, (np.random.rand(n_noise, 2) + 1) * -3).reshape((-1, 2)) + rng = np.random.RandomState(42) + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=rng) + xs = np.append(xs, (rng.rand(n_noise, 2) + 1) * 4).reshape((-1, 2)) + xt = ot.datasets.make_2D_samples_gauss(n_samples, mu, cov, random_state=rng) + xt = np.append(xt, (rng.rand(n_noise, 2) + 1) * -3).reshape((-1, 2)) M = ot.dist(xs, xt) @@ -210,7 +213,7 @@ def test_entropic_partial_wasserstein_gradient(): def test_partial_gromov_wasserstein(): - rng = np.random.RandomState(seed=42) + rng = np.random.RandomState(42) n_samples = 20 # nb samples n_noise = 10 # nb of samples (noise) @@ -223,7 +226,7 @@ def test_partial_gromov_wasserstein(): mu_t = np.array([0, 0, 0]) cov_t = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, rng) + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=rng) xs = np.concatenate((xs, ((rng.rand(n_noise, 2) + 1) * 4)), axis=0) P = sp.linalg.sqrtm(cov_t) xt = rng.randn(n_samples, 3).dot(P) + mu_t diff --git a/test/test_plot.py b/test/test_plot.py index caf84deb1..e07977952 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -52,9 +52,10 @@ def test_plot2D_samples_mat(): mu_t = np.array([4, 4]) cov_t = np.array([[1, -.8], [-.8, 1]]) - xs = ot.datasets.make_2D_samples_gauss(n_bins, mu_s, cov_s) - xt = ot.datasets.make_2D_samples_gauss(n_bins, mu_t, cov_t) + rng = np.random.RandomState(42) + xs = ot.datasets.make_2D_samples_gauss(n_bins, mu_s, cov_s, random_state=rng) + xt = ot.datasets.make_2D_samples_gauss(n_bins, mu_t, cov_t, random_state=rng) - G = 1.0 * (np.random.rand(n_bins, n_bins) < 0.01) + G = 1.0 * (rng.rand(n_bins, n_bins) < 0.01) ot.plot.plot2D_samples_mat(xs, xt, G, thr=1e-5) diff --git a/test/test_regpath.py b/test/test_regpath.py index 967c27b0f..76be39caf 100644 --- a/test/test_regpath.py +++ b/test/test_regpath.py @@ -16,9 +16,9 @@ def test_fully_relaxed_path(): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 2]]) - np.random.seed(0) - xs = ot.datasets.make_2D_samples_gauss(n_source, mu, cov) - xt = ot.datasets.make_2D_samples_gauss(n_target, mu, cov) + rng = np.random.RandomState(0) + xs = ot.datasets.make_2D_samples_gauss(n_source, mu, cov, random_state=rng) + xt = ot.datasets.make_2D_samples_gauss(n_target, mu, cov, random_state=rng) # source and target distributions a = ot.utils.unif(n_source) @@ -44,9 +44,9 @@ def test_semi_relaxed_path(): mu = np.array([0, 0]) cov = np.array([[1, 0], [0, 2]]) - np.random.seed(0) - xs = ot.datasets.make_2D_samples_gauss(n_source, mu, cov) - xt = ot.datasets.make_2D_samples_gauss(n_target, mu, cov) + rng = np.random.RandomState(0) + xs = ot.datasets.make_2D_samples_gauss(n_source, mu, cov, random_state=rng) + xt = ot.datasets.make_2D_samples_gauss(n_target, mu, cov, random_state=rng) # source and target distributions a = ot.utils.unif(n_source) diff --git a/test/test_sliced.py b/test/test_sliced.py index 6d5a27b86..0062e12a0 100644 --- a/test/test_sliced.py +++ b/test/test_sliced.py @@ -272,7 +272,7 @@ def test_projections_stiefel(): rng = np.random.RandomState(0) 
n_projs = 500 - x = np.random.randn(100, 3) + x = rng.randn(100, 3) x = x / np.sqrt(np.sum(x**2, -1, keepdims=True)) ssw, log = ot.sliced_wasserstein_sphere(x, x, n_projections=n_projs, diff --git a/test/test_unbalanced.py b/test/test_unbalanced.py index b76d738ab..24e5bc427 100644 --- a/test/test_unbalanced.py +++ b/test/test_unbalanced.py @@ -292,11 +292,9 @@ def test_implemented_methods(nx): @pytest.mark.parametrize("reg_div,regm_div", itertools.product(['kl', 'l2', 'entropy'], ['kl', 'l2'])) def test_lbfgsb_unbalanced(nx, reg_div, regm_div): - - np.random.seed(42) - - xs = np.random.randn(5, 2) - xt = np.random.randn(6, 2) + rng = np.random.RandomState(42) + xs = rng.randn(5, 2) + xt = rng.randn(6, 2) M = ot.dist(xs, xt) diff --git a/test/test_utils.py b/test/test_utils.py index a726a588c..787fbe68a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -195,7 +195,7 @@ def test_dist(): for metric in metrics_w: print(metric) - ot.dist(x, x, metric=metric, p=3, w=np.random.random((2, ))) + ot.dist(x, x, metric=metric, p=3, w=rng.random((2, ))) ot.dist(x, x, metric=metric, p=3, w=None) # check that not having any weight does not cause issues for metric in metrics: print(metric) @@ -271,8 +271,9 @@ def test_clean_zeros(): def test_cost_normalization(nx): + rng = np.random.RandomState(0) - C = np.random.rand(10, 10) + C = rng.rand(10, 10) C1 = nx.from_numpy(C) # does nothing @@ -392,8 +393,8 @@ def test_OTResult(): def test_get_coordinate_circle(): - - u = np.random.rand(1, 100) + rng = np.random.RandomState(42) + u = rng.rand(1, 100) x1, y1 = np.cos(u * (2 * np.pi)), np.sin(u * (2 * np.pi)) x = np.concatenate([x1, y1]).T x_p = ot.utils.get_coordinate_circle(x)
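
A minimal sketch of the seeding convention adopted throughout this patch, assuming ot.utils.check_random_state mirrors the scikit-learn helper of the same name; the shapes and the reg / lr / seed values below are illustrative only:

    import numpy as np
    import ot
    from ot.utils import check_random_state

    # Local generator for test data (replaces np.random.seed + np.random.*)
    rng = np.random.RandomState(42)
    xs = rng.randn(20, 2)
    xt = rng.randn(30, 2)
    a, b = ot.unif(20), ot.unif(30)
    M = ot.dist(xs, xt)

    # Solvers patched above take an explicit random_state and resolve it
    # internally via check_random_state instead of touching the global RNG.
    beta = ot.stochastic.averaged_sgd_entropic_transport(
        a, b, M, reg=1.0, numItermax=1000, lr=0.1, random_state=0)

    # check_random_state: None -> global state, int -> seeded RandomState,
    # RandomState instance -> returned unchanged.
    assert isinstance(check_random_state(0), np.random.RandomState)

Passing the generator (or an int seed) explicitly keeps each call and each test reproducible without mutating NumPy's global state, so results no longer depend on the order in which tests run.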