From a2ae9e10932e58448292e7e3412958649ec3c2d0 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 23 Apr 2019 09:40:36 +0200 Subject: [PATCH 01/52] initiate PR --- metric_learn/itml.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 9b6dccb2..082a2564 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -31,7 +31,7 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - A0=None, verbose=False, preprocessor=None): + A0='deprecated', verbose=False, preprocessor=None): """Initialize ITML. Parameters @@ -43,8 +43,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, convergence_threshold : float, optional - A0 : (d x d) matrix, optional - initial regularization matrix, defaults to identity + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use parameter `init` instead. verbose : bool, optional if True, prints information while learning From 5e626d59113b44eb8288ad83fafc1570fd671b39 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 24 Apr 2019 15:52:43 +0200 Subject: [PATCH 02/52] Revert "initiate PR" This reverts commit a2ae9e10932e58448292e7e3412958649ec3c2d0. --- metric_learn/itml.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 082a2564..9b6dccb2 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -31,7 +31,7 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - A0='deprecated', verbose=False, preprocessor=None): + A0=None, verbose=False, preprocessor=None): """Initialize ITML. Parameters @@ -43,10 +43,8 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, convergence_threshold : float, optional - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use parameter `init` instead. 
+ A0 : (d x d) matrix, optional + initial regularization matrix, defaults to identity verbose : bool, optional if True, prints information while learning From ffcfa2d029814a90bdb7e3aac466c3496fdb4ea7 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 24 Apr 2019 16:13:37 +0200 Subject: [PATCH 03/52] FEAT: uniformize init for NCA and RCA --- metric_learn/_util.py | 130 +++++++++++++++++++++++++++- metric_learn/base_metric.py | 2 +- metric_learn/covariance.py | 2 +- metric_learn/itml.py | 4 +- metric_learn/lmnn.py | 68 +++++++++++++-- metric_learn/lsml.py | 4 +- metric_learn/mmc.py | 4 +- metric_learn/nca.py | 56 +++++++++++- metric_learn/sdml.py | 4 +- test/metric_learn_test.py | 8 +- test/test_base_metric.py | 10 ++- test/test_mahalanobis_mixin.py | 154 +++++++++++++++++++++++++++++++-- 12 files changed, 409 insertions(+), 37 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 33311620..cfed1caa 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,10 +1,13 @@ -import warnings import numpy as np import six from numpy.linalg import LinAlgError +from sklearn.decomposition import PCA from sklearn.utils import check_array -from sklearn.utils.validation import check_X_y +from sklearn.utils.validation import check_X_y, check_random_state from metric_learn.exceptions import PreprocessorError +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +import sys +import time # hack around lack of axis kwarg in older numpy versions try: @@ -405,3 +408,126 @@ def validate_vector(u, dtype=None): if u.ndim > 1: raise ValueError("Input vector should be 1-D.") return u + + +def _initialize_transformer(X, y=None, init='auto', num_dims=None, + verbose=False, random_state=None): + """Returns the initial transformer to be used depending on the arguments. + + Parameters + ---------- + init : array-like or None or str + The initial matrix. + + X : array-like + The input samples. + + y : array-like or None + The input labels (or not if there are no labels). + + num_dims : int + The number of components to take. + + verbose : bool + Whether to print the details of the initialization or not. + + random_state: int or `numpy.RandomState` or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + Returns + ------- + init_transformer : `numpy.ndarray` + The initial transformer to use. + """ + + if num_dims > X.shape[1]: + raise ValueError('The preferred dimensionality of the ' + 'projected space `num_dims` ({}) cannot ' + 'be greater than the given data ' + 'dimensionality ({})!' + .format(num_dims, X.shape[1])) + + if isinstance(init, np.ndarray): + init = check_array(init) + + # Assert that init.shape[1] = X.shape[1] + if init.shape[1] != X.shape[1]: + raise ValueError('The input dimensionality ({}) of the given ' + 'linear transformation `init` must match the ' + 'dimensionality of the given inputs `X` ({}).' + .format(init.shape[1], X.shape[1])) + + # Assert that init.shape[0] <= init.shape[1] + if init.shape[0] > init.shape[1]: + raise ValueError('The output dimensionality ({}) of the given ' + 'linear transformation `init` cannot be ' + 'greater than its input dimensionality ({}).' 
+                       .format(init.shape[0], init.shape[1]))
+
+    if num_dims is not None:
+      # TODO: check for all algos that _iinitialize_transformer is at the
+      # right place (I think before the checks cf NCA)
+      # Assert that self.num_dims = init.shape[0]
+      if num_dims != init.shape[0]:
+        raise ValueError('The preferred dimensionality of the '
+                         'projected space `num_dims` ({}) does'
+                         ' not match the output dimensionality of '
+                         'the given linear transformation '
+                         '`init` ({})!'
+                         .format(num_dims,
+                                 init.shape[0]))
+  elif init in ['auto', 'pca', 'lda', 'identity', 'random']:
+    pass
+  else:
+    raise ValueError(
+        "`init` must be 'auto', 'pca', 'lda', 'identity', 'random' "
+        "or a numpy array of shape (num_dims, n_features).")
+
+  random_state = check_random_state(random_state)
+  transformation = init
+  if isinstance(init, np.ndarray):
+    pass
+  else:
+    n_samples, n_features = X.shape
+    num_dims = num_dims or n_features
+    if init == 'auto':
+      n_classes = len(np.unique(y))
+      if num_dims <= min(n_features, n_classes - 1):
+        init = 'lda'
+      elif num_dims < min(n_features, n_samples):
+        init = 'pca'
+      else:
+        init = 'identity'
+    if init == 'identity':
+      transformation = np.eye(num_dims, X.shape[1])
+    elif init == 'random':
+      transformation = random_state.randn(num_dims,
+                                          X.shape[1])
+    elif init in {'pca', 'lda'}:
+      init_time = time.time()
+      if init == 'pca':
+        pca = PCA(n_components=num_dims,
+                  random_state=random_state)
+        if verbose:
+          print('Finding principal components... ')
+          sys.stdout.flush()
+        pca.fit(X)
+        transformation = pca.components_
+      elif init == 'lda':
+        lda = LinearDiscriminantAnalysis(n_components=num_dims)
+        if verbose:
+          print('Finding most discriminative components... ')
+          sys.stdout.flush()
+        lda.fit(X, y)
+        transformation = lda.scalings_.T[:num_dims]
+      if verbose:
+        print('done in {:5.2f}s'.format(time.time() - init_time))
+  return transformation
+
+
+def _initialize_metric_mahalanobis():
+  """Returns the initial metric from arguments"""
+  raise NotImplementedError
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 9f127f58..ab701f87 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -288,7 +288,7 @@ def get_mahalanobis_matrix(self):
 
     Returns
     -------
-    M : `numpy.ndarray`, shape=(n_components, n_features)
+    M : `numpy.ndarray`, shape=(n_features, n_features)
       The copy of the learned Mahalanobis matrix.
     """
     return self.transformer_.T.dot(self.transformer_)
diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py
index 7a04923d..fac2b58a 100644
--- a/metric_learn/covariance.py
+++ b/metric_learn/covariance.py
@@ -21,7 +21,7 @@ class Covariance(MahalanobisMixin, TransformerMixin):
 
   Attributes
   ----------
-  transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
+  transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
     The linear transformation ``L`` deduced from the learned Mahalanobis
     metric (See function `transformer_from_metric`.)
   """
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 9b6dccb2..5f4eb5d7 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -145,7 +145,7 @@ class ITML(_BaseITML, _PairsClassifierMixin):
   n_iter_ : `int`
     The number of iterations the solver has run.
 
-  transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
+  transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
     The linear transformation ``L`` deduced from the learned Mahalanobis
     metric (See function `transformer_from_metric`.)
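A note for reviewers of this patch: the 'auto' policy implemented in
`_initialize_transformer` above reduces to a three-way rule, shown in
isolation below so the thresholds are easy to check. The helper name
`resolve_auto_init` is hypothetical and not part of the patch; it only
mirrors the branch added in metric_learn/_util.py.

    def resolve_auto_init(num_dims, n_samples, n_features, n_classes):
        # Mirrors the 'auto' branch of _initialize_transformer: prefer 'lda'
        # when the target dimension fits inside the label subspace, fall back
        # to 'pca' while it is still a strict reduction, else 'identity'.
        if num_dims <= min(n_features, n_classes - 1):
            return 'lda'
        elif num_dims < min(n_features, n_samples):
            return 'pca'
        return 'identity'

    # With n_samples=100, n_features=5, n_classes=3:
    print(resolve_auto_init(2, 100, 5, 3))  # -> 'lda'
    print(resolve_auto_init(4, 100, 5, 3))  # -> 'pca'
    print(resolve_auto_init(5, 100, 5, 3))  # -> 'identity'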
@@ -213,7 +213,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index f9cd0e91..5a9ecdd2 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -16,18 +16,61 @@ from six.moves import xrange from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin + +from metric_learn._util import _initialize_transformer from .base_metric import MahalanobisMixin # commonality between LMNN implementations class _base_LMNN(MahalanobisMixin, TransformerMixin): - def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, - regularization=0.5, convergence_tol=0.001, use_pca=True, - verbose=False, preprocessor=None): + def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, + learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, + use_pca=True, num_dims=None, + verbose=False, preprocessor=None, random_state=None): """Initialize the LMNN object. Parameters ---------- + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``num_dims``, the most reasonable initialization + will be chosen. If ``num_dims <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``num_dims < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``num_dims`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(num_dims, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``num_dims > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``num_dims`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``num_dims`` rows. + + 'random' + The initial transformation will be a random array of shape + `(num_dims, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``num_dims`` is not None, n_features_a must match it. + k : int, optional Number of neighbors to consider, not including self-edges. @@ -37,7 +80,14 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. 
If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. """ + self.init = init self.k = k self.min_iter = min_iter self.max_iter = max_iter @@ -45,7 +95,9 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, self.regularization = regularization self.convergence_tol = convergence_tol self.use_pca = use_pca + self.num_dims = num_dims # FIXME Tmp fix waiting for #167 to be merged: self.verbose = verbose + self.random_state = random_state super(_base_LMNN, self).__init__(preprocessor) @@ -60,13 +112,15 @@ def fit(self, X, y): X, y = self._prepare_inputs(X, y, dtype=float, ensure_min_samples=2) num_pts, num_dims = X.shape + # FIXME Tmp fix waiting for #167 to be merged: + n_dims = self.num_dims if self.num_dims is not None else num_dims unique_labels, label_inds = np.unique(y, return_inverse=True) if len(label_inds) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) - if self.use_pca: - warnings.warn('use_pca does nothing for the python_LMNN implementation') - self.transformer_ = np.eye(num_dims) + self.transformer_ = _initialize_transformer(X, y, self.init, n_dims, + self.verbose, + self.random_state) required_k = np.bincount(label_inds).min() if self.k > required_k: raise ValueError('not enough class labels for specified k' @@ -99,6 +153,8 @@ def fit(self, X, y): self._loss_grad(X, L, dfG, impostors, 1, k, reg, target_neighbors, df, a1, a2)) + it = 1 # we already made one iteration + # main loop for it in xrange(2, self.max_iter): # then at each iteration, we try to find a value of L that has better diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 536719ba..ac933558 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -139,7 +139,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ @@ -175,7 +175,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 346db2f8..cdb44ddc 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -356,7 +356,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -406,7 +406,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): n_iter_ : `int` The number of iterations the solver has run. - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) 
""" diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 5abe52e3..38ae3ce2 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -14,6 +14,7 @@ from sklearn.utils.fixes import logsumexp from sklearn.base import TransformerMixin +from metric_learn._util import _initialize_transformer from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps @@ -31,12 +32,52 @@ class NCA(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, max_iter=100, tol=None, verbose=False, - preprocessor=None): + def __init__(self, init='auto', num_dims=None, max_iter=100, tol=None, + verbose=False, preprocessor=None, random_state=None): """Neighborhood Components Analysis Parameters ---------- + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``num_dims``, the most reasonable initialization + will be chosen. If ``num_dims <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``num_dims < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``num_dims`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(num_dims, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``num_dims > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``num_dims`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``num_dims`` rows. + + 'random' + The initial transformation will be a random array of shape + `(num_dims, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``num_dims`` is not None, n_features_a must match it. + num_dims : int, optional (default=None) Embedding dimensionality. If None, will be set to ``n_features`` (``d``) at fit time. @@ -49,11 +90,19 @@ def __init__(self, num_dims=None, max_iter=100, tol=None, verbose=False, verbose : bool, optional (default=False) Whether to print progress messages or not. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. 
""" + self.init = init self.num_dims = num_dims self.max_iter = max_iter self.tol = tol self.verbose = verbose + self.random_state = random_state super(NCA, self).__init__(preprocessor) def fit(self, X, y): @@ -71,8 +120,7 @@ def fit(self, X, y): train_time = time.time() # Initialize A to a scaling matrix - A = np.zeros((num_dims, d)) - np.fill_diagonal(A, 1./(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))) + A = _initialize_transformer(X, labels, self.init, num_dims, self.verbose) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index e9828d07..4827cc39 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -135,7 +135,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) @@ -183,7 +183,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Attributes ---------- - transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) + transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) """ diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index a785d60d..d5c54538 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -475,9 +475,7 @@ def test_singleton_class(self): EPS = np.finfo(float).eps A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca = NCA(init=A, max_iter=30, num_dims=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.transformer_, A) @@ -488,9 +486,7 @@ def test_one_class(self): y = self.iris_labels[self.iris_labels == 0] EPS = np.finfo(float).eps A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. 
/ (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca = NCA(init=A, max_iter=30, num_dims=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.transformer_, A) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 6c9a6dc5..4bb3dc14 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -16,13 +16,15 @@ def test_covariance(self): def test_lmnn(self): self.assertRegexpMatches( str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " - r"max_iter=1000,\n min_iter=50, preprocessor=None, " - r"regularization=0.5, use_pca=True,\n verbose=False\)") + r"(python_)?LMNN\(convergence_tol=0.001, init='auto', k=3, " + r"learn_rate=1e-07,\n max_iter=1000, min_iter=50, " + r"num_dims=None, preprocessor=None,\n random_state=None, " + r"regularization=0.5, use_pca=True, verbose=False\)") def test_nca(self): self.assertEqual(str(metric_learn.NCA()), - "NCA(max_iter=100, num_dims=None, preprocessor=None, " + "NCA(init='auto', max_iter=100, num_dims=None, " + "preprocessor=None,\n random_state=None, " "tol=None, verbose=False)") def test_lfda(self): diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 15bf1aed..4e4dc907 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -14,7 +14,8 @@ _PairsClassifierMixin) from test.test_utils import (ids_metric_learners, metric_learners, - remove_y_quadruplets) + remove_y_quadruplets, ids_regressors, + ids_supervised_learners, supervised_learners) RNG = check_random_state(0) @@ -56,7 +57,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.transformer_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - - embedded_pairs[:, 0])**2, + embedded_pairs[:, 0])**2, axis=-1)) assert_array_almost_equal(model.score_pairs(pairs), distances) @@ -138,9 +139,6 @@ def test_embed_dim(estimator, build_dataset): assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}: - # TODO: - # avoid this enumeration and rather test if hasattr n_components - # as soon as we have made the arguments names as such (issue #167) model.set_params(num_dims=2) model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) @@ -303,3 +301,149 @@ def test_transformer_is_2D(estimator, build_dataset): labels = labels[to_keep] model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) assert model.transformer_.shape == (1, 1) # the transformer must be 2D + + +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_supervised_learners, + supervised_learners) + if hasattr(ml, 'num_dims') and + hasattr(ml, 'init') and + (idml not in ids_regressors)], + ids=[idml for idml, (ml, _) + in zip(ids_supervised_learners, + supervised_learners) + if hasattr(ml, 'num_dims') and + hasattr(ml, 'init') and + (idml not in ids_regressors)]) +def test_init_transformation(estimator, build_dataset): + input_data, labels, _, X = build_dataset() + model = clone(estimator) + rng = np.random.RandomState(42) + + # Start learning from scratch + model.set_params(init='identity') + model.fit(input_data, labels) + + # Initialize with random + model.set_params(init='random') + model.fit(input_data, labels) + + # Initialize with auto + model.set_params(init='auto') + 
model.fit(input_data, labels) + + # Initialize with PCA + model.set_params(init='pca') + model.fit(input_data, labels) + + # Initialize with LDA + model.set_params(init='lda') + model.fit(input_data, labels) + + init = rng.rand(X.shape[1], X.shape[1]) + model.set_params(init=init) + model.fit(input_data, labels) + + # init.shape[1] must match X.shape[1] + init = rng.rand(X.shape[1], X.shape[1] + 1) + model.set_params(init=init) + msg = ('The input dimensionality ({}) of the given ' + 'linear transformation `init` must match the ' + 'dimensionality of the given inputs `X` ({}).' + .format(init.shape[1], X.shape[1])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init.shape[0] must be <= init.shape[1] + init = rng.rand(X.shape[1] + 1, X.shape[1]) + model.set_params(init=init) + msg = ('The output dimensionality ({}) of the given ' + 'linear transformation `init` cannot be ' + 'greater than its input dimensionality ({}).' + .format(init.shape[0], init.shape[1])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init.shape[0] must match num_dims + init = rng.rand(X.shape[1], X.shape[1]) + num_dims = X.shape[1] - 1 + model.set_params(init=init, num_dims=num_dims) + msg = ('The preferred dimensionality of the ' + 'projected space `num_dims` ({}) does not match ' + 'the output dimensionality of the given ' + 'linear transformation `init` ({})!' + .format(num_dims, init.shape[0])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init must be as specified in the docstring + model.set_params(init=1) + msg = ("`init` must be 'auto', 'pca', 'lda', 'identity', " + "'random' or a numpy array of shape " + "(num_dims, n_features).") + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('n_samples', [3, 5, 7, 11]) +@pytest.mark.parametrize('n_features', [3, 5, 7, 11]) +@pytest.mark.parametrize('n_classes', [5, 7, 11]) +@pytest.mark.parametrize('num_dims', [3, 5, 7, 11]) +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_supervised_learners, + supervised_learners) + if hasattr(ml, 'num_dims') and + hasattr(ml, 'init') and + (idml not in ids_regressors)], + ids=[idml for idml, (ml, _) + in zip(ids_supervised_learners, + supervised_learners) + if hasattr(ml, 'num_dims') and + hasattr(ml, 'init') and + (idml not in ids_regressors)]) +def test_auto_init(n_samples, n_features, n_classes, num_dims, + estimator, build_dataset): + # Test that auto choose the init as expected with every configuration + # of order of n_samples, n_features, n_classes and num_dims. 
+ input_data, labels, _, X = build_dataset() + model_base = clone(estimator) + rng = np.random.RandomState(42) + model_base.set_params(init='auto', + num_dims=num_dims, + random_state=rng) + + # To make the test work for LMNN: + if 'LMNN' in model_base.__class__.__name__: + model_base.set_params(k=1) + # To make the test faster for estimators that have a max_iter: + if hasattr(model_base, 'max_iter'): + model_base.set_params(max_iter=1) + if n_classes >= n_samples: + pass + # n_classes > n_samples is impossible, and n_classes == n_samples + # throws an error from lda but is an absurd case + else: + X = rng.randn(n_samples, n_features) + y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples] + if num_dims > n_features: + # this would return a ValueError, which is tested in + # test_init_transformation + pass + else: + model = clone(model_base) + model.fit(X, y) + if num_dims <= min(n_classes - 1, n_features): + model_other = clone(model_base).set_params(init='lda') + elif num_dims < min(n_features, n_samples): + model_other = clone(model_base).set_params(init='pca') + else: + model_other = clone(model_base).set_params(init='identity') + model_other.fit(X, y) + assert_array_almost_equal(model.transformer_, + model_other.transformer_) From 27eb74ba37ee2f3ce309adf792ea1177c9045d59 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 24 Apr 2019 16:29:58 +0200 Subject: [PATCH 04/52] Let the check of num_dims be done in the other PR --- metric_learn/_util.py | 23 ++++++++--------------- metric_learn/lmnn.py | 2 +- metric_learn/nca.py | 2 +- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index cfed1caa..1b354ff1 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -410,14 +410,16 @@ def validate_vector(u, dtype=None): return u -def _initialize_transformer(X, y=None, init='auto', num_dims=None, - verbose=False, random_state=None): +def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, + random_state=None): """Returns the initial transformer to be used depending on the arguments. Parameters ---------- - init : array-like or None or str - The initial matrix. + num_dims : int + The number of components to take. (Note: it should have been checked + before, meaning it should not be None and it should be a value in + [1, X.shape[1]]) X : array-like The input samples. @@ -425,8 +427,8 @@ def _initialize_transformer(X, y=None, init='auto', num_dims=None, y : array-like or None The input labels (or not if there are no labels). - num_dims : int - The number of components to take. + init : array-like or None or str + The initial matrix. verbose : bool Whether to print the details of the initialization or not. @@ -443,13 +445,6 @@ def _initialize_transformer(X, y=None, init='auto', num_dims=None, The initial transformer to use. """ - if num_dims > X.shape[1]: - raise ValueError('The preferred dimensionality of the ' - 'projected space `num_dims` ({}) cannot ' - 'be greater than the given data ' - 'dimensionality ({})!' 
- .format(num_dims, X.shape[1])) - if isinstance(init, np.ndarray): init = check_array(init) @@ -468,8 +463,6 @@ def _initialize_transformer(X, y=None, init='auto', num_dims=None, .format(init.shape[0], init.shape[1])) if num_dims is not None: - # TODO: check for all algos that _iinitialize_transformer is at the - # right place (I think before the checks cf NCA) # Assert that self.num_dims = init.shape[0] if num_dims != init.shape[0]: raise ValueError('The preferred dimensionality of the ' diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 5a9ecdd2..2db4caae 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -118,7 +118,7 @@ def fit(self, X, y): if len(label_inds) != num_pts: raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) - self.transformer_ = _initialize_transformer(X, y, self.init, n_dims, + self.transformer_ = _initialize_transformer(n_dims, X, y, self.init, self.verbose, self.random_state) required_k = np.bincount(label_inds).min() diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 38ae3ce2..6468654e 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -120,7 +120,7 @@ def fit(self, X, y): train_time = time.time() # Initialize A to a scaling matrix - A = _initialize_transformer(X, labels, self.init, num_dims, self.verbose) + A = _initialize_transformer(num_dims, X, labels, self.init, self.verbose) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] From 4395c1371f9b7ecb37d463b02b3f6c39a92c9568 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 2 May 2019 11:56:10 +0200 Subject: [PATCH 05/52] Add metric initialization for algorithms that learn a mahalanobis matrix --- metric_learn/_util.py | 81 ++++++++++++++++++++++++++++-- metric_learn/itml.py | 78 ++++++++++++++++++++++++---- metric_learn/lsml.py | 64 +++++++++++++++++++---- metric_learn/mmc.py | 92 +++++++++++++++++++++++++++------- metric_learn/sdml.py | 79 +++++++++++++++++++++++------ test/test_base_metric.py | 43 +++++++++------- test/test_mahalanobis_mixin.py | 65 +++++++++++++++++++++++- 7 files changed, 424 insertions(+), 78 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 1b354ff1..09e99bdf 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -4,8 +4,9 @@ from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils.validation import check_X_y, check_random_state -from metric_learn.exceptions import PreprocessorError +from .exceptions import PreprocessorError from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from scipy.linalg import pinvh import sys import time @@ -429,6 +430,7 @@ def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, init : array-like or None or str The initial matrix. + # TODO: put the complete doc here verbose : bool Whether to print the details of the initialization or not. @@ -521,6 +523,77 @@ def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, return transformation -def _initialize_metric_mahalanobis(): - """Returns the initial metric from arguments""" - raise NotImplementedError +def _initialize_metric_mahalanobis(pairs, init='identity', random_state=None, + return_inverse=False): + """Returns the initial mahalanobis matrix to be used depending on the + arguments. + + Parameters + ---------- + pairs : array-like + The input samples. + + init : array-like or None or str + The initial matrix. 
+
+  random_state : int or `numpy.RandomState` or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``init='random'``, ``random_state`` is used to initialize the random
+    matrix.
+
+  return_inverse : bool, optional (default=False)
+    Whether to return the inverse of the matrix initializing the metric. This
+    can sometimes be useful.
+
+  Returns
+  -------
+  M, or (M, M_inv) : `numpy.ndarray`
+    The initial matrix M, and its inverse if `return_inverse=True`.
+  """
+
+  if isinstance(init, np.ndarray):
+    init = check_array(init)  # TODO: do we want to copy the array ?
+    # see how they do it in scikit-learn for instance
+
+    # Assert that init is a square matrix with side pairs.shape[2]
+    if init.shape != (pairs.shape[2],) * 2:
+      raise ValueError('The input dimensionality ({}) of the given '
+                       'mahalanobis matrix `init` must match the '
+                       'dimensionality of the given inputs ({}).'
+                       .format(init.shape, pairs.shape[2]))
+
+  elif init in ['identity', 'covariance', 'random']:
+    pass
+  else:
+    raise ValueError(
+        "`init` must be 'identity', 'covariance', 'random' "
+        "or a numpy array of shape (n_features, n_features).")
+
+  random_state = check_random_state(random_state)
+  M = init
+  if isinstance(init, np.ndarray):
+    if return_inverse:
+      M_inv = pinvh(M)
+  else:
+    n_features = pairs.shape[2]
+    if init == 'identity':
+      M = np.eye(n_features, n_features)
+      if return_inverse:
+        M_inv = M.copy()
+    elif init == 'covariance':
+      X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
+      M_inv = np.atleast_2d(np.cov(X, rowvar=False))
+      # TODO: check atleast_2d necessary
+      M = pinvh(M_inv)
+    elif init == 'random':
+      # we need to create a random symmetric matrix
+      M = random_state.randn(n_features,
+                             n_features)
+      M = np.tril(M) + np.tril(M, -1).T
+      if return_inverse:
+        M_inv = pinvh(M)
+  if return_inverse:
+    return (M, M_inv)
+  else:
+    return M
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 5f4eb5d7..aeac9f6b 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -22,7 +22,7 @@
 from sklearn.base import TransformerMixin
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
-from ._util import vector_norm, transformer_from_metric
+from ._util import transformer_from_metric, _initialize_metric_mahalanobis
 
 
 class _BaseITML(MahalanobisMixin):
@@ -31,7 +31,8 @@ class _BaseITML(MahalanobisMixin):
   _tuple_size = 2  # constraints are pairs
 
   def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
-               A0=None, verbose=False, preprocessor=None):
+               init='identity', A0=None, verbose=False, preprocessor=None,
+               random_state=None):
     """Initialize ITML.
 
     Parameters
     ----------
@@ -43,6 +44,26 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
 
     convergence_threshold : float, optional
 
+    init : string or numpy array, optional (default='identity')
+      Initialization of the linear transformation. Possible options are
+      'identity', 'covariance', 'random', and a numpy array of shape
+      (n_features, n_features).
+
+      'identity'
+        An identity matrix of shape (n_features, n_features).
+
+      'covariance'
+        The inverse covariance matrix.
+
+      'random'
+        The initial transformation will be a random array of shape
+        `(n_features, n_features)`. Each value is sampled from the
+        standard normal distribution.
+ + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + A0 : (d x d) matrix, optional initial regularization matrix, defaults to identity @@ -52,12 +73,21 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold - self.A0 = A0 + self.init = init # explain that it is good to keep the scale with + # the bounds + # TODO: see for other inits how it behave wrt the bound + self.A0 = A0 # TODO: deprecate self.verbose = verbose + self.random_state = random_state super(_BaseITML, self).__init__(preprocessor) def _fit(self, pairs, y, bounds=None): @@ -70,12 +100,11 @@ def _fit(self, pairs, y, bounds=None): else: assert len(bounds) == 2 self.bounds_ = bounds - self.bounds_[self.bounds_==0] = 1e-9 + self.bounds_[self.bounds_ == 0] = 1e-9 # init metric - if self.A0 is None: - A = np.identity(pairs.shape[2]) - else: - A = check_array(self.A0, copy=True) + # pairs will be deduplicated into X two times, see how to avoid that + A = _initialize_metric_mahalanobis(pairs, self.init, self.random_state) + gamma = self.gamma pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] num_pos = len(pos_pairs) @@ -220,7 +249,8 @@ class ITML_Supervised(_BaseITML, TransformerMixin): def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, - bounds='deprecated', A0=None, verbose=False, preprocessor=None): + bounds='deprecated', init='identity', A0=None, verbose=False, + preprocessor=None, random_state=None): """Initialize the supervised version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -244,17 +274,43 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, `bounds` was deprecated in version 0.5.0 and will be removed in 0.6.0. Set `bounds` at fit time instead : `itml_supervised.fit(X, y, bounds=...)` - A0 : (d x d) matrix, optional + + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + + A0 : (d x d) matrix, optional # TODO: deprecate initial regularization matrix, defaults to identity verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. 
+ random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, - A0=A0, verbose=verbose, preprocessor=preprocessor) + A0=A0, verbose=verbose, preprocessor=preprocessor, + random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints self.bounds = bounds diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index ac933558..a6c4497f 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -16,33 +16,61 @@ from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints -from ._util import transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis class _BaseLSML(MahalanobisMixin): _tuple_size = 4 # constraints are quadruplets - def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, - preprocessor=None): + def __init__(self, tol=1e-3, max_iter=1000, init='identity', + prior=None, verbose=False, preprocessor=None, + random_state=None): """Initialize LSML. Parameters ---------- + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + tol : float, optional max_iter : int, optional - prior : (d x d) matrix, optional + prior : (d x d) matrix, optional # TODO: deprecate, and explain how to set + #the new init (the inverse of the prior) guess at a metric [default: inv(covariance(X))] verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ + self.init = init self.prior = prior self.tol = tol self.max_iter = max_iter self.verbose = verbose + self.random_state = random_state super(_BaseLSML, self).__init__(preprocessor) def _fit(self, quadruplets, weights=None): @@ -59,6 +87,8 @@ def _fit(self, quadruplets, weights=None): else: self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 + M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.init, + return_inverse=True) if self.prior is None: X = np.vstack({tuple(row) for row in quadruplets.reshape(-1, quadruplets.shape[2])}) @@ -180,10 +210,9 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): metric (See function `transformer_from_metric`.) 
""" - def __init__(self, tol=1e-3, max_iter=1000, prior=None, + def __init__(self, tol=1e-3, max_iter=1000, init='identity', prior=None, num_labeled='deprecated', num_constraints=None, weights=None, - verbose=False, - preprocessor=None): + verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `LSML`. `LSML_Supervised` creates quadruplets from labeled samples by taking two @@ -195,6 +224,18 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, ---------- tol : float, optional max_iter : int, optional + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] num_labeled : Not used @@ -210,9 +251,14 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ - _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, - verbose=verbose, preprocessor=preprocessor) + _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, init=init, + prior=prior, verbose=verbose, preprocessor=preprocessor, + random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints self.weights = weights diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index cdb44ddc..788de784 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -25,7 +25,7 @@ from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs -from ._util import vector_norm, transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis class _BaseMMC(MahalanobisMixin): @@ -34,14 +34,40 @@ class _BaseMMC(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - A0=None, diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None): + init='identity', A0=None, diagonal=False, diagonal_c=1.0, + verbose=False, preprocessor=None, random_state=None): """Initialize MMC. Parameters ---------- max_iter : int, optional max_proj : int, optional convergence_threshold : float, optional + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. 
+ + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. A0 : (d x d) matrix, optional initial metric, defaults to identity only the main diagonal is taken if `diagonal == True` @@ -56,29 +82,28 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ self.max_iter = max_iter self.max_proj = max_proj self.convergence_threshold = convergence_threshold - self.A0 = A0 + self.init = init + self.A0 = A0 # TODO: deprecate self.diagonal = diagonal self.diagonal_c = diagonal_c self.verbose = verbose + self.random_state = random_state super(_BaseMMC, self).__init__(preprocessor) def _fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - # init metric - if self.A0 is None: - self.A_ = np.identity(pairs.shape[2]) - if not self.diagonal: - # Don't know why division by 10... it's in the original code - # and seems to affect the overall scale of the learned metric. - self.A_ /= 10.0 - else: - self.A_ = check_array(self.A0) + self.A_ = _initialize_metric_mahalanobis(pairs, self.init, + random_state=self.random_state) if self.diagonal: return self._fit_diag(pairs, y) @@ -412,9 +437,9 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled='deprecated', num_constraints=None, A0=None, - diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None): + num_labeled='deprecated', num_constraints=None, init='identity', + A0=None, diagonal=False, diagonal_c=1.0, verbose=False, + preprocessor=None, random_state=None): """Initialize the supervised version of `MMC`. `MMC_Supervised` creates pairs of similar sample by taking same class @@ -432,6 +457,32 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, be removed in 0.6.0. num_constraints: int, optional number of constraints to generate + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. 
A0 : (d x d) matrix, optional initial metric, defaults to identity only the main diagonal is taken if `diagonal == True` @@ -446,11 +497,16 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. """ _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, convergence_threshold=convergence_threshold, - A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, - verbose=verbose, preprocessor=preprocessor) + init=init, A0=A0, diagonal=diagonal, + diagonal_c=diagonal_c, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 4827cc39..e9e17784 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -18,7 +18,7 @@ from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs -from ._util import transformer_from_metric +from ._util import transformer_from_metric, _initialize_metric_mahalanobis try: from inverse_covariance import quic except ImportError: @@ -31,8 +31,9 @@ class _BaseSDML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - verbose=False, preprocessor=None): + def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', + use_cov=True, verbose=False, preprocessor=None, + random_state=None): """ Parameters ---------- @@ -42,7 +43,27 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - use_cov : bool, optional + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + + use_cov : bool, optional # TODO: to deprecate controls prior matrix, will use the identity if use_cov=False verbose : bool, optional @@ -51,11 +72,18 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be gotten like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. 
""" self.balance_param = balance_param self.sparsity_param = sparsity_param - self.use_cov = use_cov + self.init = init + self.use_cov = use_cov # TODO: deprecate and replace by init self.verbose = verbose + self.random_state = random_state super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): @@ -69,11 +97,7 @@ def _fit(self, pairs, y): type_of_inputs='tuples') # set up (the inverse of) the prior M - if self.use_cov: - X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) - prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) - else: - prior_inv = np.identity(pairs.shape[2]) + prior_inv = pinvh(_initialize_metric_mahalanobis(pairs, self.init)) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix @@ -188,20 +212,40 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): metric (See function `transformer_from_metric`.) """ - def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_labeled='deprecated', num_constraints=None, verbose=False, - preprocessor=None): + def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', + use_cov=True, num_labeled='deprecated', num_constraints=None, + verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `SDML`. `SDML_Supervised` creates pairs of similar sample by taking same class samples, and pairs of dissimilar samples by taking different class samples. It then passes these pairs to `SDML` for training. + Parameters ---------- balance_param : float, optional trade off between sparsity and M0 prior sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False num_labeled : Not used @@ -215,10 +259,15 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. 
""" _BaseSDML.__init__(self, balance_param=balance_param, - sparsity_param=sparsity_param, use_cov=use_cov, - verbose=verbose, preprocessor=preprocessor) + sparsity_param=sparsity_param, init=init, + use_cov=use_cov, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 4bb3dc14..b4d2aa25 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -34,34 +34,37 @@ def test_lfda(self): def test_itml(self): self.assertEqual(str(metric_learn.ITML()), """ -ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, - preprocessor=None, verbose=False) +ITML(A0=None, convergence_threshold=0.001, gamma=1.0, init='identity', + max_iter=1000, preprocessor=None, random_state=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001, - gamma=1.0, max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, verbose=False) + gamma=1.0, init='identity', max_iter=1000, num_constraints=None, + num_labeled='deprecated', preprocessor=None, random_state=None, + verbose=False) """.strip('\n')) def test_lsml(self): self.assertEqual( - str(metric_learn.LSML()), - "LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, " - "verbose=False)") + str(metric_learn.LSML()), """ +LSML(init='identity', max_iter=1000, preprocessor=None, prior=None, + random_state=None, tol=0.001, verbose=False) +""".strip('\n')) self.assertEqual(str(metric_learn.LSML_Supervised()), """ -LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated', - preprocessor=None, prior=None, tol=0.001, verbose=False, - weights=None) +LSML_Supervised(init='identity', max_iter=1000, num_constraints=None, + num_labeled='deprecated', preprocessor=None, prior=None, + random_state=None, tol=0.001, verbose=False, weights=None) """.strip('\n')) def test_sdml(self): - self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n verbose=False)") + self.assertEqual(str(metric_learn.SDML()), """ +SDML(balance_param=0.5, init='identity', preprocessor=None, random_state=None, + sparsity_param=0.01, use_cov=True, verbose=False) +""".strip('\n')) self.assertEqual(str(metric_learn.SDML_Supervised()), """ -SDML_Supervised(balance_param=0.5, num_constraints=None, - num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, - use_cov=True, verbose=False) +SDML_Supervised(balance_param=0.5, init='identity', num_constraints=None, + num_labeled='deprecated', preprocessor=None, random_state=None, + sparsity_param=0.01, use_cov=True, verbose=False) """.strip('\n')) def test_rca(self): @@ -80,12 +83,14 @@ def test_mlkr(self): def test_mmc(self): self.assertEqual(str(metric_learn.MMC()), """ MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, - max_iter=100, max_proj=10000, preprocessor=None, verbose=False) + init='identity', max_iter=100, max_proj=10000, preprocessor=None, + random_state=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.MMC_Supervised()), """ MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, - diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, verbose=False) + diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000, + 
num_constraints=None, num_labeled='deprecated', preprocessor=None, + random_state=None, verbose=False) """.strip('\n')) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 4e4dc907..1f13cceb 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -341,6 +341,7 @@ def test_init_transformation(estimator, build_dataset): model.set_params(init='lda') model.fit(input_data, labels) + # Initialize with a numpy array init = rng.rand(X.shape[1], X.shape[1]) model.set_params(init=init) model.fit(input_data, labels) @@ -407,8 +408,8 @@ def test_init_transformation(estimator, build_dataset): if hasattr(ml, 'num_dims') and hasattr(ml, 'init') and (idml not in ids_regressors)]) -def test_auto_init(n_samples, n_features, n_classes, num_dims, - estimator, build_dataset): +def test_auto_init_transformation(n_samples, n_features, n_classes, num_dims, + estimator, build_dataset): # Test that auto choose the init as expected with every configuration # of order of n_samples, n_features, n_classes and num_dims. input_data, labels, _, X = build_dataset() @@ -447,3 +448,63 @@ def test_auto_init(n_samples, n_features, n_classes, num_dims, model_other.fit(X, y) assert_array_almost_equal(model.transformer_, model_other.transformer_) + + +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if not hasattr(ml, 'num_dims') and + hasattr(ml, 'init')], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if not hasattr(ml, 'num_dims') and + hasattr(ml, 'init')]) +def test_init_mahalanobis(estimator, build_dataset): + """Tests that for estimators that learn a mahalanobis matrix + instead of a transformer, i.e. those that are mahalanobis metric learners + where we can change the init, but not choose the num_dims, (TODO: be more + explicit on this characterization, for instance with safe_flags like in + scikit-learn) that the init has an expected behaviour. + """ + input_data, labels, _, X = build_dataset() + model = clone(estimator) + rng = np.random.RandomState(42) + + # Start learning from scratch + model.set_params(init='identity') + model.fit(input_data, labels) + + # Initialize with random + model.set_params(init='random') + model.fit(input_data, labels) + + # Initialize with covariance + model.set_params(init='covariance') + model.fit(input_data, labels) + + # Initialize with a numpy array + init = rng.rand(X.shape[1], X.shape[1]) + model.set_params(init=init) + model.fit(input_data, labels) + + # init.shape[1] must match X.shape[1] + init = rng.rand(X.shape[1], X.shape[1] + 1) + model.set_params(init=init) + msg = ('The input dimensionality ({}) of the given ' + 'linear transformation `init` must match the ' + 'dimensionality of the given inputs `X` ({}).' 
+ .format(init.shape[1], X.shape[1])) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # init must be as specified in the docstring + model.set_params(init=1) + msg = ("`init` must be 'identity', 'covariance'" + "'random' or a numpy array of shape " + "(n_features, n_features).") + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg From 09fda87d466ec06295304d63c73e51830be8968e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 2 May 2019 15:12:49 +0200 Subject: [PATCH 06/52] Add initialization for MLKR --- metric_learn/_util.py | 130 ++++++++++++++++++++++----------- metric_learn/mlkr.py | 59 ++++++++++++--- test/test_mahalanobis_mixin.py | 55 +++++++------- 3 files changed, 162 insertions(+), 82 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 09e99bdf..81b67666 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -6,6 +6,7 @@ from sklearn.utils.validation import check_X_y, check_random_state from .exceptions import PreprocessorError from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.utils.multiclass import type_of_target from scipy.linalg import pinvh import sys import time @@ -411,8 +412,8 @@ def validate_vector(u, dtype=None): return u -def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, - random_state=None): +def _initialize_transformer(num_dims, input, y=None, init='auto', + verbose=False, random_state=None): """Returns the initial transformer to be used depending on the arguments. Parameters @@ -422,15 +423,31 @@ def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, before, meaning it should not be None and it should be a value in [1, X.shape[1]]) - X : array-like - The input samples. + input : array-like + The input samples (can be tuples or regular samples). y : array-like or None The input labels (or not if there are no labels). - init : array-like or None or str - The initial matrix. - # TODO: put the complete doc here + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. verbose : bool Whether to print the details of the initialization or not. @@ -446,16 +463,23 @@ def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, init_transformer : `numpy.ndarray` The initial transformer to use. 
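To make the contract concrete, a rough sketch of calling this helper
directly (it is private API; the data below is made up, shapes follow the
docstring above):

    import numpy as np
    from metric_learn._util import _initialize_transformer

    rng = np.random.RandomState(42)
    X = rng.randn(20, 5)
    y = rng.randint(0, 2, 20)

    # 'identity' truncates the identity matrix to the first num_dims rows
    L = _initialize_transformer(3, X, y, init='identity')
    assert L.shape == (3, 5)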
""" + # if we are doing a regression we cannot use lda: + n_features = input.shape[-1] + authorized_inits = ['auto', 'pca', 'identity', 'random'] + is_classification = (type_of_target(y) in ['multiclass', + 'binary']) + if is_classification: + authorized_inits.append('lda') if isinstance(init, np.ndarray): init = check_array(init) # Assert that init.shape[1] = X.shape[1] - if init.shape[1] != X.shape[1]: + if init.shape[1] != n_features: raise ValueError('The input dimensionality ({}) of the given ' 'linear transformation `init` must match the ' 'dimensionality of the given inputs `X` ({}).' - .format(init.shape[1], X.shape[1])) + .format(init.shape[1], n_features)) # Assert that init.shape[0] <= init.shape[1] if init.shape[0] > init.shape[1]: @@ -464,43 +488,43 @@ def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, 'greater than its input dimensionality ({}).' .format(init.shape[0], init.shape[1])) - if num_dims is not None: - # Assert that self.num_dims = init.shape[0] - if num_dims != init.shape[0]: - raise ValueError('The preferred dimensionality of the ' - 'projected space `num_dims` ({}) does' - ' not match the output dimensionality of ' - 'the given linear transformation ' - '`init` ({})!' - .format(num_dims, - init.shape[0])) - elif init in ['auto', 'pca', 'lda', 'identity', 'random']: + # Assert that self.num_dims = init.shape[0] + if num_dims != init.shape[0]: + raise ValueError('The preferred dimensionality of the ' + 'projected space `num_dims` ({}) does' + ' not match the output dimensionality of ' + 'the given linear transformation ' + '`init` ({})!' + .format(num_dims, + init.shape[0])) + elif init in authorized_inits: pass else: raise ValueError( - "`init` must be 'auto', 'pca', 'lda', 'identity', 'random' " - "or a numpy array of shape (num_dims, n_features).") + "`init` must be '{}' " + "or a numpy array of shape (num_dims, n_features)." + .format("', '".join(authorized_inits))) random_state = check_random_state(random_state) transformation = init if isinstance(init, np.ndarray): pass else: - n_samples, n_features = X.shape - num_dims = num_dims or n_features + n_samples = input.shape[0] if init == 'auto': - n_classes = len(np.unique(y)) - if num_dims <= min(n_features, n_classes - 1): + if is_classification: + n_classes = len(np.unique(y)) + if (is_classification and num_dims <= min(n_features, n_classes - 1)): init = 'lda' elif num_dims < min(n_features, n_samples): init = 'pca' else: init = 'identity' if init == 'identity': - transformation = np.eye(num_dims, X.shape[1]) + transformation = np.eye(num_dims, input.shape[-1]) elif init == 'random': transformation = random_state.randn(num_dims, - X.shape[1]) + input.shape[-1]) elif init in {'pca', 'lda'}: init_time = time.time() if init == 'pca': @@ -509,32 +533,49 @@ def _initialize_transformer(num_dims, X, y=None, init='auto', verbose=False, if verbose: print('Finding principal components... ') sys.stdout.flush() - pca.fit(X) + pca.fit(input) transformation = pca.components_ elif init == 'lda': lda = LinearDiscriminantAnalysis(n_components=num_dims) if verbose: print('Finding most discriminative components... 
') sys.stdout.flush() - lda.fit(X, y) + lda.fit(input, y) transformation = lda.scalings_.T[:num_dims] if verbose: print('done in {:5.2f}s'.format(time.time() - init_time)) return transformation -def _initialize_metric_mahalanobis(pairs, init='identity', random_state=None, +def _initialize_metric_mahalanobis(input, init='identity', random_state=None, return_inverse=False): """Returns the initial mahalanobis matrix to be used depending on the arguments. Parameters ---------- - pairs : array-like - The input samples. + input : array-like + The input samples (can be tuples or regular samples). + + init : string or numpy array, optional (default='identity') + Initialization of the linear transformation. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). - init : array-like or None or str - The initial matrix. + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial transformation will be a random array of shape + `(n_features, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. random_state : int or `numpy.RandomState` or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If @@ -551,24 +592,24 @@ def _initialize_metric_mahalanobis(pairs, init='identity', random_state=None, M, or (M, M_inv) : `numpy.ndarray` The initial matrix to use M, and its inverse if `return_inverse=True`. """ - + n_features = input.shape[-1] if isinstance(init, np.ndarray): init = check_array(init) # TODO: do we want to copy the array ? # see how they do it in scikit-learn for instance - # Assert that init.shape[1] = pairs.shape[2] - if (init.shape) != (pairs.shape[2],) * 2: - raise ValueError('The input dimensionality ({}) of the given ' + # Assert that init.shape[1] = n_features + if (init.shape) != (n_features,) * 2: + raise ValueError('The input dimensionality ({}, {}) of the given ' 'mahalanobis matrix `init` must match the ' 'dimensionality of the given inputs ({}).' 
- .format(init.shape, pairs.shape[2])) + .format(*(init.shape), n_features)) elif init in ['identity', 'covariance', 'random']: pass else: raise ValueError( "`init` must be 'identity', 'covariance', 'random' " - "or a numpy array of shape (num_dims, n_features).") + "or a numpy array of shape (n_features, n_features).") random_state = check_random_state(random_state) M = init @@ -576,13 +617,16 @@ def _initialize_metric_mahalanobis(pairs, init='identity', random_state=None, if return_inverse: M_inv = pinvh(M) else: - n_features = pairs.shape[2] if init == 'identity': M = np.eye(n_features, n_features) if return_inverse: M_inv = M.copy() if init == 'covariance': - X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) + if input.ndim == 3: + # if the input are tuples, we need to form an X by deduplication + X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)}) + else: + X = input M_inv = np.atleast_2d(np.cov(X, rowvar=False)) # TODO: check atleast_2d necessary M = pinvh(M_inv) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 74a21a82..42d3e888 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -21,6 +21,7 @@ from sklearn.metrics import pairwise_distances from .base_metric import MahalanobisMixin +from metric_learn._util import _initialize_transformer EPS = np.finfo(float).eps @@ -37,8 +38,9 @@ class MLKR(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, - verbose=False, preprocessor=None): + def __init__(self, num_dims=None, init='auto', A0=None, + tol=None, max_iter=1000, verbose=False, preprocessor=None, + random_state=None): """ Initialize MLKR. @@ -47,7 +49,38 @@ def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, num_dims : int, optional Dimensionality of reduced space (defaults to dimension of X) - A0: array-like, optional + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``num_dims``, the most reasonable initialization + will be chosen. If ``num_dims < min(n_features, n_samples)``, we + use 'pca', as it projects data in meaningful directions (those + of higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``num_dims`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'identity' + If ``num_dims`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``num_dims`` rows. + + 'random' + The initial transformation will be a random array of shape + `(num_dims, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``num_dims`` is not None, n_features_a must match it. + + A0: array-like, optional # TODO: deprecate Initialization of transformation matrix. Defaults to PCA loadings. tol: float, optional (default=None) @@ -62,12 +95,20 @@ def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000, preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. 
If array-like, tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. """ self.num_dims = num_dims - self.A0 = A0 + self.init = init + self.A0 = A0 # TODO: deprecate self.tol = tol self.max_iter = max_iter self.verbose = verbose + self.random_state = random_state super(MLKR, self).__init__(preprocessor) def fit(self, X, y): @@ -86,17 +127,11 @@ def fit(self, X, y): raise ValueError('Data and label lengths mismatch: %d != %d' % (n, y.shape[0])) - A = self.A0 m = self.num_dims if m is None: m = d - if A is None: - # initialize to PCA transformation matrix - # note: not the same as n_components=m ! - A = PCA().fit(X).components_.T[:m] - elif A.shape != (m, d): - raise ValueError('A0 needs shape (%d,%d) but got %s' % ( - m, d, A.shape)) + A = _initialize_transformer(m, X, y, init=self.init, + random_state=self.random_state) # Measure the total training time train_time = time.time() diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 1f13cceb..00e7cf91 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -7,6 +7,7 @@ from sklearn import clone from sklearn.cluster import DBSCAN from sklearn.utils import check_random_state +from sklearn.utils.multiclass import type_of_target from sklearn.utils.testing import set_random_state from metric_learn._util import make_context @@ -305,19 +306,18 @@ def test_transformer_is_2D(estimator, build_dataset): @pytest.mark.parametrize('estimator, build_dataset', [(ml, bd) for idml, (ml, bd) - in zip(ids_supervised_learners, - supervised_learners) + in zip(ids_metric_learners, + metric_learners) if hasattr(ml, 'num_dims') and - hasattr(ml, 'init') and - (idml not in ids_regressors)], + hasattr(ml, 'init')], ids=[idml for idml, (ml, _) - in zip(ids_supervised_learners, - supervised_learners) + in zip(ids_metric_learners, + metric_learners) if hasattr(ml, 'num_dims') and - hasattr(ml, 'init') and - (idml not in ids_regressors)]) + hasattr(ml, 'init')]) def test_init_transformation(estimator, build_dataset): input_data, labels, _, X = build_dataset() + is_classification = (type_of_target(labels) in ['multiclass', 'binary']) model = clone(estimator) rng = np.random.RandomState(42) @@ -338,8 +338,9 @@ def test_init_transformation(estimator, build_dataset): model.fit(input_data, labels) # Initialize with LDA - model.set_params(init='lda') - model.fit(input_data, labels) + if is_classification: + model.set_params(init='lda') + model.fit(input_data, labels) # Initialize with a numpy array init = rng.rand(X.shape[1], X.shape[1]) @@ -383,9 +384,10 @@ def test_init_transformation(estimator, build_dataset): # init must be as specified in the docstring model.set_params(init=1) - msg = ("`init` must be 'auto', 'pca', 'lda', 'identity', " - "'random' or a numpy array of shape " - "(num_dims, n_features).") + msg = ("`init` must be 'auto', 'pca', 'identity', " + "'random'{} or a numpy array of shape " + "(num_dims, n_features)." 
+ .format(", 'lda'" if is_classification else '')) with pytest.raises(ValueError) as raised_error: model.fit(input_data, labels) assert str(raised_error.value) == msg @@ -397,17 +399,15 @@ def test_init_transformation(estimator, build_dataset): @pytest.mark.parametrize('num_dims', [3, 5, 7, 11]) @pytest.mark.parametrize('estimator, build_dataset', [(ml, bd) for idml, (ml, bd) - in zip(ids_supervised_learners, - supervised_learners) + in zip(ids_metric_learners, + metric_learners) if hasattr(ml, 'num_dims') and - hasattr(ml, 'init') and - (idml not in ids_regressors)], + hasattr(ml, 'init')], ids=[idml for idml, (ml, _) - in zip(ids_supervised_learners, - supervised_learners) + in zip(ids_metric_learners, + metric_learners) if hasattr(ml, 'num_dims') and - hasattr(ml, 'init') and - (idml not in ids_regressors)]) + hasattr(ml, 'init')]) def test_auto_init_transformation(n_samples, n_features, n_classes, num_dims, estimator, build_dataset): # Test that auto choose the init as expected with every configuration @@ -439,7 +439,8 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, num_dims, else: model = clone(model_base) model.fit(X, y) - if num_dims <= min(n_classes - 1, n_features): + if (num_dims <= min(n_classes - 1, n_features) and + type_of_target(labels) in ['multiclass', 'binary']): model_other = clone(model_base).set_params(init='lda') elif num_dims < min(n_features, n_samples): model_other = clone(model_base).set_params(init='pca') @@ -492,17 +493,17 @@ def test_init_mahalanobis(estimator, build_dataset): # init.shape[1] must match X.shape[1] init = rng.rand(X.shape[1], X.shape[1] + 1) model.set_params(init=init) - msg = ('The input dimensionality ({}) of the given ' - 'linear transformation `init` must match the ' - 'dimensionality of the given inputs `X` ({}).' - .format(init.shape[1], X.shape[1])) + msg = ('The input dimensionality ({}, {}) of the given ' + 'mahalanobis matrix `init` must match the ' + 'dimensionality of the given inputs ({}).' + .format(init.shape[0], init.shape[1], input_data.shape[-1])) with pytest.raises(ValueError) as raised_error: model.fit(input_data, labels) assert str(raised_error.value) == msg # init must be as specified in the docstring model.set_params(init=1) - msg = ("`init` must be 'identity', 'covariance'" + msg = ("`init` must be 'identity', 'covariance', " "'random' or a numpy array of shape " "(n_features, n_features).") with pytest.raises(ValueError) as raised_error: From 0e59d72d457f5b79f76f86aa36f5de9e00dd888c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 2 May 2019 18:42:45 +0200 Subject: [PATCH 07/52] FIX: fix error message for dimension --- metric_learn/_util.py | 4 ++-- test/test_mahalanobis_mixin.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 81b67666..4e2ce7b5 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -599,10 +599,10 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, # Assert that init.shape[1] = n_features if (init.shape) != (n_features,) * 2: - raise ValueError('The input dimensionality ({}, {}) of the given ' + raise ValueError('The input dimensionality {} of the given ' 'mahalanobis matrix `init` must match the ' 'dimensionality of the given inputs ({}).' 
- .format(*(init.shape), n_features)) + .format(init.shape, n_features)) elif init in ['identity', 'covariance', 'random']: pass diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 00e7cf91..2cfba341 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -493,10 +493,10 @@ def test_init_mahalanobis(estimator, build_dataset): # init.shape[1] must match X.shape[1] init = rng.rand(X.shape[1], X.shape[1] + 1) model.set_params(init=init) - msg = ('The input dimensionality ({}, {}) of the given ' + msg = ('The input dimensionality {} of the given ' 'mahalanobis matrix `init` must match the ' 'dimensionality of the given inputs ({}).' - .format(init.shape[0], init.shape[1], input_data.shape[-1])) + .format(init.shape, input_data.shape[-1])) with pytest.raises(ValueError) as raised_error: model.fit(input_data, labels) assert str(raised_error.value) == msg From 60ca66275191e82d87ac7477ee8877343722014b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 2 May 2019 18:47:25 +0200 Subject: [PATCH 08/52] FIX fix StringRepr for MLKR --- test/test_base_metric.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index b4d2aa25..e8186619 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -76,9 +76,10 @@ def test_rca(self): "preprocessor=None)") def test_mlkr(self): - self.assertEqual(str(metric_learn.MLKR()), - "MLKR(A0=None, max_iter=1000, num_dims=None, " - "preprocessor=None, tol=None,\n verbose=False)") + self.assertEqual(str(metric_learn.MLKR()), """ +MLKR(A0=None, init='auto', max_iter=1000, num_dims=None, preprocessor=None, + random_state=None, tol=None, verbose=False) +""".strip('\n')) def test_mmc(self): self.assertEqual(str(metric_learn.MMC()), """ From 71a75eda9831843f03fa022c005c27666b0ccac0 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 3 May 2019 11:32:43 +0200 Subject: [PATCH 09/52] FIX tests by reshaping to the right dataset size --- metric_learn/_util.py | 78 ++++++++++++++++++++++------------ metric_learn/mlkr.py | 4 +- test/test_mahalanobis_mixin.py | 59 +++++++++++++++---------- 3 files changed, 91 insertions(+), 50 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 4e2ce7b5..5360a79c 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -413,7 +413,8 @@ def validate_vector(u, dtype=None): def _initialize_transformer(num_dims, input, y=None, init='auto', - verbose=False, random_state=None): + verbose=False, random_state=None, + has_classes=True): """Returns the initial transformer to be used depending on the arguments. Parameters @@ -429,35 +430,60 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', y : array-like or None The input labels (or not if there are no labels). - init : string or numpy array, optional (default='identity') - Initialization of the linear transformation. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - A numpy array of shape (n_features, n_features), that will - be used as such to initialize the metric. 
+ init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``num_dims``, the most reasonable initialization will + be chosen. If ``num_dims <= n_classes`` we use 'lda' (if possible, + see the description of 'lda' init), as it uses labels information. + If not, but ``num_dims < min(n_features, n_samples)``, we use + 'pca', as it projects data in meaningful directions (those of + higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``num_dims`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(num_dims, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``num_dims > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`). + This initialization is possible only if `has_classes == True`. + + 'identity' + If ``num_dims`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``num_dims`` rows. + + 'random' + The initial transformation will be a random array of shape + `(num_dims, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``num_dims`` is not None, n_features_a must match it. verbose : bool Whether to print the details of the initialization or not. - random_state: int or `numpy.RandomState` or None, optional (default=None) + random_state : int or `numpy.RandomState` or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random transformation. If ``init='pca'``, ``random_state`` is passed as an argument to PCA when initializing the transformation. + has_classes : bool (default=True) + Whether the labels are in fact classes. If true, this will allow to use + the 'lda' initialization. + Returns ------- init_transformer : `numpy.ndarray` @@ -466,9 +492,7 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', # if we are doing a regression we cannot use lda: n_features = input.shape[-1] authorized_inits = ['auto', 'pca', 'identity', 'random'] - is_classification = (type_of_target(y) in ['multiclass', - 'binary']) - if is_classification: + if has_classes: authorized_inits.append('lda') if isinstance(init, np.ndarray): @@ -512,9 +536,9 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', else: n_samples = input.shape[0] if init == 'auto': - if is_classification: + if has_classes: n_classes = len(np.unique(y)) - if (is_classification and num_dims <= min(n_features, n_classes - 1)): + if (has_classes and num_dims <= min(n_features, n_classes - 1)): init = 'lda' elif num_dims < min(n_features, n_samples): init = 'pca' @@ -565,7 +589,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. 
'random' diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 42d3e888..1eaa41f1 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -131,7 +131,9 @@ def fit(self, X, y): if m is None: m = d A = _initialize_transformer(m, X, y, init=self.init, - random_state=self.random_state) + random_state=self.random_state, + # MLKR works on regression targets: + has_classes=False) # Measure the total training time train_time = time.time() diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 2cfba341..013f9260 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -16,7 +16,8 @@ from test.test_utils import (ids_metric_learners, metric_learners, remove_y_quadruplets, ids_regressors, - ids_supervised_learners, supervised_learners) + ids_supervised_learners, supervised_learners, + ids_classifiers) RNG = check_random_state(0) @@ -410,43 +411,57 @@ def test_init_transformation(estimator, build_dataset): hasattr(ml, 'init')]) def test_auto_init_transformation(n_samples, n_features, n_classes, num_dims, estimator, build_dataset): - # Test that auto choose the init as expected with every configuration - # of order of n_samples, n_features, n_classes and num_dims. - input_data, labels, _, X = build_dataset() - model_base = clone(estimator) - rng = np.random.RandomState(42) - model_base.set_params(init='auto', - num_dims=num_dims, - random_state=rng) - - # To make the test work for LMNN: - if 'LMNN' in model_base.__class__.__name__: - model_base.set_params(k=1) - # To make the test faster for estimators that have a max_iter: - if hasattr(model_base, 'max_iter'): - model_base.set_params(max_iter=1) + # Test that auto choose the init transformation as expected with every + # configuration of order of n_samples, n_features, n_classes and num_dims, + # for all metric learners that learn a transformation. 
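Spelled out, the selection rule these assertions check against, mirroring
the 'auto' branch of _initialize_transformer (a stand-alone sketch, not
library code):

    def expected_auto_init(num_dims, n_samples, n_features, n_classes,
                           has_classes=True):
        if has_classes and num_dims <= min(n_features, n_classes - 1):
            return 'lda'
        if num_dims < min(n_features, n_samples):
            return 'pca'
        return 'identity'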
if n_classes >= n_samples: pass # n_classes > n_samples is impossible, and n_classes == n_samples # throws an error from lda but is an absurd case else: - X = rng.randn(n_samples, n_features) - y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples] + input_data, labels, _, X = build_dataset() + model_base = clone(estimator) + rng = np.random.RandomState(42) + model_base.set_params(init='auto', + num_dims=num_dims, + random_state=rng) + # To make the test work for LMNN: + if 'LMNN' in model_base.__class__.__name__: + model_base.set_params(k=1) + # To make the test faster for estimators that have a max_iter: + if hasattr(model_base, 'max_iter'): + model_base.set_params(max_iter=1) if num_dims > n_features: # this would return a ValueError, which is tested in # test_init_transformation pass else: + # We need to build a dataset of the right shape: + num_to_pad_n_samples = ((n_samples // input_data.shape[0] + 1)) + num_to_pad_n_features = ((n_samples // input_data.shape[-1] + 1)) + if input_data.ndim == 3: + input_data = np.tile(input_data, + (num_to_pad_n_samples, input_data.shape[1], + num_to_pad_n_features)) + else: + input_data = np.tile(input_data, + (num_to_pad_n_samples, num_to_pad_n_features)) + input_data = input_data[:n_samples, ..., :n_features] + has_classes = model_base.__class__.__name__ in ids_classifiers + if has_classes: + labels = np.tile(range(n_classes), n_samples // + n_classes + 1)[:n_samples] + else: + labels = np.tile(labels, n_samples // labels.shape[0] + 1)[:n_samples] model = clone(model_base) - model.fit(X, y) - if (num_dims <= min(n_classes - 1, n_features) and - type_of_target(labels) in ['multiclass', 'binary']): + model.fit(input_data, labels) + if num_dims <= min(n_classes - 1, n_features) and has_classes: model_other = clone(model_base).set_params(init='lda') elif num_dims < min(n_features, n_samples): model_other = clone(model_base).set_params(init='pca') else: model_other = clone(model_base).set_params(init='identity') - model_other.fit(X, y) + model_other.fit(input_data, labels) assert_array_almost_equal(model.transformer_, model_other.transformer_) From 1b2d296f8694f6efaf57e453549006110f0dd67c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 3 May 2019 11:33:24 +0200 Subject: [PATCH 10/52] Remove lda in docstring of MLKR --- metric_learn/mlkr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 1eaa41f1..98c72c69 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -51,7 +51,7 @@ def __init__(self, num_dims=None, init='auto', A0=None, init : string or numpy array, optional (default='auto') Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + 'auto', 'pca', 'identity', 'random', and a numpy array of shape (n_features_a, n_features_b). 
'auto' From bd709e9833462b39881fcb117775e38e05af8c43 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 13:05:43 +0200 Subject: [PATCH 11/52] MAINT: Add deprecation for previous initializations --- metric_learn/itml.py | 33 ++++++++----- metric_learn/lsml.py | 35 ++++++------- metric_learn/mlkr.py | 16 ++++-- metric_learn/mmc.py | 28 +++++++---- metric_learn/sdml.py | 25 +++++++--- test/metric_learn_test.py | 101 +++++++++++++++++++++++++++++++++++++- test/test_base_metric.py | 30 +++++------ 7 files changed, 202 insertions(+), 66 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index aeac9f6b..18b7b3cb 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -31,8 +31,8 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - init='identity', A0=None, verbose=False, preprocessor=None, - random_state=None): + init='identity', A0='deprecated', verbose=False, + preprocessor=None, random_state=None): """Initialize ITML. Parameters @@ -52,8 +52,8 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' - The inverse covariance matrix. + 'covariance' + The inverse covariance matrix. 'random' The initial transformation will be a random array of shape @@ -64,8 +64,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, A numpy array of shape (n_features, n_features), that will be used as such to initialize the metric. - A0 : (d x d) matrix, optional - initial regularization matrix, defaults to identity + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. verbose : bool, optional if True, prints information while learning @@ -91,6 +93,11 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, super(_BaseITML, self).__init__(preprocessor) def _fit(self, pairs, y, bounds=None): + if self.A0 != 'deprecated': + warnings.warn('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.', + DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') # init bounds @@ -249,8 +256,8 @@ class ITML_Supervised(_BaseITML, TransformerMixin): def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, - bounds='deprecated', init='identity', A0=None, verbose=False, - preprocessor=None, random_state=None): + bounds='deprecated', init='identity', A0='deprecated', + verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -295,8 +302,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, A numpy array of shape (n_features, n_features), that will be used as such to initialize the metric. - A0 : (d x d) matrix, optional # TODO: deprecate - initial regularization matrix, defaults to identity + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. 
verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable @@ -309,8 +318,8 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, """ _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, - A0=A0, verbose=verbose, preprocessor=preprocessor, - random_state=random_state) + A0=A0, init=init, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints self.bounds = bounds diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index a6c4497f..d8659c4c 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -24,7 +24,7 @@ class _BaseLSML(MahalanobisMixin): _tuple_size = 4 # constraints are quadruplets def __init__(self, tol=1e-3, max_iter=1000, init='identity', - prior=None, verbose=False, preprocessor=None, + prior='deprecated', verbose=False, preprocessor=None, random_state=None): """Initialize LSML. @@ -52,9 +52,10 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', tol : float, optional max_iter : int, optional - prior : (d x d) matrix, optional # TODO: deprecate, and explain how to set - #the new init (the inverse of the prior) - guess at a metric [default: inv(covariance(X))] + prior : Not used. + .. deprecated:: 0.5.0 + `prior` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable @@ -74,6 +75,11 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', super(_BaseLSML, self).__init__(preprocessor) def _fit(self, quadruplets, weights=None): + if self.prior != 'deprecated': + warnings.warn('"prior" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.', + DeprecationWarning) quadruplets = self._prepare_inputs(quadruplets, type_of_inputs='tuples') @@ -89,14 +95,6 @@ def _fit(self, quadruplets, weights=None): self.w_ /= self.w_.sum() # weights must sum to 1 M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.init, return_inverse=True) - if self.prior is None: - X = np.vstack({tuple(row) for row in - quadruplets.reshape(-1, quadruplets.shape[2])}) - prior_inv = np.atleast_2d(np.cov(X, rowvar=False)) - M = np.linalg.inv(prior_inv) - else: - M = self.prior - prior_inv = np.linalg.inv(self.prior) step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. @@ -210,9 +208,10 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): metric (See function `transformer_from_metric`.) """ - def __init__(self, tol=1e-3, max_iter=1000, init='identity', prior=None, - num_labeled='deprecated', num_constraints=None, weights=None, - verbose=False, preprocessor=None, random_state=None): + def __init__(self, tol=1e-3, max_iter=1000, init='identity', + prior='deprecated', num_labeled='deprecated', + num_constraints=None, weights=None, verbose=False, + preprocessor=None, random_state=None): """Initialize the supervised version of `LSML`. `LSML_Supervised` creates quadruplets from labeled samples by taking two @@ -236,8 +235,10 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', prior=None, The initial transformation will be a random array of shape `(n_features, n_features)`. Each value is sampled from the standard normal distribution. 
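As elsewhere in this patch, the weakly-supervised estimator takes the new
keyword directly; a minimal sketch, reusing the toy quadruplets from the
deprecation tests below:

    import numpy as np
    from metric_learn import LSML

    quadruplets = np.array([[[-10., 0.], [10., 0.], [0., 50.], [0., -60.]],
                            [[-27., 31.], [12., 52.], [71., 30.], [41., -24.]]])

    # in each quadruplet, the first two points should end up closer
    # than the last two under the learned metric
    lsml = LSML(init='identity')
    lsml.fit(quadruplets)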
-    prior : (d x d) matrix, optional
-      guess at a metric [default: covariance(X)]
+    prior : Not used.
+      .. deprecated:: 0.5.0
+          `prior` was deprecated in version 0.5.0 and will
+          be removed in 0.6.0. Use 'init' instead.
     num_labeled : Not used
       .. deprecated:: 0.5.0
          `num_labeled` was deprecated in version 0.5.0 and will
diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 98c72c69..e14af3c7 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -38,7 +38,7 @@ class MLKR(MahalanobisMixin, TransformerMixin):
     The learned linear transformation ``L``.
   """

-  def __init__(self, num_dims=None, init='auto', A0=None,
+  def __init__(self, num_dims=None, init='auto', A0='deprecated',
                tol=None, max_iter=1000, verbose=False, preprocessor=None,
                random_state=None):
     """
     Initialize MLKR.
@@ -80,8 +80,10 @@ def __init__(self, num_dims=None, init='auto', A0=None,
       :meth:`fit` and n_features_a must be less than or equal to that.
       If ``num_dims`` is not None, n_features_a must match it.

-    A0: array-like, optional  # TODO: deprecate
-      Initialization of transformation matrix. Defaults to PCA loadings.
+    A0: Not used.
+      .. deprecated:: 0.5.0
+        `A0` was deprecated in version 0.5.0 and will
+        be removed in 0.6.0. Use 'init' instead.

     tol: float, optional (default=None)
       Convergence tolerance for the optimization.
@@ -104,7 +106,7 @@ def __init__(self, num_dims=None, init='auto', A0=None,
     """
     self.num_dims = num_dims
     self.init = init
-    self.A0 = A0  # TODO: deprecate
+    self.A0 = A0
     self.tol = tol
     self.max_iter = max_iter
     self.verbose = verbose
@@ -120,6 +122,12 @@ def fit(self, X, y):
     X : (n x d) array of samples
     y : (n) data labels
     """
+    if self.A0 != 'deprecated':
+      warnings.warn('"A0" parameter is not used.'
+                    ' It has been deprecated in version 0.5.0 and will be'
+                    'removed in 0.6.0. Use "init" instead.',
+                    DeprecationWarning)
+
     X, y = self._prepare_inputs(X, y, y_numeric=True,
                                 ensure_min_samples=2)
     n, d = X.shape
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 788de784..37e80fb7 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -34,8 +34,9 @@ class _BaseMMC(MahalanobisMixin):
   _tuple_size = 2  # constraints are pairs

   def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
-               init='identity', A0=None, diagonal=False, diagonal_c=1.0,
-               verbose=False, preprocessor=None, random_state=None):
+               init='identity', A0='deprecated', diagonal=False,
+               diagonal_c=1.0, verbose=False, preprocessor=None,
+               random_state=None):
     """Initialize MMC.
     Parameters
     ----------
@@ -68,9 +69,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     preprocessor : array-like, shape=(n_samples, n_features) or callable
       The preprocessor to call to get tuples from indices. If array-like,
       tuples will be gotten like this: X[indices].
-    A0 : (d x d) matrix, optional
-      initial metric, defaults to identity
-      only the main diagonal is taken if `diagonal == True`
+    A0 : Not used.
+      .. deprecated:: 0.5.0
+        `A0` was deprecated in version 0.5.0 and will
+        be removed in 0.6.0. Use 'init' instead.
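The caller-side effect of this deprecation pattern, sketched with the same
toy data the deprecation tests below use (MLKR shown; ITML, LSML, MMC and
SDML behave the same way with their respective old keywords):

    import warnings
    import numpy as np
    from metric_learn import MLKR

    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        MLKR(A0=np.ones_like(X)).fit(X, y)  # old keyword accepted, but warns
    assert any(issubclass(wrn.category, DeprecationWarning) for wrn in w)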
     diagonal : bool, optional
       if True, a diagonal metric will be learned,
       i.e., a simple scaling of dimensions
@@ -91,7 +93,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     self.max_proj = max_proj
     self.convergence_threshold = convergence_threshold
     self.init = init
-    self.A0 = A0  # TODO: deprecate
+    self.A0 = A0
     self.diagonal = diagonal
     self.diagonal_c = diagonal_c
     self.verbose = verbose
@@ -99,6 +101,11 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     super(_BaseMMC, self).__init__(preprocessor)

   def _fit(self, pairs, y):
+    if self.A0 != 'deprecated':
+      warnings.warn('"A0" parameter is not used.'
+                    ' It has been deprecated in version 0.5.0 and will be'
+                    'removed in 0.6.0. Use "init" instead.',
+                    DeprecationWarning)
     pairs, y = self._prepare_inputs(pairs, y,
                                     type_of_inputs='tuples')

@@ -438,7 +445,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):

   def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
                num_labeled='deprecated', num_constraints=None, init='identity',
-               A0=None, diagonal=False, diagonal_c=1.0, verbose=False,
+               A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False,
                preprocessor=None, random_state=None):
     """Initialize the supervised version of `MMC`.

@@ -483,9 +490,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
     preprocessor : array-like, shape=(n_samples, n_features) or callable
       The preprocessor to call to get tuples from indices. If array-like,
       tuples will be gotten like this: X[indices].
-    A0 : (d x d) matrix, optional
-      initial metric, defaults to identity
-      only the main diagonal is taken if `diagonal == True`
+    A0 : Not used.
+      .. deprecated:: 0.5.0
+        `A0` was deprecated in version 0.5.0 and will
+        be removed in 0.6.0. Use 'init' instead.
     diagonal : bool, optional
       if True, a diagonal metric will be learned,
       i.e., a simple scaling of dimensions
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index e9e17784..c1431c69 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -32,7 +32,7 @@ class _BaseSDML(MahalanobisMixin):
   _tuple_size = 2  # constraints are pairs

   def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
-               use_cov=True, verbose=False, preprocessor=None,
+               use_cov='deprecated', verbose=False, preprocessor=None,
                random_state=None):
     """
     Parameters
@@ -63,8 +63,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
       A numpy array of shape (n_features, n_features), that will
       be used as such to initialize the metric.

-    use_cov : bool, optional  # TODO: to deprecate
-      controls prior matrix, will use the identity if use_cov=False
+    use_cov : Not used.
+      .. deprecated:: 0.5.0
+        `use_cov` was deprecated in version 0.5.0 and will
+        be removed in 0.6.0. Use 'init' instead.

     verbose : bool, optional
       if True, prints information while learning
@@ -87,6 +89,11 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
     super(_BaseSDML, self).__init__(preprocessor)

   def _fit(self, pairs, y):
+    if self.use_cov != 'deprecated':
+      warnings.warn('"use_cov" parameter is not used.'
+                    ' It has been deprecated in version 0.5.0 and will be'
+                    'removed in 0.6.0. 
Use "init" instead.', + DeprecationWarning) if not HAS_SKGGM: if self.verbose: print("SDML will use scikit-learn's graphical lasso solver.") @@ -213,8 +220,9 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): """ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', - use_cov=True, num_labeled='deprecated', num_constraints=None, - verbose=False, preprocessor=None, random_state=None): + use_cov='deprecated', num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None, + random_state=None): """Initialize the supervised version of `SDML`. `SDML_Supervised` creates pairs of similar sample by taking same class @@ -246,8 +254,11 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', numpy array A numpy array of shape (n_features, n_features), that will be used as such to initialize the metric. - use_cov : bool, optional - controls prior matrix, will use the identity if use_cov=False + use_cov : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + num_labeled : Not used .. deprecated:: 0.5.0 `num_labeled` was deprecated in version 0.5.0 and will diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index d5c54538..f186da4e 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -18,7 +18,7 @@ HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML) + RCA_Supervised, MMC_Supervised, SDML, ITML, LSML) # Import this specially for testing. from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -74,6 +74,27 @@ def test_deprecation_num_labeled(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) + def test_deprecation_prior(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised(prior=np.ones_like(X)) + msg = ('"prior" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + lsml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + quadruplets = np.array([[[-10., 0.], [10., 0.], [0., 50.], [0., -60]], + [[-27., 31.], [12., 52.], [71., 30.], [41., -24]]]) + lsml = LSML(prior=np.ones_like(X)) + with pytest.warns(DeprecationWarning) as raised_warning: + lsml.fit(quadruplets) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -108,6 +129,27 @@ def test_deprecation_bounds(self): 'fit method instead.') assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) + def test_deprecation_A0(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised(A0=np.ones_like(X)) + msg = ('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. 
Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + itml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + itml = ITML(A0=np.ones_like(X)) + with pytest.warns(DeprecationWarning) as raised_warning: + itml.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestLMNN(MetricTestCase): def test_iris(self): @@ -326,6 +368,28 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): use_cov=True) sdml.fit(X, y) + def test_deprecation_use_cov(self): + # test that a deprecation message is thrown if use_cov is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(use_cov=np.ones_like(X), + balance_param=1e-5) + msg = ('"use_cov" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(use_cov=np.ones_like(X), balance_param=1e-5) + with pytest.warns(DeprecationWarning) as raised_warning: + sdml.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -557,6 +621,20 @@ def grad_fn(M): rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M)) np.testing.assert_almost_equal(rel_diff, 0.) + def test_deprecation_A0(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mlkr = MLKR(A0=np.ones_like(X)) + msg = ('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + mlkr.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestMMC(MetricTestCase): def test_iris(self): @@ -608,6 +686,27 @@ def test_deprecation_num_labeled(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) + def test_deprecation_A0(self): + # test that a deprecation message is thrown if A0 is set at + # initialization + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised(A0=np.ones_like(X)) + msg = ('"A0" parameter is not used.' + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0. 
Use "init" instead.') + with pytest.warns(DeprecationWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + mmc = MMC(A0=np.ones_like(X)) + with pytest.warns(DeprecationWarning) as raised_warning: + mmc.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index e8186619..bd9c5a13 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -34,37 +34,37 @@ def test_lfda(self): def test_itml(self): self.assertEqual(str(metric_learn.ITML()), """ -ITML(A0=None, convergence_threshold=0.001, gamma=1.0, init='identity', +ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, init='identity', max_iter=1000, preprocessor=None, random_state=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ -ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001, - gamma=1.0, init='identity', max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, random_state=None, - verbose=False) +ITML_Supervised(A0='deprecated', bounds='deprecated', + convergence_threshold=0.001, gamma=1.0, init='identity', + max_iter=1000, num_constraints=None, num_labeled='deprecated', + preprocessor=None, random_state=None, verbose=False) """.strip('\n')) def test_lsml(self): self.assertEqual( str(metric_learn.LSML()), """ -LSML(init='identity', max_iter=1000, preprocessor=None, prior=None, +LSML(init='identity', max_iter=1000, preprocessor=None, prior='deprecated', random_state=None, tol=0.001, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.LSML_Supervised()), """ LSML_Supervised(init='identity', max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior=None, + num_labeled='deprecated', preprocessor=None, prior='deprecated', random_state=None, tol=0.001, verbose=False, weights=None) """.strip('\n')) def test_sdml(self): self.assertEqual(str(metric_learn.SDML()), """ SDML(balance_param=0.5, init='identity', preprocessor=None, random_state=None, - sparsity_param=0.01, use_cov=True, verbose=False) + sparsity_param=0.01, use_cov='deprecated', verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.SDML_Supervised()), """ SDML_Supervised(balance_param=0.5, init='identity', num_constraints=None, num_labeled='deprecated', preprocessor=None, random_state=None, - sparsity_param=0.01, use_cov=True, verbose=False) + sparsity_param=0.01, use_cov='deprecated', verbose=False) """.strip('\n')) def test_rca(self): @@ -77,18 +77,18 @@ def test_rca(self): def test_mlkr(self): self.assertEqual(str(metric_learn.MLKR()), """ -MLKR(A0=None, init='auto', max_iter=1000, num_dims=None, preprocessor=None, - random_state=None, tol=None, verbose=False) +MLKR(A0='deprecated', init='auto', max_iter=1000, num_dims=None, + preprocessor=None, random_state=None, tol=None, verbose=False) """.strip('\n')) def test_mmc(self): self.assertEqual(str(metric_learn.MMC()), """ -MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, - init='identity', max_iter=100, max_proj=10000, preprocessor=None, - random_state=None, verbose=False) +MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, + diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000, 
+ preprocessor=None, random_state=None, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.MMC_Supervised()), """ -MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, +MMC_Supervised(A0='deprecated', convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000, num_constraints=None, num_labeled='deprecated', preprocessor=None, random_state=None, verbose=False) From e162e6a0036920c5b5e296df74c322eeac749888 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 14:15:17 +0200 Subject: [PATCH 12/52] Update tests with new initialization --- metric_learn/sdml.py | 2 +- test/metric_learn_test.py | 31 +++++++++++----------- test/test_fit_transform.py | 4 +-- test/test_sklearn_compat.py | 2 +- test/test_transformer_metric_conversion.py | 2 +- test/test_utils.py | 4 +-- 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index c1431c69..d07a2cea 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -119,7 +119,7 @@ def _fit(self, pairs, y): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_cov=False.", + "`balance_param` and/or to set init='identity'.", ConvergenceWarning) w -= min_eigval # we translate the eigenvalues to make them all positive w += 1e-10 # we add a small offset to avoid definiteness problems diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index f186da4e..7543f475 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -231,7 +231,7 @@ def test_sdml_raises_warning_msg_not_installed_skggm(self): # because it will return a non SPD matrix pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(use_cov=False, balance_param=100, verbose=True) + sdml = SDML(init='identity', balance_param=100, verbose=True) msg = ("There was a problem in SDML when using scikit-learn's graphical " "lasso solver. skggm's graphical lasso can sometimes converge on " @@ -254,7 +254,7 @@ def test_sdml_raises_warning_msg_installed_skggm(self): # because it will return non finite values pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(use_cov=False, balance_param=100, verbose=True) + sdml = SDML(init='identity', balance_param=100, verbose=True) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") @@ -277,7 +277,7 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): # pathological case) X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) y = [0, 0, 1, 1] - sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=False, + sdml_supervised = SDML_Supervised(balance_param=0.5, init='identity', sparsity_param=0.01) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") @@ -295,11 +295,11 @@ def test_raises_no_warning_installed_skggm(self): y_pairs = [1, -1] X, y = make_classification(random_state=42) with pytest.warns(None) as record: - sdml = SDML() + sdml = SDML(init='covariance') sdml.fit(pairs, y_pairs) assert len(record) == 0 with pytest.warns(None) as record: - sdml = SDML_Supervised(use_cov=False, balance_param=1e-5) + sdml = SDML_Supervised(init='identity', balance_param=1e-5) sdml.fit(X, y) assert len(record) == 0 @@ -308,7 +308,7 @@ def test_iris(self): # TODO: un-flake it! 
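The pattern behind all of these test updates, shown once as a standalone sketch (constructor calls only, no fitting; the old flag/new parameter equivalence is the one these patches assume):

# Old-style flag -> new-style init (sketch):
from metric_learn import SDML

sdml_cov = SDML(init='covariance')   # formerly SDML(use_cov=True)
sdml_id = SDML(init='identity')      # formerly SDML(use_cov=False)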
rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + sdml = SDML_Supervised(num_constraints=1500, init='identity', balance_param=5e-5) sdml.fit(self.iris_points, self.iris_labels, random_state=rs) csep = class_separation(sdml.transform(self.iris_points), @@ -320,7 +320,7 @@ def test_deprecation_num_labeled(self): # initialization # TODO: remove in v.0.6 X, y = make_classification(random_state=42) - sdml_supervised = SDML_Supervised(num_labeled=np.inf, use_cov=False, + sdml_supervised = SDML_Supervised(num_labeled=np.inf, init='identity', balance_param=5e-5) msg = ('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' @@ -337,7 +337,7 @@ def test_sdml_raises_warning_non_psd(self): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set use_cov=False.") + "`balance_param` and/or to set init='identity'.") with pytest.warns(ConvergenceWarning) as raised_warning: try: sdml.fit(pairs, y) @@ -352,7 +352,7 @@ def test_sdml_converges_if_psd(self): pseudo-covariance matrix is PSD""" pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + sdml = SDML(init='covariance', sparsity_param=0.01, balance_param=0.5) sdml.fit(pairs, y) assert np.isfinite(sdml.get_mahalanobis_matrix()).all() @@ -365,7 +365,7 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): it should work, but scikit-learn's graphical_lasso does not work""" X, y = load_iris(return_X_y=True) sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - use_cov=True) + init='covariance') sdml.fit(X, y) def test_deprecation_use_cov(self): @@ -400,7 +400,7 @@ def test_verbose_has_installed_skggm_sdml(capsys): # TODO: remove if we don't need skggm anymore pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(verbose=True) + sdml = SDML(verbose=True, init='covariance') sdml.fit(pairs, y_pairs) out, _ = capsys.readouterr() assert "SDML will use skggm's graphical lasso solver." in out @@ -414,7 +414,7 @@ def test_verbose_has_installed_skggm_sdml_supervised(capsys): # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore X, y = make_classification(random_state=42) - sdml = SDML_Supervised(verbose=True) + sdml = SDML_Supervised(verbose=True, init='covariance') sdml.fit(X, y) out, _ = capsys.readouterr() assert "SDML will use skggm's graphical lasso solver." in out @@ -443,7 +443,7 @@ def test_verbose_has_not_installed_skggm_sdml_supervised(capsys): # skggm's solver is used (when they use SDML_Supervised) # TODO: remove if we don't need skggm anymore X, y = make_classification(random_state=42) - sdml = SDML_Supervised(verbose=True, balance_param=1e-5, use_cov=False) + sdml = SDML_Supervised(verbose=True, balance_param=1e-5, init='identity') sdml.fit(X, y) out, _ = capsys.readouterr() assert "SDML will use scikit-learn's graphical lasso solver." 
in out @@ -646,8 +646,9 @@ def test_iris(self): c, d = np.nonzero(np.triu(~mask, k=1)) # Full metric - mmc = MMC(convergence_threshold=0.01) - mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) + n_features = self.iris_points.shape[1] + mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10) + mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [[+0.000514, +0.000868, -0.001195, -0.001703], [+0.000868, +0.001468, -0.002021, -0.002879], [-0.001195, -0.002021, +0.002782, +0.003964], diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index b85e9273..705258cb 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -65,13 +65,13 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - use_cov=False) + init='identity') sdml.fit(self.X, self.y, random_state=seed) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - use_cov=False) + init='identity') res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 5d6c5d77..bcc1422e 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -85,7 +85,7 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', num_constraints=num_constraints, verbose=verbose, preprocessor=preprocessor, - balance_param=1e-5, use_cov=False) + balance_param=1e-5, init='identity') dSDML.__init__ = stable_init check_estimator(dSDML) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 4328320d..7c6d2d77 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -49,7 +49,7 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, use_cov=False, + sdml = SDML_Supervised(num_constraints=1500, init='identity', balance_param=1e-5) sdml.fit(self.X, self.y, random_state=seed) L = sdml.transformer_ diff --git a/test/test_utils.py b/test/test_utils.py index 52ebc7a6..2890251b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -106,7 +106,7 @@ def build_quadruplets(with_preprocessor=False): # be solved # TODO: remove this comment when #175 is solved (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster - (SDML(use_cov=False, balance_param=1e-5), build_pairs)] + (SDML(init='identity', balance_param=1e-5), build_pairs)] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in pairs_learners])) @@ -120,7 +120,7 @@ def build_quadruplets(with_preprocessor=False): (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), (RCA_Supervised(num_chunks=10), build_classification), - (SDML_Supervised(use_cov=False, balance_param=1e-5), + (SDML_Supervised(init='identity', balance_param=1e-5), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in From d1e88af692e77dda5567fb25737798a6a11f5024 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 14:46:43 +0200 Subject: [PATCH 13/52] Make random init for mahalanobis metric generate an SPD matrix --- metric_learn/_util.py | 11 +++++------ metric_learn/itml.py | 6 +++--- metric_learn/lsml.py | 22 +++++++++++++++------- 
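Standalone version of the explicit-matrix initialization used in the iris MMC test above (a sketch: the scaled identity is an arbitrary but well-conditioned SPD starting point):

import numpy as np
from metric_learn import MMC

n_features = 4  # iris has four features
mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10)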
metric_learn/mmc.py | 14 +++++++------- metric_learn/sdml.py | 16 ++++++++-------- test/test_mahalanobis_mixin.py | 5 +++-- 6 files changed, 41 insertions(+), 33 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 5360a79c..b6455b7b 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,6 +1,7 @@ import numpy as np import six from numpy.linalg import LinAlgError +from sklearn.datasets import make_spd_matrix from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils.validation import check_X_y, check_random_state @@ -593,9 +594,9 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. numpy array A numpy array of shape (n_features, n_features), that will @@ -656,9 +657,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, M = pinvh(M_inv) elif init == 'random': # we need to create a random symmetric matrix - M = random_state.randn(n_features, - n_features) - M = np.tril(M) + np.tril(M, -1).T + M = make_spd_matrix(n_features, random_state=random_state) if return_inverse: M_inv = pinvh(M) if return_inverse: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 18b7b3cb..b8468b6f 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -56,9 +56,9 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. numpy array A numpy array of shape (n_features, n_features), that will diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index d8659c4c..efa5c525 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -38,13 +38,13 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. numpy array A numpy array of shape (n_features, n_features), that will @@ -231,10 +231,18 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', 'identity' An identity matrix of shape (n_features, n_features). + 'covariance' + The inverse covariance matrix. + 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + prior : Not used. .. 
deprecated:: 0.5.0 `prior` was deprecated in version 0.5.0 and will diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 37e80fb7..635deb49 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -55,9 +55,9 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. numpy array A numpy array of shape (n_features, n_features), that will @@ -472,13 +472,13 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. numpy array A numpy array of shape (n_features, n_features), that will diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index d07a2cea..37430561 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -51,13 +51,13 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. numpy array A numpy array of shape (n_features, n_features), that will @@ -243,13 +243,13 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. 'random' - The initial transformation will be a random array of shape - `(n_features, n_features)`. Each value is sampled from the - standard normal distribution. + The initial transformation will be a random SPD matrix of shape + `(n_features, n_features)`, using + `sklearn.datasets.make_spd_matrix`. 
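What the new 'random' option produces can be checked directly against scikit-learn; a minimal sketch (the dimensionality 4 is arbitrary):

import numpy as np
from sklearn.datasets import make_spd_matrix

M = make_spd_matrix(4, random_state=42)
assert np.allclose(M, M.T)                # symmetric
assert np.all(np.linalg.eigvalsh(M) > 0)  # strictly positive eigenvalues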
numpy array A numpy array of shape (n_features, n_features), that will diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 013f9260..04dead84 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -6,6 +6,7 @@ from scipy.spatial.distance import pdist, squareform, mahalanobis from sklearn import clone from sklearn.cluster import DBSCAN +from sklearn.datasets import make_spd_matrix from sklearn.utils import check_random_state from sklearn.utils.multiclass import type_of_target from sklearn.utils.testing import set_random_state @@ -500,8 +501,8 @@ def test_init_mahalanobis(estimator, build_dataset): model.set_params(init='covariance') model.fit(input_data, labels) - # Initialize with a numpy array - init = rng.rand(X.shape[1], X.shape[1]) + # Initialize with a random spd matrix + init = make_spd_matrix(X.shape[1], random_state=rng) model.set_params(init=init) model.fit(input_data, labels) From eb98eff7d972b2d059092d95b320b3f4ed3ad049 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 15:02:10 +0200 Subject: [PATCH 14/52] Ensure the input mahalanobis metric initialization is symmetric, and say it should be SPD --- metric_learn/_util.py | 8 +++++++- metric_learn/itml.py | 2 +- metric_learn/lsml.py | 2 +- metric_learn/mmc.py | 2 +- metric_learn/sdml.py | 2 +- test/test_mahalanobis_mixin.py | 12 +++++++++++- 6 files changed, 22 insertions(+), 6 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index b6455b7b..ecba7be3 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -599,7 +599,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, `sklearn.datasets.make_spd_matrix`. numpy array - A numpy array of shape (n_features, n_features), that will + An SPD matrix of shape (n_features, n_features), that will be used as such to initialize the metric. random_state : int or `numpy.RandomState` or None, optional (default=None) @@ -629,6 +629,12 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, 'dimensionality of the given inputs ({}).' .format(init.shape, n_features)) + # Assert that the matrix is symmetric + if not np.allclose(init, init.T): + raise ValueError("The initialization matrix should be semi-definite " + "positive (SPD). It is not, since it appears not to be " + "symmetric.") + elif init in ['identity', 'covariance', 'random']: pass else: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index b8468b6f..bac31c09 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -61,7 +61,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, `sklearn.datasets.make_spd_matrix`. numpy array - A numpy array of shape (n_features, n_features), that will + An SPD matrix of shape (n_features, n_features), that will be used as such to initialize the metric. A0 : Not used diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index efa5c525..0a44144f 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -47,7 +47,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', `sklearn.datasets.make_spd_matrix`. numpy array - A numpy array of shape (n_features, n_features), that will + An SPD matrix of shape (n_features, n_features), that will be used as such to initialize the metric. 
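The new guard only tests symmetry, the cheap necessary condition for being SPD. A rejected candidate can be symmetrized before being passed as `init`; a sketch:

import numpy as np

rng = np.random.RandomState(42)
A = rng.rand(3, 3)
print(np.allclose(A, A.T))          # False: a raw random matrix is rejected
A_sym = (A + A.T) / 2               # average with the transpose
print(np.allclose(A_sym, A_sym.T))  # True: the symmetrized version passes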
tol : float, optional diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 635deb49..056ae77e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -60,7 +60,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, `sklearn.datasets.make_spd_matrix`. numpy array - A numpy array of shape (n_features, n_features), that will + An SPD matrix of shape (n_features, n_features), that will be used as such to initialize the metric. verbose : bool, optional diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 37430561..786d10fc 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -60,7 +60,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', `sklearn.datasets.make_spd_matrix`. numpy array - A numpy array of shape (n_features, n_features), that will + An SPD matrix of shape (n_features, n_features), that will be used as such to initialize the metric. use_cov : Not used. diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 04dead84..8d6a2b74 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -507,7 +507,7 @@ def test_init_mahalanobis(estimator, build_dataset): model.fit(input_data, labels) # init.shape[1] must match X.shape[1] - init = rng.rand(X.shape[1], X.shape[1] + 1) + init = make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1) model.set_params(init=init) msg = ('The input dimensionality {} of the given ' 'mahalanobis matrix `init` must match the ' @@ -517,6 +517,16 @@ def test_init_mahalanobis(estimator, build_dataset): model.fit(input_data, labels) assert str(raised_error.value) == msg + # The input matrix must be symmetric + init = rng.rand(X.shape[1], X.shape[1]) + model.set_params(init=init) + msg = ("The initialization matrix should be semi-definite " + "positive (SPD). It is not, since it appears not to be " + "symmetric.") + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + # init must be as specified in the docstring model.set_params(init=1) msg = ("`init` must be 'identity', 'covariance', " From 508d94e5ffa466d23e5a882f1c68d2af8bce92d5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 15:05:47 +0200 Subject: [PATCH 15/52] various fixes --- metric_learn/mlkr.py | 2 +- metric_learn/mmc.py | 2 +- test/test_utils.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index e14af3c7..18628e77 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -122,7 +122,7 @@ def fit(self, X, y): X : (n x d) array of samples y : (n) data labels """ - if self.init != 'deprecated': + if self.A0 != 'deprecated': warnings.warn('"A0" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' 'removed in 0.6.0. Use "init" instead.', diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 056ae77e..518cca36 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -101,7 +101,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, super(_BaseMMC, self).__init__(preprocessor) def _fit(self, pairs, y): - if self.init != 'deprecated': + if self.A0 != 'deprecated': warnings.warn('"A0" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' 'removed in 0.6.0. 
Use "init" instead.', diff --git a/test/test_utils.py b/test/test_utils.py index 2890251b..2dd6c781 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -928,9 +928,9 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): formed_points_to_transform = dataset_formed.to_transform (indices_train, indices_test, y_train, y_test, formed_train, formed_test) = train_test_split(dataset_indices.data, - dataset_indices.target, - dataset_formed.data, - random_state=SEED) + dataset_indices.target, + dataset_formed.data, + random_state=SEED) def make_random_state(estimator): rs = {} From bbf31cb189f091a47d15f53d190b433b82ea7362 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 15:51:40 +0200 Subject: [PATCH 16/52] MAINT: various refactoring - MLKR: update default test init - SDML: refactor prior_inv --- metric_learn/sdml.py | 3 ++- test/test_utils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 786d10fc..0a775bda 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -104,7 +104,8 @@ def _fit(self, pairs, y): type_of_inputs='tuples') # set up (the inverse of) the prior M - prior_inv = pinvh(_initialize_metric_mahalanobis(pairs, self.init)) + _, prior_inv = _initialize_metric_mahalanobis(pairs, self.init, + return_inverse=True) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix diff --git a/test/test_utils.py b/test/test_utils.py index 2dd6c781..4762c759 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -126,7 +126,7 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in classifiers])) -regressors = [(MLKR(), build_regression)] +regressors = [(MLKR(init='pca'), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in regressors])) From aafa8e2f7463c71579f08956b8039fff4edc6fb2 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 9 May 2019 17:13:43 +0200 Subject: [PATCH 17/52] FIX fix default covariance for SDML in tests --- test/metric_learn_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 7543f475..95dd5fe0 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -332,7 +332,7 @@ def test_sdml_raises_warning_non_psd(self): pseudo-covariance matrix is not PSD""" pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) y = [1, -1] - sdml = SDML(use_cov=True, sparsity_param=0.01, balance_param=0.5) + sdml = SDML(init='covariance', sparsity_param=0.01, balance_param=0.5) msg = ("Warning, the input matrix of graphical lasso is not " "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " @@ -429,7 +429,7 @@ def test_verbose_has_not_installed_skggm_sdml(capsys): # TODO: remove if we don't need skggm anymore pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] - sdml = SDML(verbose=True) + sdml = SDML(verbose=True, init='covariance') sdml.fit(pairs, y_pairs) out, _ = capsys.readouterr() assert "SDML will use scikit-learn's graphical lasso solver." 
in out From 748459ef9f3f1bc0aecc42ece9c3b87f24ed9afe Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 10 May 2019 08:41:24 +0200 Subject: [PATCH 18/52] Enhance docstring --- metric_learn/_util.py | 2 +- metric_learn/itml.py | 2 +- metric_learn/lsml.py | 4 ++-- metric_learn/mmc.py | 4 ++-- metric_learn/sdml.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index ecba7be3..8d878139 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -595,7 +595,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array diff --git a/metric_learn/itml.py b/metric_learn/itml.py index bac31c09..9c60d205 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -57,7 +57,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 0a44144f..d74bd612 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -43,7 +43,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array @@ -236,7 +236,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 518cca36..2d1e6fb6 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -56,7 +56,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array @@ -477,7 +477,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 0a775bda..367457ca 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -56,7 +56,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array @@ -249,7 +249,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', 'random' The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, using + `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. 
numpy array From 06a55dae1a71555ac0c650c34f631f0f0b46d884 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 10 May 2019 11:49:59 +0200 Subject: [PATCH 19/52] Set random state for SDML --- test/metric_learn_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 95dd5fe0..4f12e872 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -366,7 +366,7 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): X, y = load_iris(return_X_y=True) sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, init='covariance') - sdml.fit(X, y) + sdml.fit(X, y, random_state=np.random.RandomState(42)) def test_deprecation_use_cov(self): # test that a deprecation message is thrown if use_cov is set at From 26fb9e785b0afcb297de5009b2208f4881cbd811 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 13 May 2019 14:41:46 +0200 Subject: [PATCH 20/52] Fix merge remove_spaces that was forgotten --- test/test_base_metric.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 90ed0114..1ba5d991 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -30,7 +30,7 @@ def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), remove_spaces( "NCA(init='auto', max_iter=100, num_dims=None, " - "preprocessor=None,\n random_state=None, " + "preprocessor=None,\n random_state=None, " "tol=None, verbose=False)")) def test_lfda(self): @@ -55,12 +55,13 @@ def test_itml(self): """)) def test_lsml(self): - self.assertEqual( - remove_spaces(str(metric_learn.LSML()), """ + self.assertEqual(remove_spaces(str(metric_learn.LSML())), + remove_spaces(""" LSML(init='identity', max_iter=1000, preprocessor=None, prior='deprecated', random_state=None, tol=0.001, verbose=False) -""".strip('\n'))) - remove_spaces(self.assertEqual(str(metric_learn.LSML_Supervised()), """ +""")) + self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), + remove_spaces(""" LSML_Supervised(init='identity', max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, prior='deprecated', random_state=None, tol=0.001, verbose=False, weights=None) From 5e3daa43e33252cd8a5d2d726f23c9eca6f9a02b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 13 May 2019 14:44:35 +0200 Subject: [PATCH 21/52] Fix indent --- test/test_base_metric.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 1ba5d991..007476c3 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -29,16 +29,16 @@ def test_lmnn(self): def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), remove_spaces( - "NCA(init='auto', max_iter=100, num_dims=None, " - "preprocessor=None,\n random_state=None, " - "tol=None, verbose=False)")) + "NCA(init='auto', max_iter=100, num_dims=None, " + "preprocessor=None,\n random_state=None, " + "tol=None, verbose=False)")) def test_lfda(self): self.assertEqual(remove_spaces(str(metric_learn.LFDA())), remove_spaces( - "LFDA(embedding_type='weighted', k=None, " - "num_dims=None, " - "preprocessor=None)")) + "LFDA(embedding_type='weighted', k=None, " + "num_dims=None, " + "preprocessor=None)")) def test_itml(self): self.assertEqual(remove_spaces(str(metric_learn.ITML())), From e86b61b7db2f432d291f8fd85e90ae80b55adf5e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 13 
May 2019 15:05:20 +0200 Subject: [PATCH 22/52] XP: try to change the way we choose n_components to see if it fixes the test --- metric_learn/_util.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 5d330d28..16929e1d 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -559,13 +559,18 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', elif init in {'pca', 'lda'}: init_time = time.time() if init == 'pca': - pca = PCA(n_components=num_dims, + # TODO: set back with n_components=num_dims + # pca = PCA(n_components=num_dims, + # random_state=random_state) + pca = PCA(n_components=n_features, random_state=random_state) if verbose: print('Finding principal components... ') sys.stdout.flush() pca.fit(input) - transformation = pca.components_ + # TODO: change here too + transformation = pca.components_[:num_dims] + # transformation = pca.components_ elif init == 'lda': lda = LinearDiscriminantAnalysis(n_components=num_dims) if verbose: From 0b69e7edd5920b5f5c0f2a031e3aeb6052c05fb8 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 13 May 2019 15:16:55 +0200 Subject: [PATCH 23/52] Revert "XP: try to change the way we choose n_components to see if it fixes the test" This reverts commit e86b61b7db2f432d291f8fd85e90ae80b55adf5e. --- metric_learn/_util.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 16929e1d..5d330d28 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -559,18 +559,13 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', elif init in {'pca', 'lda'}: init_time = time.time() if init == 'pca': - # TODO: set back with n_components=num_dims - # pca = PCA(n_components=num_dims, - # random_state=random_state) - pca = PCA(n_components=n_features, + pca = PCA(n_components=num_dims, random_state=random_state) if verbose: print('Finding principal components... 
') sys.stdout.flush() pca.fit(input) - # TODO: change here too - transformation = pca.components_[:num_dims] - # transformation = pca.components_ + transformation = pca.components_ elif init == 'lda': lda = LinearDiscriminantAnalysis(n_components=num_dims) if verbose: From 95a86a97177cfcb49bdfd9eee4fde4158810e5a5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 13 May 2019 15:21:36 +0200 Subject: [PATCH 24/52] Be more tolerant in test --- test/test_mahalanobis_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 8d6a2b74..993f8044 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -194,7 +194,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, a, b = (rng.randn(n_features), rng.randn(n_features)) expected_dist = mahalanobis(a[None], b[None], VI=model.get_mahalanobis_matrix()) - assert_allclose(metric(a, b), expected_dist, rtol=1e-15) + assert_allclose(metric(a, b), expected_dist, rtol=1e-13) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, From d622fae379711e9ca66712b8c83d139b90c1e34b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 13 May 2019 16:35:41 +0200 Subject: [PATCH 25/52] Add test for singular covariance matrix --- test/test_mahalanobis_mixin.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 993f8044..cadb045a 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -487,6 +487,7 @@ def test_init_mahalanobis(estimator, build_dataset): """ input_data, labels, _, X = build_dataset() model = clone(estimator) + set_random_state(model) rng = np.random.RandomState(42) # Start learning from scratch @@ -535,3 +536,34 @@ def test_init_mahalanobis(estimator, build_dataset): with pytest.raises(ValueError) as raised_error: model.fit(input_data, labels) assert str(raised_error.value) == msg + + +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if not hasattr(ml, 'num_dims') and + hasattr(ml, 'init')], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if not hasattr(ml, 'num_dims') and + hasattr(ml, 'init')]) +def test_singular_covariance_init(estimator, build_dataset): + """Tests that when using the 'covariance' init, it works even if the + covariance matrix is singular (see + https://github.com/metric-learn/metric-learn/issues/202) + """ + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + # We create a feature that is a linear combination of the first two + # features: + coefs = np.random.RandomState(42).randn(2, 1) + input_data = np.concatenate([input_data, input_data[:, ..., :2] + .dot(coefs)], + axis=-1) + + # Fitting the model should return no error + model.set_params(init='covariance') + model.fit(input_data, labels) From d2cc7cec3099edb1cd1bfaf761e24ec7473e110f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 14 May 2019 10:04:51 +0200 Subject: [PATCH 26/52] Fix test_singular_covariance_init --- metric_learn/lsml.py | 2 +- metric_learn/sdml.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 0fd15db6..022650d9 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -154,7 +154,7 @@ def _total_loss(self, metric, 
vab, vcd, prior_inv): return self._comparison_loss(metric, vab, vcd) + reg_loss def _gradient(self, metric, vab, vcd, prior_inv): - dMetric = prior_inv - np.linalg.inv(metric) + dMetric = prior_inv - scipy.linalg.pinvh(metric) dabs = np.sum(vab.dot(metric) * vab, axis=1) dcds = np.sum(vcd.dot(metric) * vcd, axis=1) violations = dabs > dcds diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index b3940a24..fb7a7f25 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -112,7 +112,11 @@ def _fit(self, pairs, y): return_inverse=True) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - emp_cov = prior_inv + self.balance_param * loss_matrix + emp_cov = (prior_inv + self.balance_param * loss_matrix + + # We add a small value on the diagonal in case the + # emp_cov matrix is singular (see + # #https://github.com/metric-learn/metric-learn/issues/202) + np.eye(diff.shape[1]) * 1e-10) # our initialization will be the matrix with emp_cov's eigenvalues, # with a constant added so that they are all positive (plus an epsilon From a7d2791d5c0ad0a958cd0610fd4146efdbc0df5b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 14 May 2019 10:13:39 +0200 Subject: [PATCH 27/52] DOC: update docstring saying pseudo-inverse --- metric_learn/_util.py | 2 +- metric_learn/itml.py | 4 ++-- metric_learn/lsml.py | 4 ++-- metric_learn/mmc.py | 4 ++-- metric_learn/sdml.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 5d330d28..fba33f11 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -597,7 +597,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape diff --git a/metric_learn/itml.py b/metric_learn/itml.py index e3b3c963..7a0430ac 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -54,7 +54,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -292,7 +292,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random array of shape diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 022650d9..6cfc4c74 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -46,7 +46,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -239,7 +239,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 
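Why the '(pseudo-)' qualifier matters: a rank-deficient covariance has no true inverse, but scipy's pinvh is still well defined. A sketch of the issue-202 scenario (data and coefficients are illustrative):

import numpy as np
from scipy.linalg import pinvh

X = np.random.RandomState(42).randn(50, 2)
X = np.hstack([X, X.dot([[2.], [3.]])])  # third feature = 2*x0 + 3*x1
cov = np.cov(X, rowvar=False)            # shape (3, 3) but rank 2: singular
M = pinvh(cov)                           # pseudo-inverse, no error raised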
'random' The initial transformation will be a random SPD matrix of shape diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index ebee93cf..fb925de6 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -52,7 +52,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -473,7 +473,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index fb7a7f25..581b4448 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -56,7 +56,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -253,7 +253,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The inverse covariance matrix. + The (pseudo-)inverse of the covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape From 3590cfa95ba390407d78ac0565dff6e7ab4c79be Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 14 May 2019 11:56:08 +0200 Subject: [PATCH 28/52] Revert "Fix test_singular_covariance_init" This reverts commit d2cc7cec3099edb1cd1bfaf761e24ec7473e110f. --- metric_learn/_util.py | 2 +- metric_learn/itml.py | 4 ++-- metric_learn/lsml.py | 6 +++--- metric_learn/mmc.py | 4 ++-- metric_learn/sdml.py | 10 +++------- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index fba33f11..5d330d28 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -597,7 +597,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7a0430ac..e3b3c963 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -54,7 +54,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -292,7 +292,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 
'random' The initial transformation will be a random array of shape diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 6cfc4c74..0fd15db6 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -46,7 +46,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -154,7 +154,7 @@ def _total_loss(self, metric, vab, vcd, prior_inv): return self._comparison_loss(metric, vab, vcd) + reg_loss def _gradient(self, metric, vab, vcd, prior_inv): - dMetric = prior_inv - scipy.linalg.pinvh(metric) + dMetric = prior_inv - np.linalg.inv(metric) dabs = np.sum(vab.dot(metric) * vab, axis=1) dcds = np.sum(vcd.dot(metric) * vcd, axis=1) violations = dabs > dcds @@ -239,7 +239,7 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index fb925de6..ebee93cf 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -52,7 +52,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -473,7 +473,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 581b4448..b3940a24 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -56,7 +56,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 'random' The initial transformation will be a random SPD matrix of shape @@ -112,11 +112,7 @@ def _fit(self, pairs, y): return_inverse=True) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) - emp_cov = (prior_inv + self.balance_param * loss_matrix + - # We add a small value on the diagonal in case the - # emp_cov matrix is singular (see - # #https://github.com/metric-learn/metric-learn/issues/202) - np.eye(diff.shape[1]) * 1e-10) + emp_cov = prior_inv + self.balance_param * loss_matrix # our initialization will be the matrix with emp_cov's eigenvalues, # with a constant added so that they are all positive (plus an epsilon @@ -253,7 +249,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', An identity matrix of shape (n_features, n_features). 'covariance' - The (pseudo-)inverse of the covariance matrix. + The inverse covariance matrix. 
'random' The initial transformation will be a random SPD matrix of shape From 503a71545fe27233eac0323f3588542574c01d29 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 15 May 2019 11:52:15 +0200 Subject: [PATCH 29/52] Ensure definiteness before returning the inverse --- metric_learn/_util.py | 39 +++++++++++++++++++++++++++------- metric_learn/exceptions.py | 8 +++++++ metric_learn/lsml.py | 8 ++++--- metric_learn/sdml.py | 8 ++++--- test/test_mahalanobis_mixin.py | 15 +++++++------ test/test_utils.py | 35 ++++++++++++++++++++++++++++-- 6 files changed, 91 insertions(+), 22 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 5d330d28..5508c88f 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,13 +1,13 @@ import numpy as np +import scipy import six from numpy.linalg import LinAlgError from sklearn.datasets import make_spd_matrix from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils.validation import check_X_y, check_random_state -from .exceptions import PreprocessorError +from .exceptions import PreprocessorError, NonPSDError from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.utils.multiclass import type_of_target from scipy.linalg import pinvh import sys import time @@ -341,6 +341,7 @@ def check_collapsed_pairs(pairs): def _check_sdp_from_eigen(w, tol=None): """Checks if some of the eigenvalues given are negative, up to a tolerance level, with a default value of the tolerance depending on the eigenvalues. + It also returns whether the matrix is definite. Parameters ---------- @@ -348,9 +349,14 @@ def _check_sdp_from_eigen(w, tol=None): Eigenvalues to check for non semidefinite positiveness. tol : positive `float`, optional - Negative eigenvalues above - tol are considered zero. If + Absolute eigenvalues below tol are considered zero. If tol is None, and eps is the epsilon value for datatype of w, then tol - is set to w.max() * len(w) * eps. + is set to abs(w).max() * len(w) * eps. + + Returns + ------- + is_definite : bool + Whether the matrix is definite or not. See Also -------- @@ -358,11 +364,14 @@ def _check_sdp_from_eigen(w, tol=None): strategy is applied here) """ if tol is None: - tol = w.max() * len(w) * np.finfo(w.dtype).eps + tol = np.abs(w).max() * len(w) * np.finfo(w.dtype).eps if tol < 0: raise ValueError("tol should be positive.") if any(w < - tol): - raise ValueError("Matrix is not positive semidefinite (PSD).") + raise NonPSDError + if any(abs(w) < tol): + return False + return True def transformer_from_metric(metric, tol=None): @@ -651,8 +660,13 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, random_state = check_random_state(random_state) M = init if isinstance(init, np.ndarray): + s, u = scipy.linalg.eigh(init) + init_is_definite = _check_sdp_from_eigen(s) if return_inverse: - M_inv = pinvh(M) + if not init_is_definite: + raise LinAlgError("Cannot inverse the initialization matrix " + "(it is not definite). 
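The np.dot(u / s, u.T) expression reuses the eigendecomposition already computed for the PSD check instead of inverting twice. A quick equivalence sketch on a small definite matrix:

import numpy as np
from scipy.linalg import eigh

M = np.array([[4., 1.], [1., 3.]])
s, u = eigh(M)              # eigenvalues s, eigenvectors in the columns of u
M_inv = np.dot(u / s, u.T)  # u @ diag(1/s) @ u.T
assert np.allclose(M_inv, np.linalg.inv(M))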
Try another initialization.") + M_inv = np.dot(u / s, u.T) else: if init == 'identity': M = np.eye(n_features, n_features) @@ -666,11 +680,20 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, X = input M_inv = np.atleast_2d(np.cov(X, rowvar=False)) # TODO: check atleast_2d necessary - M = pinvh(M_inv) + s, u = scipy.linalg.eigh(M_inv) + s_is_definite = _check_sdp_from_eigen(s) + if not s_is_definite: + raise LinAlgError("Cannot inverse the covariance matrix (it is not " + "definite). Try another initialization.") + M = np.dot(u / s, u.T) elif init == 'random': # we need to create a random symmetric matrix M = make_spd_matrix(n_features, random_state=random_state) if return_inverse: + # we use pinvh even if we know the matrix is definite, just because + # we need the returned matrix to be symmetric (and sometimes + # np.linalg.inv returns not symmetric inverses of symmetric matrices) + # TODO: there might be a more efficient method to do so M_inv = pinvh(M) if return_inverse: return (M, M_inv) diff --git a/metric_learn/exceptions.py b/metric_learn/exceptions.py index 424d2c4f..76f09778 100644 --- a/metric_learn/exceptions.py +++ b/metric_learn/exceptions.py @@ -2,6 +2,7 @@ The :mod:`metric_learn.exceptions` module includes all custom warnings and error classes used across metric-learn. """ +from numpy.linalg import LinAlgError class PreprocessorError(Exception): @@ -10,3 +11,10 @@ def __init__(self, original_error): err_msg = ("An error occurred when trying to use the " "preprocessor: {}").format(repr(original_error)) super(PreprocessorError, self).__init__(err_msg) + + +class NonPSDError(LinAlgError): + + def __init__(self): + err_msg = "Matrix is not positive semidefinite (PSD)." + super(LinAlgError, self).__init__(err_msg) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 0fd15db6..61cf9bd2 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -40,7 +40,8 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', init : string or numpy array, optional (default='identity') Initialization of the linear transformation. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). + (n_features, n_features). For LSML, the init should be strictly + positive definite (PD). 'identity' An identity matrix of shape (n_features, n_features). @@ -54,8 +55,9 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity', `sklearn.datasets.make_spd_matrix`. numpy array - An SPD matrix of shape (n_features, n_features), that will - be used as such to initialize the metric. + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to initialize + the metric. tol : float, optional max_iter : int, optional diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index b3940a24..98499c71 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -50,7 +50,8 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', init : string or numpy array, optional (default='identity') Initialization of the linear transformation. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). + (n_features, n_features). For SDML, the init should be strictly + positive definite (PD). 'identity' An identity matrix of shape (n_features, n_features). @@ -64,8 +65,9 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity', `sklearn.datasets.make_spd_matrix`. 
    numpy array
-        An SPD matrix of shape (n_features, n_features), that will
-        be used as such to initialize the metric.
+        A positive definite (PD) matrix of shape
+        (n_features, n_features), that will be used as such to initialize
+        the metric.

     use_cov : Not used.
       .. deprecated:: 0.5.0
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index cadb045a..056ccdd3 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -2,6 +2,7 @@
 import pytest
 import numpy as np
+from numpy.linalg import LinAlgError
 from numpy.testing import assert_array_almost_equal, assert_allclose
 from scipy.spatial.distance import pdist, squareform, mahalanobis
 from sklearn import clone
@@ -550,8 +551,8 @@ def test_singular_covariance_init(estimator, build_dataset):
-  """Tests that when using the 'covariance' init, it works even if the
-  covariance matrix is singular (see
+  """Tests that when using the 'covariance' init, it returns the
+  appropriate error if the covariance matrix is singular (see
   https://github.com/metric-learn/metric-learn/issues/202)
   """
   input_data, labels, _, X = build_dataset()
   model = clone(estimator)
   set_random_state(model)
   # We create a feature that is a linear combination of the first two
   # features:
-  coefs = np.random.RandomState(42).randn(2, 1)
   input_data = np.concatenate([input_data, input_data[:, ..., :2]
-                              .dot(coefs)],
+                              .dot([[2], [3]])],
                               axis=-1)
-  # Fitting the model should return no error
   model.set_params(init='covariance')
-  model.fit(input_data, labels)
+  msg = ("Cannot inverse the covariance matrix "
+         "(it is not definite). Try another initialization.")
+  with pytest.raises(LinAlgError) as raised_err:
+    model.fit(input_data, labels)
+  assert str(raised_err.value) == msg
diff --git a/test/test_utils.py b/test/test_utils.py
index 67eb23eb..998a6077 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -19,7 +19,7 @@
 from metric_learn.base_metric import (ArrayIndexer, MahalanobisMixin,
                                       _PairsClassifierMixin,
                                       _QuadrupletsClassifierMixin)
-from metric_learn.exceptions import PreprocessorError
+from metric_learn.exceptions import PreprocessorError, NonPSDError
 from sklearn.datasets import make_regression, make_blobs, load_iris
@@ -996,7 +996,7 @@ def test__validate_vector():
     validate_vector(x)


-def test_check_sdp_from_eigen_positive_err_messages():
+def test__check_sdp_from_eigen_positive_err_messages():
   """Tests that if _check_sdp_from_eigen is given a negative tol it returns
   an error, and if positive (or None) it does not"""
   w = np.abs(np.random.RandomState(42).randn(10)) + 1
@@ -1011,6 +1011,37 @@
     _check_sdp_from_eigen(w, None)


+@pytest.mark.unit
+@pytest.mark.parametrize('w', [np.array([-1.2, 5.5, 6.6]),
+                               np.array([-1.2, -5.6])])
+def test__check_sdp_from_eigen_negative_eigenvalues(w):
+  """Tests that _check_sdp_from_eigen raises a NonPSDError when some
+  eigenvalues are negative."""
+  with pytest.raises(NonPSDError):
+    _check_sdp_from_eigen(w)
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w', [np.array([0., 2.3, 5.3]),
+                               np.array([1e-20, 3.5]),
+                               np.array([1.5, 2.4, 4.6])])
+def test__check_sdp_from_eigen_positive_eigenvalues(w):
+  """Tests that _check_sdp_from_eigen returns no error when the
+  eigenvalues are non-negative."""
+  _check_sdp_from_eigen(w)
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w, is_definite', [(np.array([1e-15, 5.6]), False),
+                                            (np.array([-1e-15, 5.6]), False),
+                                            (np.array([3.2, 5.6, 0.01]), True),
+                                            ])
+def test__check_sdp_from_eigen_returns_definiteness(w, is_definite):
+  """Tests that _check_sdp_from_eigen returns the definiteness of the
+  matrix (when it is PSD), based on the given eigenvalues"""
+  assert _check_sdp_from_eigen(w) == is_definite
+
+
 @pytest.mark.unit
 @pytest.mark.parametrize('wrong_labels',
                          [[0.5, 0.6, 0.7, 0.8, 0.9],
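The commit above and the one that follows both invert a PSD matrix through its eigendecomposition rather than calling an inverse routine blindly, so that near-zero eigenvalues can be inspected first. A quick standalone illustration of the np.dot(u / s, u.T) idiom used throughout (toy values, not part of the patch):

import numpy as np
import scipy.linalg
from sklearn.datasets import make_spd_matrix

M = make_spd_matrix(5, random_state=42)
s, u = scipy.linalg.eigh(M)         # M == u @ np.diag(s) @ u.T
M_inv = np.dot(u / s, u.T)          # u / s divides each eigenvector column by its eigenvalue
assert np.allclose(M_inv, np.linalg.inv(M))
assert np.allclose(M_inv, M_inv.T)  # symmetric up to rounding, by construction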
From 32bbdf32f2f52845776963cc6ab6581d155771bf Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Wed, 15 May 2019 17:08:27 +0200
Subject: [PATCH 30/52] wip deal with non definiteness

---
 metric_learn/_util.py                      | 40 ++++++-----
 metric_learn/itml.py                       | 35 +++++-----
 metric_learn/lsml.py                       | 17 +++--
 metric_learn/mmc.py                        |  6 +-
 metric_learn/sdml.py                       | 16 +++--
 test/test_mahalanobis_mixin.py             | 78 ++++++++++++++++++----
 test/test_transformer_metric_conversion.py |  5 +-
 7 files changed, 134 insertions(+), 63 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 5508c88f..8a8bc05b 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -588,9 +588,9 @@ def _initialize_transformer(num_dims, input, y=None, init='auto',

 def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
-                                   return_inverse=False):
-  """Returns the initial mahalanobis matrix to be used depending on the
-  arguments.
+                                   return_inverse=False, strict_pd=False):
+  """Returns a standard mahalanobis matrix that can be used as a prior or an
+  initialization

   Parameters
   ----------
@@ -614,8 +614,9 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
       `sklearn.datasets.make_spd_matrix`.

     numpy array
-      An SPD matrix of shape (n_features, n_features), that will
-      be used as such to initialize the metric.
+      A PSD matrix (or strictly PD if strict_pd==True) of
+      shape (n_features, n_features), that will be used as such to
+      initialize the metric, or set the prior.

   random_state : int or `numpy.RandomState` or None, optional (default=None)
     A pseudo random number generator object or a seed for it if int. If
@@ -625,7 +626,12 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   return_inverse : bool, optional (default=False)
     Whether to return the inverse of the matrix initializing the metric. This
-    can be sometimes useful.
+    can be sometimes useful. It will return the pseudo-inverse (which is the
+    same as the inverse if the matrix is definite (i.e. invertible))
+
+  strict_pd : bool, optional (default=False)
+    Whether to enforce that the provided matrix is definite (in addition to
+    being PSD).

   Returns
   -------
@@ -646,9 +652,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
     # Assert that the matrix is symmetric
     if not np.allclose(init, init.T):
-      raise ValueError("The initialization matrix should be semi-definite "
-                       "positive (SPD). It is not, since it appears not to be "
-                       "symmetric.")
+      raise ValueError("The given matrix is not symmetric.")

   elif init in ['identity', 'covariance', 'random']:
     pass
@@ -662,10 +666,12 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   if isinstance(init, np.ndarray):
     s, u = scipy.linalg.eigh(init)
     init_is_definite = _check_sdp_from_eigen(s)
+    if strict_pd and not init_is_definite:
+      raise LinAlgError("You should provide a strictly positive definite "
+                        "matrix. This one is not definite. Try another "
+                        "initialization, or an algorithm that does not "
+                        "require the init to be strictly positive definite.")
     if return_inverse:
-      if not init_is_definite:
-        raise LinAlgError("Cannot inverse the initialization matrix "
-                          "(it is not definite). Try another initialization.")
       M_inv = np.dot(u / s, u.T)
   else:
     if init == 'identity':
       M = np.eye(n_features, n_features)
@@ -681,10 +687,12 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
       M_inv = np.atleast_2d(np.cov(X, rowvar=False))
       # TODO: check atleast_2d necessary
       s, u = scipy.linalg.eigh(M_inv)
-      s_is_definite = _check_sdp_from_eigen(s)
-      if not s_is_definite:
-        raise LinAlgError("Cannot inverse the covariance matrix (it is not "
-                          "definite). Try another initialization.")
+      cov_is_definite = _check_sdp_from_eigen(s)
+      if strict_pd and not cov_is_definite:
+        raise LinAlgError("Unable to get a true inverse of the covariance "
+                          "matrix since it is not definite. Try another "
+                          "initialization, or an algorithm that does not "
+                          "require the init to be strictly positive definite.")
       M = np.dot(u / s, u.T)
     elif init == 'random':
       # we need to create a random symmetric matrix
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index e3b3c963..4c6b26be 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -48,22 +48,24 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
     init : string or numpy array, optional (default='identity')
         Initialization of the linear transformation. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features).
+        (n_features, n_features). For ITML, the init should be strictly
+        positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).

         'covariance'
-          The inverse covariance matrix.
+            The inverse covariance matrix.

         'random'
-          The initial transformation will be a random SPD matrix of shape
-          `(n_features, n_features)`, generated using
-          `sklearn.datasets.make_spd_matrix`.
+            The initial transformation will be a random SPD matrix of shape
+            `(n_features, n_features)`, generated using
+            `sklearn.datasets.make_spd_matrix`.

         numpy array
-            An SPD matrix of shape (n_features, n_features), that will
-            be used as such to initialize the metric.
+            A positive definite (PD) matrix of shape
+            (n_features, n_features), that will be used as such to set the
+            prior.

     A0 : Not used
         .. deprecated:: 0.5.0
@@ -111,7 +113,8 @@ def _fit(self, pairs, y, bounds=None):
       self.bounds_[self.bounds_ == 0] = 1e-9
     # init metric
     # pairs will be deduplicated into X two times, see how to avoid that
-    A = _initialize_metric_mahalanobis(pairs, self.init, self.random_state)
+    A = _initialize_metric_mahalanobis(pairs, self.init, self.random_state,
+                                       strict_pd=True)
     gamma = self.gamma
     pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1]
@@ -286,22 +289,24 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
     init : string or numpy array, optional (default='identity')
         Initialization of the linear transformation. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features).
+        (n_features, n_features). For ITML, the init should be strictly
+        positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).

-       'covariance'
+        'covariance'
            The inverse covariance matrix.

         'random'
-          The initial transformation will be a random array of shape
-          `(n_features, n_features)`. Each value is sampled from the
-          standard normal distribution.
+            The initial transformation will be a random SPD matrix of shape
+            `(n_features, n_features)`, generated using
+            `sklearn.datasets.make_spd_matrix`.

         numpy array
-            A numpy array of shape (n_features, n_features), that will
-            be used as such to initialize the metric.
+            A positive definite (PD) matrix of shape
+            (n_features, n_features), that will be used as such to set the
+            prior.

     A0 : Not used
         .. deprecated:: 0.5.0
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 61cf9bd2..30a9e4d3 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -56,8 +56,8 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity',

         numpy array
             A positive definite (PD) matrix of shape
-            (n_features, n_features), that will be used as such to initialize
-            the metric.
+            (n_features, n_features), that will be used as such to set the
+            prior.

     tol : float, optional
     max_iter : int, optional
@@ -103,7 +103,8 @@ def _fit(self, quadruplets, weights=None):
       self.w_ = weights
     self.w_ /= self.w_.sum()  # weights must sum to 1
     M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.init,
-                                                  return_inverse=True)
+                                                  return_inverse=True,
+                                                  strict_pd=True)

     step_sizes = np.logspace(-10, 0, 10)
     # Keep track of the best step size and the loss at that step.
@@ -235,7 +236,8 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity',
     init : string or numpy array, optional (default='identity')
         Initialization of the linear transformation. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features).
+        (n_features, n_features). For LSML, the init should be strictly
+        positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).
@@ -248,9 +250,10 @@ def __init__(self, tol=1e-3, max_iter=1000, init='identity',
             `(n_features, n_features)`, generated using
             `sklearn.datasets.make_spd_matrix`.

-       numpy array
-            A numpy array of shape (n_features, n_features), that will
-            be used as such to initialize the metric.
+        numpy array
+            A positive definite (PD) matrix of shape
+            (n_features, n_features), that will be used as such to set the
+            prior.

     prior : Not used.
         .. deprecated:: 0.5.0
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index ebee93cf..036c4951 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -52,7 +52,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
             An identity matrix of shape (n_features, n_features).

         'covariance'
-            The inverse covariance matrix.
+            The (pseudo-)inverse of the covariance matrix.

         'random'
             The initial transformation will be a random SPD matrix of shape
@@ -470,10 +470,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
         (n_features, n_features).

         'identity'
-          An identity matrix of shape (n_features, n_features).
+            An identity matrix of shape (n_features, n_features).

         'covariance'
-            The inverse covariance matrix.
+            The (pseudo-)inverse of the covariance matrix.

         'random'
             The initial transformation will be a random SPD matrix of shape
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index 98499c71..94b6a8dc 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -66,8 +66,8 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',

         numpy array
             A positive definite (PD) matrix of shape
-            (n_features, n_features), that will be used as such to initialize
-            the metric.
+            (n_features, n_features), that will be used as such to set the
+            prior.

     use_cov : Not used.
         .. deprecated:: 0.5.0
@@ -111,7 +111,8 @@ def _fit(self, pairs, y):
     # set up (the inverse of) the prior M
     _, prior_inv = _initialize_metric_mahalanobis(pairs, self.init,
-                                                  return_inverse=True)
+                                                  return_inverse=True,
+                                                  strict_pd=True)
     diff = pairs[:, 0] - pairs[:, 1]
     loss_matrix = (diff.T * y).dot(diff)
     emp_cov = prior_inv + self.balance_param * loss_matrix
@@ -245,7 +246,8 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
     init : string or numpy array, optional (default='identity')
         Initialization of the linear transformation. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features).
+        (n_features, n_features). For SDML, the init should be strictly
+        positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).
@@ -259,8 +261,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
             `sklearn.datasets.make_spd_matrix`.

         numpy array
-            A numpy array of shape (n_features, n_features), that will
-            be used as such to initialize the metric.
+            A positive definite (PD) matrix of shape
+            (n_features, n_features), that will be used as such to set the
+            prior.
+
     use_cov : Not used.
         .. deprecated:: 0.5.0
             `A0` was deprecated in version 0.5.0 and will
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index 056ccdd3..36c9e2c5 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -5,6 +5,7 @@
 from numpy.linalg import LinAlgError
 from numpy.testing import assert_array_almost_equal, assert_allclose
 from scipy.spatial.distance import pdist, squareform, mahalanobis
+from scipy.stats import ortho_group
 from sklearn import clone
 from sklearn.cluster import DBSCAN
 from sklearn.datasets import make_spd_matrix
@@ -15,11 +16,10 @@
 from metric_learn._util import make_context
 from metric_learn.base_metric import (_QuadrupletsClassifierMixin,
                                       _PairsClassifierMixin)
+from metric_learn.exceptions import NonPSDError

 from test.test_utils import (ids_metric_learners, metric_learners,
-                             remove_y_quadruplets, ids_regressors,
-                             ids_supervised_learners, supervised_learners,
-                             ids_classifiers)
+                             remove_y_quadruplets, ids_classifiers)

 RNG = check_random_state(0)
@@ -522,13 +522,22 @@ def test_init_mahalanobis(estimator, build_dataset):
   # The input matrix must be symmetric
   init = rng.rand(X.shape[1], X.shape[1])
   model.set_params(init=init)
-  msg = ("The initialization matrix should be semi-definite "
-         "positive (SPD). It is not, since it appears not to be "
-         "symmetric.")
+  msg = ("The given matrix is not symmetric.")
   with pytest.raises(ValueError) as raised_error:
     model.fit(input_data, labels)
   assert str(raised_error.value) == msg

+  # The input matrix must be SPD
+  P = ortho_group.rvs(X.shape[1], random_state=rng)
+  w = np.abs(rng.randn(X.shape[1]))
+  w[0] = -10.
+  M = P.dot(np.diag(w)).dot(P.T)
+  model.set_params(init=M)
+  msg = ("Matrix is not positive semidefinite (PSD).")
+  with pytest.raises(NonPSDError) as raised_err:
+    model.fit(input_data, labels)
+  assert str(raised_err.value) == msg
+
   # init must be as specified in the docstring
   model.set_params(init=1)
   msg = ("`init` must be 'identity', 'covariance', "
@@ -543,17 +552,18 @@ def test_init_mahalanobis(estimator, build_dataset):
                          [(ml, bd) for idml, (ml, bd)
                           in zip(ids_metric_learners,
                                  metric_learners)
-                          if not hasattr(ml, 'num_dims') and
-                          hasattr(ml, 'init')],
+                          if idml[:4] in ['ITML', 'SDML', 'LSML']],
                          ids=[idml for idml, (ml, _)
                               in zip(ids_metric_learners,
                                      metric_learners)
-                              if not hasattr(ml, 'num_dims') and
-                              hasattr(ml, 'init')])
+                              if idml[:4] in ['ITML', 'SDML', 'LSML']])
 def test_singular_covariance_init(estimator, build_dataset):
   """Tests that when using the 'covariance' init, it returns the
-  appropriate error if the covariance matrix is singular (see
-  https://github.com/metric-learn/metric-learn/issues/202)
+  appropriate error if the covariance matrix is singular, for algorithms
+  that need a strictly PD prior or init (see
+  https://github.com/metric-learn/metric-learn/issues/202 and
+  https://github.com/metric-learn/metric-learn/pull/195#issuecomment
+  -492332451)
  """
   input_data, labels, _, X = build_dataset()
   model = clone(estimator)
@@ -565,8 +575,48 @@ def test_singular_covariance_init(estimator, build_dataset):
                               axis=-1)

   model.set_params(init='covariance')
-  msg = ("Cannot inverse the covariance matrix "
-         "(it is not definite). Try another initialization.")
+  msg = ("Unable to get a true inverse of the covariance "
+         "matrix since it is not definite. Try another "
+         "initialization, or an algorithm that does not "
+         "require the init to be strictly positive definite.")
   with pytest.raises(LinAlgError) as raised_err:
     model.fit(input_data, labels)
   assert str(raised_err.value) == msg
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize('estimator, build_dataset',
+                         [(ml, bd) for idml, (ml, bd)
+                          in zip(ids_metric_learners,
+                                 metric_learners)
+                          if idml[:4] in ['ITML', 'SDML', 'LSML']],
+                         ids=[idml for idml, (ml, _)
+                              in zip(ids_metric_learners,
+                                     metric_learners)
+                              if idml[:4] in ['ITML', 'SDML', 'LSML']])
+@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20])
+def test_singular_array_init(estimator, build_dataset, w0):
+  """Tests that when using a custom array init, it returns the
+  appropriate error if it is singular, for algorithms
+  that need a strictly PD prior or init (see
+  https://github.com/metric-learn/metric-learn/issues/202 and
+  https://github.com/metric-learn/metric-learn/pull/195#issuecomment
+  -492332451)
+  """
+  rng = np.random.RandomState(42)
+  input_data, labels, _, X = build_dataset()
+  model = clone(estimator)
+  set_random_state(model)
+
+  P = ortho_group.rvs(X.shape[1], random_state=rng)
+  w = np.abs(rng.randn(X.shape[1]))
+  w[0] = w0
+  M = P.dot(np.diag(w)).dot(P.T)
+  model.set_params(init=M)
+  msg = ("You should provide a strictly positive definite matrix. "
+         "This one is not definite. Try another "
+         "initialization, or an algorithm that does not "
+         "require the init to be strictly positive definite.")
+  with pytest.raises(LinAlgError) as raised_err:
+    model.fit(input_data, labels)
+  assert str(raised_err.value) == msg
diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py
index 7c6d2d77..5615dc14 100644
--- a/test/test_transformer_metric_conversion.py
+++ b/test/test_transformer_metric_conversion.py
@@ -11,6 +11,7 @@
     LMNN, NCA, LFDA, Covariance, MLKR, LSML_Supervised, ITML_Supervised,
     SDML_Supervised, RCA_Supervised)
 from metric_learn._util import transformer_from_metric
+from metric_learn.exceptions import NonPSDError


 class TestTransformerMetricConversion(unittest.TestCase):
@@ -162,10 +163,10 @@ def test_non_psd_raises(self):
     P = ortho_group.rvs(7, random_state=rng)
     M = P.dot(D).dot(P.T)
     msg = ("Matrix is not positive semidefinite (PSD).")
-    with pytest.raises(ValueError) as raised_error:
+    with pytest.raises(NonPSDError) as raised_error:
       transformer_from_metric(M)
     assert str(raised_error.value) == msg
-    with pytest.raises(ValueError) as raised_error:
+    with pytest.raises(NonPSDError) as raised_error:
       transformer_from_metric(D)
     assert str(raised_error.value) == msg
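The singular-covariance case that these strict_pd checks guard against takes only one linearly dependent feature to reproduce, exactly as the updated test does. A standalone illustration with made-up data (not part of the patch):

import numpy as np
from scipy.linalg import eigh, pinvh

rng = np.random.RandomState(42)
X = rng.randn(100, 3)
# append a feature that is a linear combination of the first two
X = np.concatenate([X, X[:, :2].dot([[2], [3]])], axis=1)

cov = np.atleast_2d(np.cov(X, rowvar=False))
s, _ = eigh(cov)
print(s.min())  # ~0: the covariance matrix is singular, hence not strictly PD
# a pseudo-inverse still exists, but it is not a true inverse:
print(np.allclose(pinvh(cov).dot(cov), np.eye(4)))  # False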
From fdad8c2ee58fac81bd9291b4d90f07eef889a037 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Thu, 16 May 2019 10:25:33 +0200
Subject: [PATCH 31/52] Rename init to prior for SDML and LSML

---
 metric_learn/lsml.py                       | 49 +++++++------------
 metric_learn/sdml.py                       | 37 +++++--------
 test/metric_learn_test.py                  | 55 +++++++---------------
 test/test_base_metric.py                   | 15 +++---
 test/test_fit_transform.py                 |  4 +-
 test/test_mahalanobis_mixin.py             | 21 ++++++---
 test/test_sklearn_compat.py                |  2 +-
 test/test_transformer_metric_conversion.py |  2 +-
 test/test_utils.py                         |  4 +-
 9 files changed, 80 insertions(+), 109 deletions(-)

diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 30a9e4d3..b1eb303b 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -30,17 +30,16 @@ class _BaseLSML(MahalanobisMixin):

   _tuple_size = 4  # constraints are quadruplets

-  def __init__(self, tol=1e-3, max_iter=1000, init='identity',
-               prior='deprecated', verbose=False, preprocessor=None,
-               random_state=None):
+  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
+               verbose=False, preprocessor=None, random_state=None):
    """Initialize LSML.

    Parameters
    ----------
-    init : string or numpy array, optional (default='identity')
-        Initialization of the linear transformation. Possible options are
+    prior : string or numpy array, optional (default='identity')
+        Prior to set for the metric. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features). For LSML, the init should be strictly
+        (n_features, n_features). For LSML, the prior should be strictly
         positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).

         'covariance'
             The inverse covariance matrix.

         'random'
-          The initial transformation will be a random SPD matrix of shape
+          The initial transformation will be a random PD matrix of shape
           `(n_features, n_features)`, generated using
           `sklearn.datasets.make_spd_matrix`.

         numpy array
             A positive definite (PD) matrix of shape
             (n_features, n_features), that will be used as such to set the
             prior.

     tol : float, optional
     max_iter : int, optional
-    prior : Not used.
-        .. deprecated:: 0.5.0
-            `prior` was deprecated in version 0.5.0 and will
-            be removed in 0.6.0. Use 'init' instead.
     verbose : bool, optional
         if True, prints information while learning
     preprocessor : array-like, shape=(n_samples, n_features) or callable
         The preprocessor to call to get tuples from indices. If array-like,
         tuples will be formed like this: X[indices].
     random_state : int or numpy.RandomState or None, optional (default=None)
         A pseudo random number generator object or a seed for it if int. If
         ``init='random'``, ``random_state`` is used to initialize the random
         transformation.
    """
-    self.init = init
     self.prior = prior
     self.tol = tol
     self.max_iter = max_iter
@@ -84,11 +78,6 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
     super(_BaseLSML, self).__init__(preprocessor)

   def _fit(self, quadruplets, weights=None):
-    if self.prior != 'deprecated':
-      warnings.warn('"prior" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    'removed in 0.6.0. Use "init" instead.',
-                    DeprecationWarning)
     quadruplets = self._prepare_inputs(quadruplets,
                                        type_of_inputs='tuples')
@@ -102,7 +91,7 @@ def _fit(self, quadruplets, weights=None):
     else:
       self.w_ = weights
     self.w_ /= self.w_.sum()  # weights must sum to 1
-    M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.init,
+    M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.prior,
                                                   return_inverse=True,
                                                   strict_pd=True)
@@ -218,10 +207,9 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
       metric (See function `transformer_from_metric`.)
  """

-  def __init__(self, tol=1e-3, max_iter=1000, init='identity',
-               prior='deprecated', num_labeled='deprecated',
-               num_constraints=None, weights=None, verbose=False,
-               preprocessor=None, random_state=None):
+  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
+               num_labeled='deprecated', num_constraints=None, weights=None,
+               verbose=False, preprocessor=None, random_state=None):
    """Initialize the supervised version of `LSML`.

    `LSML_Supervised` creates quadruplets from labeled samples by taking two
@@ -233,10 +221,10 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
    ----------
     tol : float, optional
     max_iter : int, optional
-    init : string or numpy array, optional (default='identity')
-        Initialization of the linear transformation. Possible options are
+    prior : string or numpy array, optional (default='identity')
+        Prior to set for the metric. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features). For LSML, the init should be strictly
+        (n_features, n_features). For LSML, the prior should be strictly
         positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).

         'covariance'
             The inverse covariance matrix.

         'random'
-          The initial transformation will be a random SPD matrix of shape
+          The initial transformation will be a random PD matrix of shape
           `(n_features, n_features)`, generated using
           `sklearn.datasets.make_spd_matrix`.

         numpy array
             A positive definite (PD) matrix of shape
             (n_features, n_features), that will be used as such to set the
             prior.
-
-    prior : Not used.
-        .. deprecated:: 0.5.0
-            `prior` was deprecated in version 0.5.0 and will
-            be removed in 0.6.0. Use 'init' instead.
     num_labeled : Not used
         .. deprecated:: 0.5.0
            `num_labeled` was deprecated in version 0.5.0 and will
            be removed in 0.6.0.
@@ -277,8 +260,8 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
         ``init='random'``, ``random_state`` is used to initialize the random
         transformation.
    """
-    _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, init=init,
-                       prior=prior, verbose=verbose, preprocessor=preprocessor,
+    _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
+                       verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
     self.num_labeled = num_labeled
     self.num_constraints = num_constraints
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index 94b6a8dc..89ed02fa 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -35,7 +35,7 @@ class _BaseSDML(MahalanobisMixin):

   _tuple_size = 2  # constraints are pairs

-  def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
+  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
                use_cov='deprecated', verbose=False, preprocessor=None,
                random_state=None):
    """
@@ -47,10 +47,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
     sparsity_param : float, optional
         trade off between optimizer and sparseness (see graph_lasso)

-    init : string or numpy array, optional (default='identity')
-        Initialization of the linear transformation. Possible options are
+    prior : string or numpy array, optional (default='identity')
+        Prior to set for the metric. Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features). For SDML, the init should be strictly
+        (n_features, n_features). For SDML, the prior should be strictly
         positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).

         'covariance'
             The inverse covariance matrix.

         'random'
-          The initial transformation will be a random SPD matrix of shape
+          The initial transformation will be a random PD matrix of shape
           `(n_features, n_features)`, generated using
           `sklearn.datasets.make_spd_matrix`.

         numpy array
             A positive definite (PD) matrix of shape
             (n_features, n_features), that will be used as such to set the
             prior.

     use_cov : Not used.
         .. deprecated:: 0.5.0
             `A0` was deprecated in version 0.5.0 and will
-            be removed in 0.6.0. Use 'init' instead.
+            be removed in 0.6.0. Use 'prior' instead.

     verbose : bool, optional
         if True, prints information while learning

     preprocessor : array-like, shape=(n_samples, n_features) or callable
         The preprocessor to call to get tuples from indices. If array-like,
         tuples will be formed like this: X[indices].

     random_state : int or numpy.RandomState or None, optional (default=None)
         A pseudo random number generator object or a seed for it if int. If
-        ``init='random'``, ``random_state`` is used to initialize the random
-        transformation.
+        ``prior='random'``, ``random_state`` is used to set the prior.
    """
     self.balance_param = balance_param
     self.sparsity_param = sparsity_param
-    self.init = init
+    self.prior = prior
     self.use_cov = use_cov  # TODO: deprecate and replace by init
     self.verbose = verbose
     self.random_state = random_state
@@ -98,7 +97,7 @@ def _fit(self, pairs, y):
     if self.use_cov != 'deprecated':
       warnings.warn('"use_cov" parameter is not used.'
                     ' It has been deprecated in version 0.5.0 and will be'
-                    'removed in 0.6.0. Use "init" instead.',
+                    'removed in 0.6.0. Use "prior" instead.',
                     DeprecationWarning)
     if not HAS_SKGGM:
       if self.verbose:
@@ -110,7 +109,7 @@ def _fit(self, pairs, y):
                                  type_of_inputs='tuples')

     # set up (the inverse of) the prior M
-    _, prior_inv = _initialize_metric_mahalanobis(pairs, self.init,
+    _, prior_inv = _initialize_metric_mahalanobis(pairs, self.prior,
                                                   return_inverse=True,
                                                   strict_pd=True)
     diff = pairs[:, 0] - pairs[:, 1]
@@ -127,7 +126,7 @@ def _fit(self, pairs, y):
               "positive semi-definite (PSD). The algorithm may diverge, "
               "and lead to degenerate solutions. "
               "To prevent that, try to decrease the balance parameter "
-              "`balance_param` and/or to set init='identity'.",
+              "`balance_param` and/or to set prior='identity'.",
               ConvergenceWarning)
       w -= min_eigval  # we translate the eigenvalues to make them all positive
       w += 1e-10  # we add a small offset to avoid definiteness problems
@@ -227,7 +226,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
       metric (See function `transformer_from_metric`.)
  """

-  def __init__(self, balance_param=0.5, sparsity_param=0.01, init='identity',
+  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
                use_cov='deprecated', num_labeled='deprecated',
                num_constraints=None, verbose=False, preprocessor=None,
                random_state=None):
@@ -243,10 +242,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
         trade off between sparsity and M0 prior
     sparsity_param : float, optional
         trade off between optimizer and sparseness (see graph_lasso)
-    init : string or numpy array, optional (default='identity')
-        Initialization of the linear transformation. Possible options are
+    prior : string or numpy array, optional (default='identity')
+        Possible options are
         'identity', 'covariance', 'random', and a numpy array of shape
-        (n_features, n_features). For SDML, the init should be strictly
+        (n_features, n_features). For SDML, the prior should be strictly
         positive definite (PD).

         'identity'
             An identity matrix of shape (n_features, n_features).

         'covariance'
             The inverse covariance matrix.

         'random'
-          The initial transformation will be a random SPD matrix of shape
+          The prior will be a random SPD matrix of shape
           `(n_features, n_features)`, generated using
           `sklearn.datasets.make_spd_matrix`.

         numpy array
             A positive definite (PD) matrix of shape
             (n_features, n_features), that will be used as such to set the
             prior.

     use_cov : Not used.
         .. deprecated:: 0.5.0
             `A0` was deprecated in version 0.5.0 and will
-            be removed in 0.6.0. Use 'init' instead.
+            be removed in 0.6.0. Use 'prior' instead.

     num_labeled : Not used
         .. deprecated:: 0.5.0
@@ -287,7 +286,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
         transformation.
    """
     _BaseSDML.__init__(self, balance_param=balance_param,
-                       sparsity_param=sparsity_param, init=init,
+                       sparsity_param=sparsity_param, prior=prior,
                        use_cov=use_cov, verbose=verbose,
                        preprocessor=preprocessor, random_state=random_state)
     self.num_labeled = num_labeled
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 4f12e872..c8da4084 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -74,27 +74,6 @@ def test_deprecation_num_labeled(self):
            'removed in 0.6.0')
     assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y)

-  def test_deprecation_prior(self):
-    # test that a deprecation message is thrown if A0 is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lsml_supervised = LSML_Supervised(prior=np.ones_like(X))
-    msg = ('"prior" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. Use "init" instead.')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      lsml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    quadruplets = np.array([[[-10., 0.], [10., 0.], [0., 50.], [0., -60]],
-                            [[-27., 31.], [12., 52.], [71., 30.], [41., -24]]])
-    lsml = LSML(prior=np.ones_like(X))
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      lsml.fit(quadruplets)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-

 class TestITML(MetricTestCase):
   def test_iris(self):
@@ -231,7 +210,7 @@ def test_sdml_raises_warning_msg_not_installed_skggm(self):
     # because it will return a non SPD matrix
     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
     y_pairs = [1, -1]
-    sdml = SDML(init='identity', balance_param=100, verbose=True)
+    sdml = SDML(prior='identity', balance_param=100, verbose=True)

     msg = ("There was a problem in SDML when using scikit-learn's graphical "
            "lasso solver. skggm's graphical lasso can sometimes converge on "
@@ -254,7 +233,7 @@ def test_sdml_raises_warning_msg_installed_skggm(self):
     # because it will return non finite values
     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
     y_pairs = [1, -1]
-    sdml = SDML(init='identity', balance_param=100, verbose=True)
+    sdml = SDML(prior='identity', balance_param=100, verbose=True)

     msg = ("There was a problem in SDML when using skggm's graphical "
            "lasso solver.")
@@ -277,7 +256,7 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self):
     # pathological case)
     X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]])
     y = [0, 0, 1, 1]
-    sdml_supervised = SDML_Supervised(balance_param=0.5, init='identity',
+    sdml_supervised = SDML_Supervised(balance_param=0.5, prior='identity',
                                       sparsity_param=0.01)
     msg = ("There was a problem in SDML when using skggm's graphical "
            "lasso solver.")
@@ -295,11 +274,11 @@ def test_raises_no_warning_installed_skggm(self):
     y_pairs = [1, -1]
     X, y = make_classification(random_state=42)
     with pytest.warns(None) as record:
-      sdml = SDML(init='covariance')
+      sdml = SDML(prior='covariance')
       sdml.fit(pairs, y_pairs)
     assert len(record) == 0
     with pytest.warns(None) as record:
-      sdml = SDML_Supervised(init='identity', balance_param=1e-5)
+      sdml = SDML_Supervised(prior='identity', balance_param=1e-5)
       sdml.fit(X, y)
     assert len(record) == 0

@@ -308,7 +287,7 @@ def test_iris(self):
     # TODO: un-flake it!
     rs = np.random.RandomState(5555)

-    sdml = SDML_Supervised(num_constraints=1500, init='identity',
+    sdml = SDML_Supervised(num_constraints=1500, prior='identity',
                            balance_param=5e-5)
     sdml.fit(self.iris_points, self.iris_labels, random_state=rs)
     csep = class_separation(sdml.transform(self.iris_points),
@@ -320,7 +299,7 @@ def test_deprecation_num_labeled(self):
     # initialization
     # TODO: remove in v.0.6
     X, y = make_classification(random_state=42)
-    sdml_supervised = SDML_Supervised(num_labeled=np.inf, init='identity',
+    sdml_supervised = SDML_Supervised(num_labeled=np.inf, prior='identity',
                                       balance_param=5e-5)
     msg = ('"num_labeled" parameter is not used.'
            ' It has been deprecated in version 0.5.0 and will be'
@@ -332,12 +311,12 @@ def test_sdml_raises_warning_non_psd(self):
    pseudo-covariance matrix is not PSD"""
     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
     y = [1, -1]
-    sdml = SDML(init='covariance', sparsity_param=0.01, balance_param=0.5)
+    sdml = SDML(prior='covariance', sparsity_param=0.01, balance_param=0.5)
     msg = ("Warning, the input matrix of graphical lasso is not "
            "positive semi-definite (PSD). The algorithm may diverge, "
            "and lead to degenerate solutions. "
            "To prevent that, try to decrease the balance parameter "
-           "`balance_param` and/or to set init='identity'.")
+           "`balance_param` and/or to set prior='identity'.")
     with pytest.warns(ConvergenceWarning) as raised_warning:
       try:
         sdml.fit(pairs, y)
@@ -352,7 +331,7 @@ def test_sdml_converges_if_psd(self):
    pseudo-covariance matrix is PSD"""
     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
     y = [1, -1]
-    sdml = SDML(init='covariance', sparsity_param=0.01, balance_param=0.5)
+    sdml = SDML(prior='covariance', sparsity_param=0.01, balance_param=0.5)
     sdml.fit(pairs, y)
     assert np.isfinite(sdml.get_mahalanobis_matrix()).all()

@@ -365,7 +344,7 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self):
    it should work, but scikit-learn's graphical_lasso does not work"""
     X, y = load_iris(return_X_y=True)
     sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01,
-                           init='covariance')
+                           prior='covariance')
     sdml.fit(X, y, random_state=np.random.RandomState(42))

   def test_deprecation_use_cov(self):
@@ -378,7 +357,7 @@ def test_deprecation_use_cov(self):
                                       balance_param=1e-5)
     msg = ('"use_cov" parameter is not used.'
            ' It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. Use "init" instead.')
+           'removed in 0.6.0. Use "prior" instead.')
     with pytest.warns(DeprecationWarning) as raised_warning:
       sdml_supervised.fit(X, y)
     assert any(msg == str(wrn.message) for wrn in raised_warning)
@@ -400,7 +379,7 @@ def test_verbose_has_installed_skggm_sdml(capsys):
   # TODO: remove if we don't need skggm anymore
   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y_pairs = [1, -1]
-  sdml = SDML(verbose=True, init='covariance')
+  sdml = SDML(verbose=True, prior='covariance')
   sdml.fit(pairs, y_pairs)
   out, _ = capsys.readouterr()
   assert "SDML will use skggm's graphical lasso solver." in out
@@ -413,8 +392,8 @@ def test_verbose_has_installed_skggm_sdml_supervised(capsys):
   # Test that if users have installed skggm, a message is printed telling them
   # skggm's solver is used (when they use SDML_Supervised)
   # TODO: remove if we don't need skggm anymore
-  X, y = make_classification(random_state=42)
-  sdml = SDML_Supervised(verbose=True, init='covariance')
+  X, y = load_iris(return_X_y=True)
+  sdml = SDML_Supervised(verbose=True, prior='identity', balance_param=1e-5)
   sdml.fit(X, y)
   out, _ = capsys.readouterr()
   assert "SDML will use skggm's graphical lasso solver." in out
@@ -429,7 +408,7 @@ def test_verbose_has_not_installed_skggm_sdml(capsys):
   # TODO: remove if we don't need skggm anymore
   pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
   y_pairs = [1, -1]
-  sdml = SDML(verbose=True, init='covariance')
+  sdml = SDML(verbose=True, prior='covariance')
   sdml.fit(pairs, y_pairs)
   out, _ = capsys.readouterr()
   assert "SDML will use scikit-learn's graphical lasso solver." in out
@@ -443,7 +422,7 @@ def test_verbose_has_not_installed_skggm_sdml_supervised(capsys):
   # skggm's solver is used (when they use SDML_Supervised)
   # TODO: remove if we don't need skggm anymore
   X, y = make_classification(random_state=42)
-  sdml = SDML_Supervised(verbose=True, balance_param=1e-5, init='identity')
+  sdml = SDML_Supervised(verbose=True, balance_param=1e-5, prior='identity')
   sdml.fit(X, y)
   out, _ = capsys.readouterr()
   assert "SDML will use scikit-learn's graphical lasso solver." in out
diff --git a/test/test_base_metric.py b/test/test_base_metric.py
index 007476c3..ff776a2a 100644
--- a/test/test_base_metric.py
+++ b/test/test_base_metric.py
@@ -57,27 +57,28 @@ def test_itml(self):
   def test_lsml(self):
     self.assertEqual(remove_spaces(str(metric_learn.LSML())),
                      remove_spaces("""
-LSML(init='identity', max_iter=1000, preprocessor=None, prior='deprecated',
+LSML(max_iter=1000, preprocessor=None, prior='identity',
      random_state=None, tol=0.001, verbose=False)
"""))
     self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())),
                      remove_spaces("""
-LSML_Supervised(init='identity', max_iter=1000, num_constraints=None,
-    num_labeled='deprecated', preprocessor=None, prior='deprecated',
+LSML_Supervised(max_iter=1000, num_constraints=None,
+    num_labeled='deprecated', preprocessor=None, prior='identity',
     random_state=None, tol=0.001, verbose=False, weights=None)
"""))

   def test_sdml(self):
     self.assertEqual(remove_spaces(str(metric_learn.SDML())),
                      remove_spaces("""
-SDML(balance_param=0.5, init='identity', preprocessor=None, random_state=None,
+SDML(balance_param=0.5, preprocessor=None, prior='identity', random_state=None,
      sparsity_param=0.01, use_cov='deprecated', verbose=False)
"""))
     self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())),
                      remove_spaces("""
-SDML_Supervised(balance_param=0.5, init='identity', num_constraints=None,
-    num_labeled='deprecated', preprocessor=None, random_state=None,
-    sparsity_param=0.01, use_cov='deprecated', verbose=False)
+SDML_Supervised(balance_param=0.5, num_constraints=None,
+    num_labeled='deprecated', preprocessor=None, prior='identity',
+    random_state=None, sparsity_param=0.01, use_cov='deprecated',
+    verbose=False)
"""))

   def test_rca(self):
diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py
index 705258cb..d8dbafea 100644
--- a/test/test_fit_transform.py
+++ b/test/test_fit_transform.py
@@ -65,13 +65,13 @@ def test_lmnn(self):
   def test_sdml_supervised(self):
     seed = np.random.RandomState(1234)
     sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5,
-                           init='identity')
+                           prior='identity')
     sdml.fit(self.X, self.y, random_state=seed)
     res_1 = sdml.transform(self.X)

     seed = np.random.RandomState(1234)
     sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5,
-                           init='identity')
+                           prior='identity')
     res_2 = sdml.fit_transform(self.X, self.y, random_state=seed)

     assert_array_almost_equal(res_1, res_2)
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index 36c9e2c5..74c3a659 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -558,7 +558,7 @@ def test_init_mahalanobis(estimator, build_dataset):
                                      metric_learners)
                               if idml[:4] in ['ITML', 'SDML', 'LSML']])
 def test_singular_covariance_init(estimator, build_dataset):
-  """Tests that when using the 'covariance' init, it returns the
+  """Tests that when using the 'covariance' init or prior, it returns the
   appropriate error if the covariance matrix is singular, for algorithms
   that need a strictly PD prior or init (see
   https://github.com/metric-learn/metric-learn/issues/202 and
@@ -573,8 +573,12 @@ def test_singular_covariance_init(estimator, build_dataset):
   input_data = np.concatenate([input_data, input_data[:, ..., :2]
                               .dot([[2], [3]])],
                               axis=-1)
-
-  model.set_params(init='covariance')
+  if hasattr(model, 'init'):
+    model.set_params(init='covariance')
+  if hasattr(model, 'prior'):
+    model.set_params(prior='covariance')
+  if not hasattr(model, 'prior') and not hasattr(model, 'init'):
+    raise RuntimeError("Neither prior or init could be set in the model.")
   msg = ("Unable to get a true inverse of the covariance "
          "matrix since it is not definite. Try another "
          "initialization, or an algorithm that does not "
         "require the init to be strictly positive definite.")
   with pytest.raises(LinAlgError) as raised_err:
     model.fit(input_data, labels)
   assert str(raised_err.value) == msg
@@ -595,8 +599,8 @@ def test_singular_covariance_init(estimator, build_dataset):
                                      metric_learners)
                               if idml[:4] in ['ITML', 'SDML', 'LSML']])
 @pytest.mark.parametrize('w0', [1e-20, 0., -1e-20])
-def test_singular_array_init(estimator, build_dataset, w0):
-  """Tests that when using a custom array init, it returns the
+def test_singular_array_init_or_prior(estimator, build_dataset, w0):
+  """Tests that when using a custom array init (or prior), it returns the
   appropriate error if it is singular, for algorithms
   that need a strictly PD prior or init (see
   https://github.com/metric-learn/metric-learn/issues/202 and
   https://github.com/metric-learn/metric-learn/pull/195#issuecomment
   -492332451)
  """
@@ -612,7 +616,12 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0):
   w = np.abs(rng.randn(X.shape[1]))
   w[0] = w0
   M = P.dot(np.diag(w)).dot(P.T)
-  model.set_params(init=M)
+  if hasattr(model, 'init'):
+    model.set_params(init=M)
+  if hasattr(model, 'prior'):
+    model.set_params(prior=M)
+  if not hasattr(model, 'prior') and not hasattr(model, 'init'):
+    raise RuntimeError("Neither prior or init could be set in the model.")
   msg = ("You should provide a strictly positive definite matrix. "
          "This one is not definite. Try another "
          "initialization, or an algorithm that does not "
          "require the init to be strictly positive definite.")
   with pytest.raises(LinAlgError) as raised_err:
     model.fit(input_data, labels)
   assert str(raised_err.value) == msg
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
index 1838c45d..3d5f40b1 100644
--- a/test/test_sklearn_compat.py
+++ b/test/test_sklearn_compat.py
@@ -85,7 +85,7 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated',
                     num_constraints=num_constraints,
                     verbose=verbose,
                     preprocessor=preprocessor,
-                    balance_param=1e-5, init='identity')
+                    balance_param=1e-5, prior='identity')
   dSDML.__init__ = stable_init
   check_estimator(dSDML)
diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py
index 5615dc14..588e4004 100644
--- a/test/test_transformer_metric_conversion.py
+++ b/test/test_transformer_metric_conversion.py
@@ -50,7 +50,7 @@ def test_lmnn(self):

   def test_sdml_supervised(self):
     seed = np.random.RandomState(1234)
-    sdml = SDML_Supervised(num_constraints=1500, init='identity',
+    sdml = SDML_Supervised(num_constraints=1500, prior='identity',
                            balance_param=1e-5)
     sdml.fit(self.X, self.y, random_state=seed)
     L = sdml.transformer_
diff --git a/test/test_utils.py b/test/test_utils.py
index 998a6077..04db24f7 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -107,7 +107,7 @@ def build_quadruplets(with_preprocessor=False):
                   # be solved
                   # TODO: remove this comment when #175 is solved
                   (MMC(max_iter=2), build_pairs),  # max_iter=2 to be faster
-                  (SDML(init='identity', balance_param=1e-5), build_pairs)]
+                  (SDML(prior='identity', balance_param=1e-5), build_pairs)]
 ids_pairs_learners = list(map(lambda x: x.__class__.__name__,
                               [learner for (learner, _) in
                                pairs_learners]))
@@ -121,7 +121,7 @@ def build_quadruplets(with_preprocessor=False):
                (LSML_Supervised(), build_classification),
                (MMC_Supervised(max_iter=5), build_classification),
                (RCA_Supervised(num_chunks=10), build_classification),
-               (SDML_Supervised(init='identity', balance_param=1e-5),
+               (SDML_Supervised(prior='identity', balance_param=1e-5),
                 build_classification)]
 ids_classifiers = list(map(lambda x: x.__class__.__name__,
                            [learner for (learner, _) in
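The tests above build their pathological inputs by sandwiching a hand-picked eigenvalue spectrum between a random orthogonal matrix and its transpose, which gives a symmetric matrix with exactly the definiteness you want. The same recipe is handy for experimenting with these code paths; a standalone sketch with assumed toy sizes:

import numpy as np
from scipy.stats import ortho_group

rng = np.random.RandomState(42)
n = 4
P = ortho_group.rvs(n, random_state=rng)  # random orthogonal basis
w = np.abs(rng.randn(n))                  # all-positive spectrum: strictly PD

w[0] = 0.    # one null eigenvalue: PSD but singular (rejected when strict_pd=True)
M_singular = P.dot(np.diag(w)).dot(P.T)

w[0] = -10.  # one negative eigenvalue: not PSD at all (raises NonPSDError)
M_non_psd = P.dot(np.diag(w)).dot(P.T)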
From 5b048b42f52567d8f648d401e6e07edca2f388d9 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Fri, 17 May 2019 09:39:58 +0200
Subject: [PATCH 32/52] Update error messages with either prior or init

---
 metric_learn/_util.py          |  30 +++--
 metric_learn/itml.py           |   3 +-
 metric_learn/lsml.py           |   3 +-
 metric_learn/mmc.py            |   3 +-
 metric_learn/sdml.py           |   3 +-
 test/test_mahalanobis_mixin.py | 203 ++++++++++++++++++---------------
 6 files changed, 140 insertions(+), 105 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 8a8bc05b..f5026f0d 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -588,7 +588,8 @@ def _initialize_transformer(num_dims, input, y=None, init='auto',

 def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
-                                   return_inverse=False, strict_pd=False):
+                                   return_inverse=False, strict_pd=False,
+                                   matrix_name='matrix'):
  """Returns a standard mahalanobis matrix that can be used as a prior or an
  initialization

   strict_pd : bool, optional (default=False)
     Whether to enforce that the provided matrix is definite (in addition to
     being PSD).

+  matrix_name : str, optional (default='matrix')
+    The name of the matrix used (example: 'init', 'prior'). Will be used in
+    error messages.
+
   Returns
   -------
   M, or (M, M_inv) : `numpy.ndarray`

     # Assert that init.shape[1] = n_features
     if (init.shape) != (n_features,) * 2:
       raise ValueError('The input dimensionality {} of the given '
-                       'mahalanobis matrix `init` must match the '
+                       'mahalanobis matrix `{}` must match the '
                        'dimensionality of the given inputs ({}).'
-                       .format(init.shape, n_features))
+                       .format(init.shape, matrix_name, n_features))

     # Assert that the matrix is symmetric
     if not np.allclose(init, init.T):
-      raise ValueError("The given matrix is not symmetric.")
+      raise ValueError("`{}` is not symmetric.".format(matrix_name))

   elif init in ['identity', 'covariance', 'random']:
     pass
   else:
     raise ValueError(
-        "`init` must be 'identity', 'covariance', 'random' "
-        "or a numpy array of shape (n_features, n_features).")
+        "`{}` must be 'identity', 'covariance', 'random' "
+        "or a numpy array of shape (n_features, n_features)."
+        .format(matrix_name))

   random_state = check_random_state(random_state)
   M = init
   if isinstance(init, np.ndarray):
     s, u = scipy.linalg.eigh(init)
     init_is_definite = _check_sdp_from_eigen(s)
     if strict_pd and not init_is_definite:
       raise LinAlgError("You should provide a strictly positive definite "
-                        "matrix. This one is not definite. Try another "
-                        "initialization, or an algorithm that does not "
-                        "require the init to be strictly positive definite.")
+                        "matrix as `{}`. This one is not definite. Try another"
+                        " {}, or an algorithm that does not "
+                        "require the {} to be strictly positive definite."
+                        .format(*((matrix_name,) * 3)))
     if return_inverse:
       M_inv = np.dot(u / s, u.T)
   else:
     if init == 'identity':
       M = np.eye(n_features, n_features)

       M_inv = np.atleast_2d(np.cov(X, rowvar=False))
       # TODO: check atleast_2d necessary
       s, u = scipy.linalg.eigh(M_inv)
       cov_is_definite = _check_sdp_from_eigen(s)
       if strict_pd and not cov_is_definite:
         raise LinAlgError("Unable to get a true inverse of the covariance "
                           "matrix since it is not definite. Try another "
-                          "initialization, or an algorithm that does not "
-                          "require the init to be strictly positive definite.")
+                          "`{}`, or an algorithm that does not "
+                          "require the `{}` to be strictly positive definite."
+                          .format(*((matrix_name,) * 2)))
       M = np.dot(u / s, u.T)
     elif init == 'random':
       # we need to create a random symmetric matrix
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 4c6b26be..18615212 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -114,7 +114,8 @@ def _fit(self, pairs, y, bounds=None):
     # init metric
     # pairs will be deduplicated into X two times, see how to avoid that
     A = _initialize_metric_mahalanobis(pairs, self.init, self.random_state,
-                                       strict_pd=True)
+                                       strict_pd=True,
+                                       matrix_name='init')
     gamma = self.gamma
     pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1]
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index b1eb303b..901c7bcd 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -93,7 +93,8 @@ def _fit(self, quadruplets, weights=None):
     self.w_ /= self.w_.sum()  # weights must sum to 1
     M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.prior,
                                                   return_inverse=True,
-                                                  strict_pd=True)
+                                                  strict_pd=True,
+                                                  matrix_name='prior')

     step_sizes = np.logspace(-10, 0, 10)
     # Keep track of the best step size and the loss at that step.
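A side note on why these code paths return pinvh(M) rather than np.linalg.inv(M) even when a true inverse exists (see the comment added a few commits back): the plain inverse of a symmetric matrix is not guaranteed to come back exactly symmetric, whereas pinvh builds its result from an eigendecomposition and stays symmetric up to rounding. A quick standalone check (illustrative only):

import numpy as np
from sklearn.datasets import make_spd_matrix

M = make_spd_matrix(50, random_state=0)
inv = np.linalg.inv(M)
print(np.abs(inv - inv.T).max())  # typically > 0: not exactly symmetric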
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 036c4951..192dfd28 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -110,7 +110,8 @@ def _fit(self, pairs, y): type_of_inputs='tuples') self.A_ = _initialize_metric_mahalanobis(pairs, self.init, - random_state=self.random_state) + random_state=self.random_state, + matrix_name='init') if self.diagonal: return self._fit_diag(pairs, y) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 89ed02fa..ce1dbe51 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -111,7 +111,8 @@ def _fit(self, pairs, y): # set up (the inverse of) the prior M _, prior_inv = _initialize_metric_mahalanobis(pairs, self.prior, return_inverse=True, - strict_pd=True) + strict_pd=True, + matrix_name='prior') diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 74c3a659..025e0b08 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -143,6 +143,9 @@ def test_embed_dim(estimator, build_dataset): assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}: + # TODO: + # avoid this enumeration and rather test if hasattr n_components + # as soon as we have made the arguments names as such (issue #167) model.set_params(num_dims=2) model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) @@ -487,65 +490,74 @@ def test_init_mahalanobis(estimator, build_dataset): scikit-learn) that the init has an expected behaviour. """ input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - rng = np.random.RandomState(42) - - # Start learning from scratch - model.set_params(init='identity') - model.fit(input_data, labels) - - # Initialize with random - model.set_params(init='random') - model.fit(input_data, labels) - # Initialize with covariance - model.set_params(init='covariance') - model.fit(input_data, labels) + matrices_to_set = [] + if hasattr(estimator, 'init'): + matrices_to_set.append('init') + if hasattr(estimator, 'prior'): + matrices_to_set.append('prior') - # Initialize with a random spd matrix - init = make_spd_matrix(X.shape[1], random_state=rng) - model.set_params(init=init) - model.fit(input_data, labels) + for param in matrices_to_set: + model = clone(estimator) + set_random_state(model) + rng = np.random.RandomState(42) - # init.shape[1] must match X.shape[1] - init = make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1) - model.set_params(init=init) - msg = ('The input dimensionality {} of the given ' - 'mahalanobis matrix `init` must match the ' - 'dimensionality of the given inputs ({}).' 
- .format(init.shape, input_data.shape[-1])) - with pytest.raises(ValueError) as raised_error: + # Start learning from scratch + model.set_params(**{param: 'identity'}) model.fit(input_data, labels) - assert str(raised_error.value) == msg - # The input matrix must be symmetric - init = rng.rand(X.shape[1], X.shape[1]) - model.set_params(init=init) - msg = ("The given matrix is not symmetric.") - with pytest.raises(ValueError) as raised_error: + # Initialize with random + model.set_params(**{param: 'random'}) model.fit(input_data, labels) - assert str(raised_error.value) == msg - # The input matrix must be SPD - P = ortho_group.rvs(X.shape[1], random_state=rng) - w = np.abs(rng.randn(X.shape[1])) - w[0] = -10. - M = P.dot(np.diag(w)).dot(P.T) - model.set_params(init=M) - msg = ("Matrix is not positive semidefinite (PSD).") - with pytest.raises(NonPSDError) as raised_err: + # Initialize with covariance + model.set_params(**{param: 'covariance'}) model.fit(input_data, labels) - assert str(raised_err.value) == msg - # init must be as specified in the docstring - model.set_params(init=1) - msg = ("`init` must be 'identity', 'covariance', " - "'random' or a numpy array of shape " - "(n_features, n_features).") - with pytest.raises(ValueError) as raised_error: + # Initialize with a random spd matrix + init = make_spd_matrix(X.shape[1], random_state=rng) + model.set_params(**{param: init}) model.fit(input_data, labels) - assert str(raised_error.value) == msg + + # init.shape[1] must match X.shape[1] + init = make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1) + model.set_params(**{param: init}) + msg = ('The input dimensionality {} of the given ' + 'mahalanobis matrix `{}` must match the ' + 'dimensionality of the given inputs ({}).' + .format(init.shape, param, input_data.shape[-1])) + + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # The input matrix must be symmetric + init = rng.rand(X.shape[1], X.shape[1]) + model.set_params(**{param: init}) + msg = ("`{}` is not symmetric.".format(param)) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg + + # The input matrix must be SPD + P = ortho_group.rvs(X.shape[1], random_state=rng) + w = np.abs(rng.randn(X.shape[1])) + w[0] = -10. 
+ M = P.dot(np.diag(w)).dot(P.T) + model.set_params(**{param: M}) + msg = ("Matrix is not positive semidefinite (PSD).") + with pytest.raises(NonPSDError) as raised_err: + model.fit(input_data, labels) + assert str(raised_err.value) == msg + + # init must be as specified in the docstring + model.set_params(**{param: 1}) + msg = ("`{}` must be 'identity', 'covariance', " + "'random' or a numpy array of shape " + "(n_features, n_features).".format(param)) + with pytest.raises(ValueError) as raised_error: + model.fit(input_data, labels) + assert str(raised_error.value) == msg @pytest.mark.parametrize('estimator, build_dataset', @@ -557,7 +569,7 @@ def test_init_mahalanobis(estimator, build_dataset): in zip(ids_metric_learners, metric_learners) if idml[:4] in ['ITML', 'SDML', 'LSML']]) -def test_singular_covariance_init(estimator, build_dataset): +def test_singular_covariance_init_or_prior(estimator, build_dataset): """Tests that when using the 'covariance' init or prior, it returns the appropriate error if the covariance matrix is singular, for algorithms that need a strictly PD prior or init (see @@ -565,27 +577,30 @@ def test_singular_covariance_init(estimator, build_dataset): https://github.com/metric-learn/metric-learn/pull/195#issuecomment -492332451) """ + matrices_to_set = [] + if hasattr(estimator, 'init'): + matrices_to_set.append('init') + if hasattr(estimator, 'prior'): + matrices_to_set.append('prior') + input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - # We create a feature that is a linear combination of the first two - # features: - input_data = np.concatenate([input_data, input_data[:, ..., :2] - .dot([[2], [3]])], - axis=-1) - if hasattr(model, 'init'): - model.set_params(init='covariance') - if hasattr(model, 'prior'): - model.set_params(prior='covariance') - if not hasattr(model, 'prior') and not hasattr(model, 'init'): - raise RuntimeError("Neither prior or init could be set in the model.") - msg = ("Unable to get a true inverse of the covariance " - "matrix since it is not definite. Try another " - "initialization, or an algorithm that does not " - "require the init to be strictly positive definite.") - with pytest.raises(LinAlgError) as raised_err: - model.fit(input_data, labels) - assert str(raised_err.value) == msg + for param in matrices_to_set: + model = clone(estimator) + set_random_state(model) + # We create a feature that is a linear combination of the first two + # features: + input_data = np.concatenate([input_data, input_data[:, ..., :2] + .dot([[2], [3]])], + axis=-1) + model.set_params(**{param: 'covariance'}) + msg = ("Unable to get a true inverse of the covariance " + "matrix since it is not definite. Try another " + "`{}`, or an algorithm that does not " + "require the `{}` to be strictly positive definite." 
+ .format(param, param)) + with pytest.raises(LinAlgError) as raised_err: + model.fit(input_data, labels) + assert str(raised_err.value) == msg @pytest.mark.integration @@ -607,25 +622,33 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0): https://github.com/metric-learn/metric-learn/pull/195#issuecomment -492332451) """ + matrices_to_set = [] + if hasattr(estimator, 'init'): + matrices_to_set.append('init') + if hasattr(estimator, 'prior'): + matrices_to_set.append('prior') + rng = np.random.RandomState(42) input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - - P = ortho_group.rvs(X.shape[1], random_state=rng) - w = np.abs(rng.randn(X.shape[1])) - w[0] = w0 - M = P.dot(np.diag(w)).dot(P.T) - if hasattr(model, 'init'): - model.set_params(init=M) - if hasattr(model, 'prior'): - model.set_params(prior=M) - if not hasattr(model, 'prior') and not hasattr(model, 'init'): - raise RuntimeError("Neither prior or init could be set in the model.") - msg = ("You should provide a strictly positive definite matrix. " - "This one is not definite. Try another " - "initialization, or an algorithm that does not " - "require the init to be strictly positive definite.") - with pytest.raises(LinAlgError) as raised_err: - model.fit(input_data, labels) - assert str(raised_err.value) == msg + for param in matrices_to_set: + model = clone(estimator) + set_random_state(model) + + P = ortho_group.rvs(X.shape[1], random_state=rng) + w = np.abs(rng.randn(X.shape[1])) + w[0] = w0 + M = P.dot(np.diag(w)).dot(P.T) + if hasattr(model, 'init'): + model.set_params(init=M) + if hasattr(model, 'prior'): + model.set_params(prior=M) + if not hasattr(model, 'prior') and not hasattr(model, 'init'): + raise RuntimeError("Neither prior or init could be set in the model.") + msg = ("You should provide a strictly positive definite " + "matrix as `{}`. This one is not definite. Try another" + " {}, or an algorithm that does not " + "require the {} to be strictly positive definite." 
From d96930d126508f7890096eaa64d334470dbe196b Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Fri, 17 May 2019 14:39:49 +0200
Subject: [PATCH 33/52] Remove message

---
 metric_learn/itml.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 18615212..5ba472ee 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -87,10 +87,8 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
     self.gamma = gamma
     self.max_iter = max_iter
     self.convergence_threshold = convergence_threshold
-    self.init = init  # explain that it is good to keep the scale with
-    # the bounds
-    # TODO: see for other inits how it behave wrt the bound
-    self.A0 = A0  # TODO: deprecate
+    self.init = init
+    self.A0 = A0
     self.verbose = verbose
     self.random_state = random_state
     super(_BaseITML, self).__init__(preprocessor)

From 2de3d4ca59d4a33db977b35ce458cb7e98ba824d Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Sat, 18 May 2019 11:42:31 +0200
Subject: [PATCH 34/52] A few nitpicks

---
 metric_learn/_util.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index f5026f0d..311dd54c 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -607,10 +607,11 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
       An identity matrix of shape (n_features, n_features).

     'covariance'
-      The inverse covariance matrix.
+      The (pseudo-)inverse covariance matrix (raises an error if the
+      covariance matrix is not definite and `strict_pd == True`).

     'random'
-      The initial transformation will be a random SPD matrix of shape
+      The initial transformation will be a random PD matrix of shape
       `(n_features, n_features)`, generated using
       `sklearn.datasets.make_spd_matrix`.

@@ -626,9 +627,11 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
     return_inverse : bool, optional (default=False)
-      Whether to return the inverse of the matrix initializing the metric. This
-      can be sometimes useful. It will return the pseudo-inverse (which is the
-      same as the inverse if the matrix is definite (i.e. invertible))
+      Whether to return the inverse of the specified matrix. This
+      can sometimes be useful. It will return the pseudo-inverse (which is the
+      same as the inverse if the matrix is definite (i.e. invertible)). If
+      `strict_pd == True` and the matrix is not definite, it will raise an
+      error.

     strict_pd : bool, optional (default=False)
       Whether to enforce that the provided matrix is definite (in addition to
       being PSD).
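
To make the `return_inverse` option documented above concrete, here is a small sketch. It calls the private helper directly, which is for illustration only (the helper is not public API); the data is arbitrary and almost surely full rank:

    import numpy as np
    from metric_learn._util import _initialize_metric_mahalanobis

    X = np.random.RandomState(0).randn(20, 3)
    M, M_inv = _initialize_metric_mahalanobis(X, init='covariance',
                                              return_inverse=True,
                                              strict_pd=True,
                                              matrix_name='prior')
    # with init='covariance', M is the inverse covariance matrix, so
    # M_inv is the covariance matrix of X itself
    assert np.allclose(M.dot(M_inv), np.eye(3), atol=1e-6)
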
From 499a2962ef5e5387f7b82cfda714bc9ed9268c8c Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Sat, 18 May 2019 11:54:10 +0200
Subject: [PATCH 35/52] PEP8 errors + change init in test

---
 test/metric_learn_test.py      | 12 ++++++------
 test/test_base_metric.py       |  2 +-
 test/test_mahalanobis_mixin.py |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index c8da4084..1687c85c 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -5,7 +5,8 @@
 from scipy.optimize import check_grad
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
-from sklearn.datasets import load_iris, make_classification, make_regression
+from sklearn.datasets import load_iris, make_classification, make_regression, \
+    make_spd_matrix
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.testing import assert_warns_message
 from sklearn.exceptions import ConvergenceWarning
@@ -18,7 +19,7 @@
   HAS_SKGGM = True
 from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC,
                           LSML_Supervised, ITML_Supervised, SDML_Supervised,
-                          RCA_Supervised, MMC_Supervised, SDML, ITML, LSML)
+                          RCA_Supervised, MMC_Supervised, SDML, ITML)
 # Import this specially for testing.
 from metric_learn.constraints import wrap_pairs
 from metric_learn.lmnn import python_LMNN
@@ -516,8 +517,7 @@ def test_singleton_class(self):
     X = X[[ind_0[0], ind_1[0], ind_2[0]]]
     y = y[[ind_0[0], ind_1[0], ind_2[0]]]

-    EPS = np.finfo(float).eps
-    A = np.zeros((X.shape[1], X.shape[1]))
+    A = make_spd_matrix(X.shape[1], random_state=42)
     nca = NCA(init=A, max_iter=30, num_dims=X.shape[1])
     nca.fit(X, y)
     assert_array_equal(nca.transformer_, A)
@@ -527,8 +527,8 @@ def test_one_class(self):
     # must stay like the initialization
     X = self.iris_points[self.iris_labels == 0]
     y = self.iris_labels[self.iris_labels == 0]
-    EPS = np.finfo(float).eps
-    A = np.zeros((X.shape[1], X.shape[1]))
+
+    A = make_spd_matrix(X.shape[1], random_state=42)
     nca = NCA(init=A, max_iter=30, num_dims=X.shape[1])
     nca.fit(X, y)
     assert_array_equal(nca.transformer_, A)
diff --git a/test/test_base_metric.py b/test/test_base_metric.py
index ff776a2a..3215457e 100644
--- a/test/test_base_metric.py
+++ b/test/test_base_metric.py
@@ -77,7 +77,7 @@ def test_sdml(self):
                      remove_spaces("""
 SDML_Supervised(balance_param=0.5, num_constraints=None,
        num_labeled='deprecated', preprocessor=None, prior='identity',
-       random_state=None, sparsity_param=0.01, use_cov='deprecated', 
+       random_state=None, sparsity_param=0.01, use_cov='deprecated',
        verbose=False)
 """))

diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index 025e0b08..868cf87d 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -455,7 +455,7 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, num_dims,
   has_classes = model_base.__class__.__name__ in ids_classifiers
   if has_classes:
     labels = np.tile(range(n_classes), n_samples //
-                     n_classes + 1)[:n_samples]
+                                      n_classes + 1)[:n_samples]
   else:
     labels = np.tile(labels, n_samples // labels.shape[0] + 1)[:n_samples]
   model = clone(model_base)

From c371d0c94b6b0dfc62c729966c90f55ff5434244 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Sat, 18 May 2019 11:55:28 +0200
Subject: [PATCH 36/52] STY: PEP8 fixes

---
 test/metric_learn_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 1687c85c..51f2be00 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -5,8 +5,8 @@
 from scipy.optimize import check_grad
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
-from sklearn.datasets import load_iris, make_classification, make_regression, \
-    make_spd_matrix
+from sklearn.datasets import (load_iris, make_classification, make_regression,
+                              make_spd_matrix)
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.testing import assert_warns_message
 from sklearn.exceptions import ConvergenceWarning
@@ -527,7 +527,7 @@ def test_one_class(self):
     # must stay like the initialization
     X = self.iris_points[self.iris_labels == 0]
     y = self.iris_labels[self.iris_labels == 0]
-    
+
     A = make_spd_matrix(X.shape[1], random_state=42)
     nca = NCA(init=A, max_iter=30, num_dims=X.shape[1])
     nca.fit(X, y)
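
The commit that follows resolves the TODO above about copying user-supplied matrices. The aliasing it guards against is easy to demonstrate; this is a hypothetical snippet, not code from the library:

    import numpy as np
    from sklearn.utils import check_array

    init = np.eye(2)
    M = check_array(init)   # without copy=True this may return init itself
    M[0, 0] = 42.           # updating the metric in place...
    print(init[0, 0])       # ...prints 42.0: the user's init was modified

    init = np.eye(2)
    M = check_array(init, copy=True)  # the fix: always work on a copy
    M[0, 0] = 42.
    print(init[0, 0])       # prints 1.0: init is untouched
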
From b63d017a0daa26d57305bc4e193c97a615f320d1 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Mon, 20 May 2019 09:30:22 +0200
Subject: [PATCH 37/52] Address and remove TODOs

---
 metric_learn/_util.py | 11 +++++++----
 metric_learn/sdml.py  |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 311dd54c..e784b493 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -512,7 +512,9 @@ def _initialize_transformer(num_dims, input, y=None, init='auto',
     authorized_inits.append('lda')

   if isinstance(init, np.ndarray):
-    init = check_array(init)
+    # we copy the array so that updating the metric later does not also
+    # modify the user-provided init
+    init = check_array(init, copy=True)

     # Assert that init.shape[1] = X.shape[1]
     if init.shape[1] != n_features:
@@ -648,8 +650,9 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   """
   n_features = input.shape[-1]
   if isinstance(init, np.ndarray):
-    init = check_array(init)  # TODO: do we want to copy the array ?
-    # see how they do it in scikit-learn for instance
+    # we copy the array so that updating the metric later does not also
+    # modify the user-provided init
+    init = check_array(init, copy=True)

     # Assert that init.shape[1] = n_features
     if (init.shape) != (n_features,) * 2:
@@ -694,8 +697,8 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
       X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)})
     else:
       X = input
+    # atleast2d is necessary to deal with scalar covariance matrices
     M_inv = np.atleast_2d(np.cov(X, rowvar=False))
-    # TODO: check atleast_2d necessary
     s, u = scipy.linalg.eigh(M_inv)
     cov_is_definite = _check_sdp_from_eigen(s)
     if strict_pd and not cov_is_definite:
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index ce1dbe51..61b9304d 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -88,7 +88,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
     self.balance_param = balance_param
     self.sparsity_param = sparsity_param
     self.prior = prior
-    self.use_cov = use_cov  # TODO: deprecate and replace by init
+    self.use_cov = use_cov
     self.verbose = verbose
     self.random_state = random_state
     super(_BaseSDML, self).__init__(preprocessor)

From a5a6af8935f35edbce3997018f77df92e5f94b9b Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Mon, 3 Jun 2019 18:18:37 +0200
Subject: [PATCH 38/52] Replace init by prior for ITML

---
 metric_learn/itml.py     | 40 +++++++++++++++++++---------------------
 test/test_base_metric.py |  9 +++++----
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 5ba472ee..71526c48 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -32,7 +32,7 @@ class _BaseITML(MahalanobisMixin):
   _tuple_size = 2  # constraints are pairs

   def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
-               init='identity', A0='deprecated', verbose=False,
+               prior='identity', A0='deprecated', verbose=False,
                preprocessor=None, random_state=None):
     """Initialize ITML.

@@ -45,10 +45,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,

     convergence_threshold : float, optional

-    init : string or numpy array, optional (default='identity')
+    prior : string or numpy array, optional (default='identity')
       Initialization of the linear transformation. Possible options are
       'identity', 'covariance', 'random', and a numpy array of shape
-      (n_features, n_features). For ITML, the init should be strictly
+      (n_features, n_features). For ITML, the prior should be strictly
       positive definite (PD).

       'identity'
         An identity matrix of shape (n_features, n_features).

       'covariance'
         The inverse covariance matrix.

       'random'
-        The initial transformation will be a random SPD matrix of shape
+        The prior will be a random SPD matrix of shape
         `(n_features, n_features)`, generated using
         `sklearn.datasets.make_spd_matrix`.

@@ -70,7 +70,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
     A0 : Not used
       .. deprecated:: 0.5.0
         `A0` was deprecated in version 0.5.0 and will
-        be removed in 0.6.0. Use 'init' instead.
+        be removed in 0.6.0. Use 'prior' instead.

     verbose : bool, optional
       if True, prints information while learning

@@ -81,13 +81,12 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int.
If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. + ``prior='random'``, ``random_state`` is used to set the prior. """ self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold - self.init = init + self.prior = prior self.A0 = A0 self.verbose = verbose self.random_state = random_state @@ -97,7 +96,7 @@ def _fit(self, pairs, y, bounds=None): if self.A0 != 'deprecated': warnings.warn('"A0" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.', + 'removed in 0.6.0. Use "prior" instead.', DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') @@ -109,11 +108,11 @@ def _fit(self, pairs, y, bounds=None): assert len(bounds) == 2 self.bounds_ = bounds self.bounds_[self.bounds_ == 0] = 1e-9 - # init metric - # pairs will be deduplicated into X two times, see how to avoid that - A = _initialize_metric_mahalanobis(pairs, self.init, self.random_state, + # set the prior + # pairs will be deduplicated into X two times, TODO: avoid that + A = _initialize_metric_mahalanobis(pairs, self.prior, self.random_state, strict_pd=True, - matrix_name='init') + matrix_name='prior') gamma = self.gamma pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] @@ -259,7 +258,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin): def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, - bounds='deprecated', init='identity', A0='deprecated', + bounds='deprecated', prior='identity', A0='deprecated', verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `ITML`. @@ -285,10 +284,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, be removed in 0.6.0. Set `bounds` at fit time instead : `itml_supervised.fit(X, y, bounds=...)` - init : string or numpy array, optional (default='identity') + prior : string or numpy array, optional (default='identity') Initialization of the linear transformation. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the init should be strictly + (n_features, n_features). For ITML, the prior should be strictly positive definite (PD). 'identity' @@ -298,7 +297,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, The inverse covariance matrix. 'random' - The initial transformation will be a random SPD matrix of shape + The prior will be a random SPD matrix of shape `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. @@ -310,7 +309,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, A0 : Not used .. deprecated:: 0.5.0 `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. + be removed in 0.6.0. Use 'prior' instead. verbose : bool, optional if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable @@ -318,12 +317,11 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. + ``prior='random'``, ``random_state`` is used to set the prior. 
""" _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, - A0=A0, init=init, verbose=verbose, + A0=A0, prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) self.num_labeled = num_labeled self.num_constraints = num_constraints diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 3215457e..b8ee4803 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -43,15 +43,16 @@ def test_lfda(self): def test_itml(self): self.assertEqual(remove_spaces(str(metric_learn.ITML())), remove_spaces(""" -ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, init='identity', - max_iter=1000, preprocessor=None, random_state=None, verbose=False) +ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, + max_iter=1000, preprocessor=None, prior='identity', random_state=None, + verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), remove_spaces(""" ITML_Supervised(A0='deprecated', bounds='deprecated', - convergence_threshold=0.001, gamma=1.0, init='identity', + convergence_threshold=0.001, gamma=1.0, max_iter=1000, num_constraints=None, num_labeled='deprecated', - preprocessor=None, random_state=None, verbose=False) + preprocessor=None, prior='identity', random_state=None, verbose=False) """)) def test_lsml(self): From 9c4d70d1864b7453be519b7443fa992cc646885c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 3 Jun 2019 18:21:31 +0200 Subject: [PATCH 39/52] TST: fix ITML test with init changed into prior --- test/metric_learn_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 51f2be00..acd8695f 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -118,7 +118,7 @@ def test_deprecation_A0(self): itml_supervised = ITML_Supervised(A0=np.ones_like(X)) msg = ('"A0" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.') + 'removed in 0.6.0. Use "prior" instead.') with pytest.warns(DeprecationWarning) as raised_warning: itml_supervised.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) From 8cb9c422237afdc6868b417cea33a8a184e17a06 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 4 Jun 2019 08:58:44 +0200 Subject: [PATCH 40/52] Add precision for MMC --- metric_learn/mmc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 192dfd28..44546191 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -75,7 +75,8 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, be removed in 0.6.0. Use 'init' instead. diagonal : bool, optional if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. 
From b40e75e788a98347f25a1ad587874edcf0180063 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Wed, 5 Jun 2019 10:32:23 +0200
Subject: [PATCH 41/52] Add ChangedBehaviorWarning for the algorithms that
 changed

---
 metric_learn/lsml.py      |   9 ++++
 metric_learn/mlkr.py      |  12 +++--
 metric_learn/mmc.py       |  11 +++-
 metric_learn/nca.py       |  12 ++++-
 metric_learn/sdml.py      |  11 +++-
 test/metric_learn_test.py | 107 +++++++++++++++++++++++++++++++++++++-
 6 files changed, 152 insertions(+), 10 deletions(-)

diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 901c7bcd..2a694dd8 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -20,6 +20,7 @@
 import scipy.linalg
 from six.moves import xrange
 from sklearn.base import TransformerMixin
+from sklearn.exceptions import ChangedBehaviorWarning

 from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints
@@ -91,6 +92,14 @@ def _fit(self, quadruplets, weights=None):
     else:
       self.w_ = weights
     self.w_ /= self.w_.sum()  # weights must sum to 1
+    # if the prior is the default ('identity'), we raise a warning just in
+    # case
+    if self.prior == 'identity':
+      msg = ("Warning, as of version 0.5.0, the default prior is now "
+             "'identity', instead of 'covariance'. If you still want to use "
+             "the inverse of the covariance matrix as a prior, "
+             "set 'prior'=='covariance'. This warning will disappear in "
+             "v0.6.0.")
+      warnings.warn(msg, ChangedBehaviorWarning)
     M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.prior,
                                                   return_inverse=True,
                                                   strict_pd=True,
diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 34b55d98..f596ee49 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -14,13 +14,10 @@
 import sys
 import warnings
 import numpy as np
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning
 from sklearn.utils.fixes import logsumexp
 from scipy.optimize import minimize
-from scipy.spatial.distance import pdist, squareform
 from sklearn.base import TransformerMixin
-from sklearn.decomposition import PCA
-
 from sklearn.metrics import pairwise_distances

 from .base_metric import MahalanobisMixin
@@ -141,6 +138,13 @@ def fit(self, X, y):
     m = self.num_dims
     if m is None:
       m = d
+    # if the init is the default ('auto'), we raise a warning just in case
+    if self.init == 'auto':
+      msg = ("Warning, as of version 0.5.0, the default init is now "
+             "'auto', instead of 'pca'. If you still want to use "
+             "PCA as an init, set 'init'=='pca'.
This warning will " + "disappear in v0.6.0.") + warnings.warn(msg, ChangedBehaviorWarning) A = _initialize_transformer(m, X, y, init=self.init, random_state=self.random_state, # MLKR works on regression targets: diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 44546191..efe70f3b 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -21,7 +21,8 @@ import numpy as np from six.moves import xrange from sklearn.base import TransformerMixin -from sklearn.utils.validation import check_array, assert_all_finite +from sklearn.utils.validation import assert_all_finite +from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs @@ -110,6 +111,14 @@ def _fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') + msg = ("Warning, as of version 0.5.0, the default prior is now " + "'identity', instead of the identity divided by a scaling factor " + "of 10. If you still want to use the same init as in previous " + "versions, set 'init' == np.eye(d)/10, where d is the dimension " + "of your input space (d=pairs.shape[1]). " + "This warning will disappear in v0.6.0.") + warnings.warn(msg, ChangedBehaviorWarning) + self.A_ = _initialize_metric_mahalanobis(pairs, self.init, random_state=self.random_state, matrix_name='init') diff --git a/metric_learn/nca.py b/metric_learn/nca.py index ba3e0761..c09b1ff1 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -19,7 +19,7 @@ import numpy as np from scipy.optimize import minimize from sklearn.metrics import pairwise_distances -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.utils.fixes import logsumexp from sklearn.base import TransformerMixin @@ -128,7 +128,15 @@ def fit(self, X, y): # Measure the total training time train_time = time.time() - # Initialize A to a scaling matrix + # Initialize A + # if the init is the default (auto), we raise a warning just in case + if self.init == 'auto': + msg = ("Warning, as of version 0.5.0, the default init is now " + "'auto', instead of the previous scaling matrix. If you still " + "want to use the same scaling matrix as before as an init, " + "set 'init'==np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(" + "axis=0), EPS))). This warning will disappear in v0.6.0.") + warnings.warn(msg, ChangedBehaviorWarning) A = _initialize_transformer(num_dims, X, labels, self.init, self.verbose) # Run NCA diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 61b9304d..fbfc8645 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -18,7 +18,7 @@ from sklearn.base import TransformerMixin from scipy.linalg import pinvh from sklearn.covariance import graphical_lasso -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs @@ -109,6 +109,15 @@ def _fit(self, pairs, y): type_of_inputs='tuples') # set up (the inverse of) the prior M + # if the prior is the default (identity), we raise a warning just in case + if self.prior == 'identity': + msg = ("Warning, as of version 0.5.0, the default prior is now " + "'identity', instead of 'covariance'. 
If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set 'prior'=='covariance' (it was the default in previous " + "versions since there was 'use_cov'==True). " + "This warning will disappear in v0.6.0.") + warnings.warn(msg, ChangedBehaviorWarning) _, prior_inv = _initialize_metric_mahalanobis(pairs, self.prior, return_inverse=True, strict_pd=True, diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index acd8695f..cc022e6b 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -9,7 +9,7 @@ make_spd_matrix) from numpy.testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.testing import assert_warns_message -from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning from sklearn.utils.validation import check_X_y try: from inverse_covariance import quic @@ -19,7 +19,7 @@ HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML, ITML) + RCA_Supervised, MMC_Supervised, SDML, ITML, LSML) # Import this specially for testing. from metric_learn.constraints import wrap_pairs from metric_learn.lmnn import python_LMNN @@ -75,6 +75,29 @@ def test_deprecation_num_labeled(self): 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ("Warning, as of version 0.5.0, the default prior is now " + "'identity', instead of 'covariance'. If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set 'prior'=='covariance'. This warning will disappear in " + "v0.6.0.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.], [-5., 3.], [5., 0.]], + [[0., 50.], [0., -60], [-10., 0.], [10., 0.]]]) + lsml = LSML() + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml.fit(pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -370,6 +393,31 @@ def test_deprecation_use_cov(self): sdml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used (except for the balance_param that we need + # to set for the algorithm to not diverge) + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(balance_param=1e-5) + msg = ("Warning, as of version 0.5.0, the default prior is now " + "'identity', instead of 'covariance'. If you still want to use " + "the inverse of the covariance matrix as a prior, " + "set 'prior'=='covariance' (it was the default in previous " + "versions since there was 'use_cov'==True). 
" + "This warning will disappear in v0.6.0.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + sdml = SDML(balance_param=1e-5) + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -533,6 +581,22 @@ def test_one_class(self): nca.fit(X, y) assert_array_equal(nca.transformer_, A) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA() + msg = ("Warning, as of version 0.5.0, the default init is now " + "'auto', instead of the previous scaling matrix. If you still " + "want to use the same scaling matrix as before as an init, " + "set 'init'==np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(" + "axis=0), EPS))). This warning will disappear in v0.6.0.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + nca.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestLFDA(MetricTestCase): def test_iris(self): @@ -614,6 +678,21 @@ def test_deprecation_A0(self): mlkr.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([0.1, 0.2, 0.3, 0.4]) + mlkr = MLKR() + msg = ("Warning, as of version 0.5.0, the default init is now " + "'auto', instead of 'pca'. If you still want to use " + "PCA as an init, set 'init'=='pca'. This warning will " + "disappear in v0.6.0.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mlkr.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestMMC(MetricTestCase): def test_iris(self): @@ -687,6 +766,30 @@ def test_deprecation_A0(self): mmc.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_changed_behaviour_warning(self): + # test that a ChangedBehavior warning is thrown about the init, if the + # default parameters are used. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ("Warning, as of version 0.5.0, the default prior is now " + "'identity', instead of the identity divided by a scaling factor " + "of 10. If you still want to use the same init as in previous " + "versions, set 'init' == np.eye(d)/10, where d is the dimension " + "of your input space (d=pairs.shape[1]). 
" + "This warning will disappear in v0.6.0.") + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) + y_pairs = [1, -1] + mmc = MMC() + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc.fit(pairs, y_pairs) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), From cec35aba64e82459aa90e7c7f2208eeeb786110a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 5 Jun 2019 12:37:05 +0200 Subject: [PATCH 42/52] Address https://github.com/metric-learn/metric-learn/pull/195#pullrequestreview-245440568 --- metric_learn/_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index e784b493..7be39068 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -556,7 +556,7 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', if init == 'auto': if has_classes: n_classes = len(np.unique(y)) - if (has_classes and num_dims <= min(n_features, n_classes - 1)): + if has_classes and num_dims <= min(n_features, n_classes - 1): init = 'lda' elif num_dims < min(n_features, n_samples): init = 'pca' @@ -655,7 +655,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, init = check_array(init, copy=True) # Assert that init.shape[1] = n_features - if (init.shape) != (n_features,) * 2: + if init.shape != (n_features,) * 2: raise ValueError('The input dimensionality {} of the given ' 'mahalanobis matrix `{}` must match the ' 'dimensionality of the given inputs ({}).' From 617ab0a620466414ba274d4153059deb17112061 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 5 Jun 2019 12:44:10 +0200 Subject: [PATCH 43/52] Remove the warnings check since we now have a ChangedBehaviorWarning --- test/metric_learn_test.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 2c6c5218..72ec8486 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -379,7 +379,7 @@ def test_sdml_raises_warning_msg_not_installed_skggm(self): "installed.") def test_sdml_raises_warning_msg_installed_skggm(self): """Tests that the right warning message is raised if someone tries to - use SDML but has not installed skggm, and that the algorithm fails to + use SDML and has installed skggm, and that the algorithm fails to converge""" # TODO: remove if we don't need skggm anymore # case on which we know that skggm's graphical lasso fails @@ -422,7 +422,7 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): "that no warning should be thrown.") def test_raises_no_warning_installed_skggm(self): # otherwise we should be able to instantiate and fit SDML and it - # should raise no warning + # should raise no error pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] X, y = make_classification(random_state=42) @@ -430,10 +430,8 @@ def test_raises_no_warning_installed_skggm(self): sdml = SDML(prior='covariance') sdml.fit(pairs, y_pairs) assert len(record) == 0 - with pytest.warns(None) as record: - sdml = SDML_Supervised(prior='identity', balance_param=1e-5) - sdml.fit(X, y) - assert len(record) == 0 + sdml = SDML_Supervised(prior='identity', balance_param=1e-5) + sdml.fit(X, y) def test_iris(self): # 
Note: this is a flaky test, which fails for certain seeds.

From a5b13f2873c37579897e78aa474f8ffc79ef3475 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Wed, 5 Jun 2019 13:05:20 +0200
Subject: [PATCH 44/52] Be more precise: it should not raise any
 ConvergenceWarning

---
 test/metric_learn_test.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 72ec8486..86fe62b9 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -422,16 +422,20 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self):
                       "that no warning should be thrown.")
   def test_raises_no_warning_installed_skggm(self):
     # otherwise we should be able to instantiate and fit SDML and it
-    # should raise no error
+    # should raise no error and no ConvergenceWarning
     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
     y_pairs = [1, -1]
    X, y = make_classification(random_state=42)
-    with pytest.warns(None) as record:
+    with pytest.warns(None) as records:
       sdml = SDML(prior='covariance')
       sdml.fit(pairs, y_pairs)
-    assert len(record) == 0
-    sdml = SDML_Supervised(prior='identity', balance_param=1e-5)
-    sdml.fit(X, y)
+    for record in records:
+      assert record.category is not ConvergenceWarning
+    with pytest.warns(None) as records:
+      sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5)
+      sdml_supervised.fit(X, y)
+    for record in records:
+      assert record.category is not ConvergenceWarning

   def test_iris(self):
     # Note: this is a flaky test, which fails for certain seeds.

From 0ea0aa63c5fef61de3557f34290ff9b5c59f39d0 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Thu, 6 Jun 2019 14:08:10 +0200
Subject: [PATCH 45/52] Address
 https://github.com/metric-learn/metric-learn/pull/195#pullrequestreview-245911227

---
 metric_learn/_util.py       | 23 ++++++++++++-----------
 metric_learn/base_metric.py |  2 +-
 metric_learn/itml.py        |  4 ++--
 metric_learn/lsml.py        | 16 ++++++++--------
 metric_learn/mmc.py         | 10 +++++-----
 metric_learn/nca.py         |  2 +-
 metric_learn/sdml.py        |  9 ++++-----
 test/metric_learn_test.py   |  2 +-
 8 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 7be39068..cb21acc5 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -341,7 +341,8 @@ def check_collapsed_pairs(pairs):
 def _check_sdp_from_eigen(w, tol=None):
   """Checks if some of the eigenvalues given are negative, up to a tolerance
   level, with a default value of the tolerance depending on the eigenvalues.
-  It also returns whether the matrix is definite.
+  It also returns whether the matrix is positive definite, up to the above
+  tolerance.

   Parameters
   ----------
@@ -356,7 +357,7 @@ def _check_sdp_from_eigen(w, tol=None):
   Returns
   -------
   is_definite : bool
-    Whether the matrix is definite or not.
+    Whether the matrix is positive definite or not.

   See Also
   --------
@@ -453,10 +454,10 @@ def _initialize_transformer(num_dims, input, y=None, init='auto',
     'auto'
       Depending on ``num_dims``, the most reasonable initialization will
-      be chosen. If ``num_dims <= n_classes`` we use 'lda' (if possible,
-      see the description of 'lda' init), as it uses labels information.
+      be chosen. If ``num_dims <= n_classes`` we use 'lda'
+      (see the description of 'lda' init), as it uses labels information.
If not, but ``num_dims < min(n_features, n_samples)``, we use - 'pca', as it projects data in meaningful directions (those of + 'pca', as it projects data onto meaningful directions (those of higher variance). Otherwise, we just use 'identity'. 'pca' @@ -473,7 +474,7 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', This initialization is possible only if `has_classes == True`. 'identity' - If ``num_dims`` is strictly smaller than the + The identity matrix. If ``num_dims`` is strictly smaller than the dimensionality of the inputs passed to :meth:`fit`, the identity matrix will be truncated to the first ``num_dims`` rows. @@ -592,8 +593,8 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', def _initialize_metric_mahalanobis(input, init='identity', random_state=None, return_inverse=False, strict_pd=False, matrix_name='matrix'): - """Returns a standard mahalanobis matrix that can be used as a prior or an - initialization + """Returns a PSD matrix that can be used as a prior or an initialization + for the Mahalanobis distance Parameters ---------- @@ -601,7 +602,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, The input samples (can be tuples or regular samples). init : string or numpy array, optional (default='identity') - Initialization of the linear transformation. Possible options are + Specification for the matrix to initialize. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). @@ -613,7 +614,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, covariance matrix is not definite and `strict_pd == True`) 'random' - The initial transformation will be a random PD matrix of shape + A random positive definite (PD) matrix of shape `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. @@ -624,7 +625,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, random_state : int or `numpy.RandomState` or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random + ``init='random'``, ``random_state`` is used to set the random Mahalanobis matrix. If ``init='pca'``, ``random_state`` is passed as an argument to PCA when initializing the matrix. diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index ab701f87..454880e4 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -288,7 +288,7 @@ def get_mahalanobis_matrix(self): Returns ------- - M : `numpy.ndarray`, shape=(num_dims, n_features) + M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ return self.transformer_.T.dot(self.transformer_) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7514dcd8..93c6cd11 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -46,7 +46,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, convergence_threshold : float, optional prior : string or numpy array, optional (default='identity') - Initialization of the linear transformation. Possible options are + The Mahalanobis matrix to use as a prior. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). For ITML, the prior should be strictly positive definite (PD). 
@@ -288,7 +288,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
       `itml_supervised.fit(X, y, bounds=...)`

     prior : string or numpy array, optional (default='identity')
-      Initialization of the linear transformation. Possible options are
+      Initialization of the Mahalanobis matrix. Possible options are
       'identity', 'covariance', 'random', and a numpy array of shape
       (n_features, n_features). For ITML, the prior should be strictly
       positive definite (PD).
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 2a694dd8..fd29198b 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -50,8 +50,8 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
       The inverse covariance matrix.

     'random'
-      The initial transformation will be a random PD matrix of shape
-      `(n_features, n_features)`, generated using
+      The initial Mahalanobis matrix will be a random positive definite
+      (PD) matrix of shape `(n_features, n_features)`, generated using
       `sklearn.datasets.make_spd_matrix`.

     numpy array
@@ -68,8 +68,8 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
       tuples will be formed like this: X[indices].
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
-      ``prior='random'``, ``random_state`` is used to initialize the random
-      transformation.
+      ``prior='random'``, ``random_state`` is used to set the random
+      prior.
     """
     self.prior = prior
     self.tol = tol
     self.max_iter = max_iter
@@ -244,8 +244,8 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
       The inverse covariance matrix.

     'random'
-      The initial transformation will be a random PD matrix of shape
-      `(n_features, n_features)`, generated using
+      The initial Mahalanobis matrix will be a random positive definite
+      (PD) matrix of shape `(n_features, n_features)`, generated using
       `sklearn.datasets.make_spd_matrix`.

     numpy array
@@ -267,8 +267,8 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
       tuples will be formed like this: X[indices].
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
-      ``prior='random'``, ``random_state`` is used to initialize the random
-      transformation.
+      ``prior='random'``, ``random_state`` is used to set the random
+      prior.
     """
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index efe70f3b..48f935a3 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -45,7 +45,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     max_proj : int, optional
     convergence_threshold : float, optional
     init : string or numpy array, optional (default='identity')
-      Initialization of the linear transformation. Possible options are
+      Initialization of the Mahalanobis matrix. Possible options are
       'identity', 'covariance', 'random', and a numpy array of shape
       (n_features, n_features).

@@ -56,7 +56,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
       The (pseudo-)inverse of the covariance matrix.

     'random'
-      The initial transformation will be a random SPD matrix of shape
+      The initial Mahalanobis matrix will be a random SPD matrix of shape
       `(n_features, n_features)`, generated using
       `sklearn.datasets.make_spd_matrix`.
@@ -111,7 +111,7 @@ def _fit(self, pairs, y):
     pairs, y = self._prepare_inputs(pairs, y,
                                     type_of_inputs='tuples')

-    msg = ("Warning, as of version 0.5.0, the default prior is now "
+    msg = ("Warning, as of version 0.5.0, the default init is now "
            "'identity', instead of the identity divided by a scaling factor "
            "of 10. If you still want to use the same init as in previous "
            "versions, set 'init' == np.eye(d)/10, where d is the dimension "
@@ -476,7 +476,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
     num_constraints: int, optional
       number of constraints to generate
     init : string or numpy array, optional (default='identity')
-      Initialization of the linear transformation. Possible options are
+      Initialization of the Mahalanobis matrix. Possible options are
       'identity', 'covariance', 'random', and a numpy array of shape
       (n_features, n_features).

@@ -519,7 +519,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
       ``init='random'``, ``random_state`` is used to initialize the random
-      transformation.
+      Mahalanobis matrix.
diff --git a/metric_learn/nca.py b/metric_learn/nca.py
index c09b1ff1..e3fa4447 100644
--- a/metric_learn/nca.py
+++ b/metric_learn/nca.py
@@ -48,7 +48,7 @@ def __init__(self, init='auto', num_dims=None, max_iter=100, tol=None,
     Parameters
     ----------
     init : string or numpy array, optional (default='auto')
-      Initialization of the linear transformation. Possible options are
+      Initialization of the linear transformation ``L``. Possible options are
       'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape
       (n_features_a, n_features_b).
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index fbfc8645..35f32f68 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -60,7 +60,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
       The inverse covariance matrix.

     'random'
-      The initial transformation will be a random PD matrix of shape
+      The prior will be a random positive definite (PD) matrix of shape
       `(n_features, n_features)`, generated using
       `sklearn.datasets.make_spd_matrix`.

@@ -114,8 +114,7 @@ def _fit(self, pairs, y):
       msg = ("Warning, as of version 0.5.0, the default prior is now "
              "'identity', instead of 'covariance'. If you still want to use "
              "the inverse of the covariance matrix as a prior, "
-             "set 'prior'=='covariance' (it was the default in previous "
-             "versions since there was 'use_cov'==True). "
+             "set 'prior'=='covariance'. "
              "This warning will disappear in v0.6.0.")
       warnings.warn(msg, ChangedBehaviorWarning)
     _, prior_inv = _initialize_metric_mahalanobis(pairs, self.prior,
                                                   return_inverse=True,
                                                   strict_pd=True,
@@ -292,8 +291,8 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
       tuples will be formed like this: X[indices].
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
-      ``prior='random'``, ``random_state`` is used to initialize the random
-      transformation.
+      ``prior='random'``, ``random_state`` is used to set the random
+      prior.
""" _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 6f25adbf..36e3b96b 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -924,7 +924,7 @@ def test_changed_behaviour_warning(self): X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) mmc_supervised = MMC_Supervised() - msg = ("Warning, as of version 0.5.0, the default prior is now " + msg = ("Warning, as of version 0.5.0, the default init is now " "'identity', instead of the identity divided by a scaling factor " "of 10. If you still want to use the same init as in previous " "versions, set 'init' == np.eye(d)/10, where d is the dimension " From 6e452ed308075abe19533f0e85f3df3e76636be0 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 6 Jun 2019 14:11:39 +0200 Subject: [PATCH 46/52] FIX remaining comment --- metric_learn/mmc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 48f935a3..983446f0 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -487,8 +487,8 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, The (pseudo-)inverse of the covariance matrix. 'random' - The initial transformation will be a random SPD matrix of shape - `(n_features, n_features)`, generated using + The initial Mahalanobis matrix will be a random SPD matrix of + shape `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array From 4f822a8c35ef0458407121c3c33d44a3dca43d16 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 6 Jun 2019 14:27:21 +0200 Subject: [PATCH 47/52] TST: update test error message --- test/metric_learn_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 36e3b96b..c1b260cb 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -553,8 +553,7 @@ def test_changed_behaviour_warning(self): msg = ("Warning, as of version 0.5.0, the default prior is now " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance' (it was the default in previous " - "versions since there was 'use_cov'==True). " + "set 'prior'=='covariance'. " "This warning will disappear in v0.6.0.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: sdml_supervised.fit(X, y) From c19ca4c7307b56d3455d58643e109ce086b67e3c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 6 Jun 2019 14:41:18 +0200 Subject: [PATCH 48/52] Improve readability --- metric_learn/_util.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index cb21acc5..0cfdec4e 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -540,18 +540,15 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', '`init` ({})!' .format(num_dims, init.shape[0])) - elif init in authorized_inits: - pass - else: + elif init not in authorized_inits: raise ValueError( "`init` must be '{}' " "or a numpy array of shape (num_dims, n_features)." 
.format("', '".join(authorized_inits))) random_state = check_random_state(random_state) - transformation = init if isinstance(init, np.ndarray): - pass + return init else: n_samples = input.shape[0] if init == 'auto': @@ -564,10 +561,9 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', else: init = 'identity' if init == 'identity': - transformation = np.eye(num_dims, input.shape[-1]) + return np.eye(num_dims, input.shape[-1]) elif init == 'random': - transformation = random_state.randn(num_dims, - input.shape[-1]) + return random_state.randn(num_dims, input.shape[-1]) elif init in {'pca', 'lda'}: init_time = time.time() if init == 'pca': @@ -587,7 +583,7 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', transformation = lda.scalings_.T[:num_dims] if verbose: print('done in {:5.2f}s'.format(time.time() - init_time)) - return transformation + return transformation def _initialize_metric_mahalanobis(input, init='identity', random_state=None, @@ -666,9 +662,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, if not np.allclose(init, init.T): raise ValueError("`{}` is not symmetric.".format(matrix_name)) - elif init in ['identity', 'covariance', 'random']: - pass - else: + elif init not in ['identity', 'covariance', 'random']: raise ValueError( "`{}` must be 'identity', 'covariance', 'random' " "or a numpy array of shape (n_features, n_features)." @@ -687,11 +681,17 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, .format(*((matrix_name,) * 3))) if return_inverse: M_inv = np.dot(u / s, u.T) + return M, M_inv + else: + return M else: if init == 'identity': M = np.eye(n_features, n_features) if return_inverse: M_inv = M.copy() + return M, M_inv + else: + return M if init == 'covariance': if input.ndim == 3: # if the input are tuples, we need to form an X by deduplication @@ -709,6 +709,10 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, "require the `{}` to be strictly positive definite." 
.format(*((matrix_name,) * 2))) M = np.dot(u / s, u.T) + if return_inverse: + return M, M_inv + else: + return M elif init == 'random': # we need to create a random symmetric matrix M = make_spd_matrix(n_features, random_state=random_state) @@ -718,7 +722,6 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, # np.linalg.inv returns not symmetric inverses of symmetric matrices) # TODO: there might be a more efficient method to do so M_inv = pinvh(M) - if return_inverse: - return (M, M_inv) - else: - return M + return M, M_inv + else: + return M From d8181d04e7f0dacc33e58c575303f4dbc822aee1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 7 Jun 2019 11:33:25 +0200 Subject: [PATCH 49/52] Address https://github.com/metric-learn/metric-learn/pull/195#pullrequestreview-246553439 --- metric_learn/_util.py | 158 ++++++++++++++++++++------------------ metric_learn/lmnn.py | 2 +- metric_learn/lsml.py | 37 +++++---- metric_learn/mlkr.py | 30 +++++--- metric_learn/mmc.py | 49 +++++++----- metric_learn/nca.py | 34 ++++---- metric_learn/sdml.py | 46 +++++++---- test/metric_learn_test.py | 49 +++++++----- test/test_base_metric.py | 16 ++-- test/test_utils.py | 23 +++++- 10 files changed, 264 insertions(+), 180 deletions(-) diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 0cfdec4e..54db66f1 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -369,7 +369,7 @@ def _check_sdp_from_eigen(w, tol=None): if tol < 0: raise ValueError("tol should be positive.") if any(w < - tol): - raise NonPSDError + raise NonPSDError() if any(abs(w) < tol): return False return True @@ -549,41 +549,48 @@ def _initialize_transformer(num_dims, input, y=None, init='auto', random_state = check_random_state(random_state) if isinstance(init, np.ndarray): return init - else: - n_samples = input.shape[0] - if init == 'auto': - if has_classes: - n_classes = len(np.unique(y)) - if has_classes and num_dims <= min(n_features, n_classes - 1): - init = 'lda' - elif num_dims < min(n_features, n_samples): - init = 'pca' - else: - init = 'identity' - if init == 'identity': - return np.eye(num_dims, input.shape[-1]) - elif init == 'random': - return random_state.randn(num_dims, input.shape[-1]) - elif init in {'pca', 'lda'}: - init_time = time.time() - if init == 'pca': - pca = PCA(n_components=num_dims, - random_state=random_state) - if verbose: - print('Finding principal components... ') - sys.stdout.flush() - pca.fit(input) - transformation = pca.components_ - elif init == 'lda': - lda = LinearDiscriminantAnalysis(n_components=num_dims) - if verbose: - print('Finding most discriminative components... ') - sys.stdout.flush() - lda.fit(input, y) - transformation = lda.scalings_.T[:num_dims] + n_samples = input.shape[0] + if init == 'auto': + if has_classes: + n_classes = len(np.unique(y)) + else: + n_classes = -1 + init = _auto_select_init(has_classes, n_features, n_samples, num_dims, + n_classes) + if init == 'identity': + return np.eye(num_dims, input.shape[-1]) + elif init == 'random': + return random_state.randn(num_dims, input.shape[-1]) + elif init in {'pca', 'lda'}: + init_time = time.time() + if init == 'pca': + pca = PCA(n_components=num_dims, + random_state=random_state) + if verbose: + print('Finding principal components... 
') + sys.stdout.flush() + pca.fit(input) + transformation = pca.components_ + elif init == 'lda': + lda = LinearDiscriminantAnalysis(n_components=num_dims) if verbose: - print('done in {:5.2f}s'.format(time.time() - init_time)) - return transformation + print('Finding most discriminative components... ') + sys.stdout.flush() + lda.fit(input, y) + transformation = lda.scalings_.T[:num_dims] + if verbose: + print('done in {:5.2f}s'.format(time.time() - init_time)) + return transformation + + +def _auto_select_init(has_classes, n_features, n_samples, num_dims, n_classes): + if has_classes and num_dims <= min(n_features, n_classes - 1): + init = 'lda' + elif num_dims < min(n_features, n_samples): + init = 'pca' + else: + init = 'identity' + return init def _initialize_metric_mahalanobis(input, init='identity', random_state=None, @@ -684,44 +691,43 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, return M, M_inv else: return M - else: - if init == 'identity': - M = np.eye(n_features, n_features) - if return_inverse: - M_inv = M.copy() - return M, M_inv - else: - return M - if init == 'covariance': - if input.ndim == 3: - # if the input are tuples, we need to form an X by deduplication - X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)}) - else: - X = input - # atleast2d is necessary to deal with scalar covariance matrices - M_inv = np.atleast_2d(np.cov(X, rowvar=False)) - s, u = scipy.linalg.eigh(M_inv) - cov_is_definite = _check_sdp_from_eigen(s) - if strict_pd and not cov_is_definite: - raise LinAlgError("Unable to get a true inverse of the covariance " - "matrix since it is not definite. Try another " - "`{}`, or an algorithm that does not " - "require the `{}` to be strictly positive definite." - .format(*((matrix_name,) * 2))) - M = np.dot(u / s, u.T) - if return_inverse: - return M, M_inv - else: - return M - elif init == 'random': - # we need to create a random symmetric matrix - M = make_spd_matrix(n_features, random_state=random_state) - if return_inverse: - # we use pinvh even if we know the matrix is definite, just because - # we need the returned matrix to be symmetric (and sometimes - # np.linalg.inv returns not symmetric inverses of symmetric matrices) - # TODO: there might be a more efficient method to do so - M_inv = pinvh(M) - return M, M_inv - else: - return M + elif init == 'identity': + M = np.eye(n_features, n_features) + if return_inverse: + M_inv = M.copy() + return M, M_inv + else: + return M + elif init == 'covariance': + if input.ndim == 3: + # if the input are tuples, we need to form an X by deduplication + X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)}) + else: + X = input + # atleast2d is necessary to deal with scalar covariance matrices + M_inv = np.atleast_2d(np.cov(X, rowvar=False)) + s, u = scipy.linalg.eigh(M_inv) + cov_is_definite = _check_sdp_from_eigen(s) + if strict_pd and not cov_is_definite: + raise LinAlgError("Unable to get a true inverse of the covariance " + "matrix since it is not definite. Try another " + "`{}`, or an algorithm that does not " + "require the `{}` to be strictly positive definite." 
+ .format(*((matrix_name,) * 2))) + M = np.dot(u / s, u.T) + if return_inverse: + return M, M_inv + else: + return M + elif init == 'random': + # we need to create a random symmetric matrix + M = make_spd_matrix(n_features, random_state=random_state) + if return_inverse: + # we use pinvh even if we know the matrix is definite, just because + # we need the returned matrix to be symmetric (and sometimes + # np.linalg.inv returns not symmetric inverses of symmetric matrices) + # TODO: there might be a more efficient method to do so + M_inv = pinvh(M) + return M, M_inv + else: + return M diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 804ad3be..446d10a8 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -20,7 +20,7 @@ from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin -from metric_learn._util import _initialize_transformer +from ._util import _initialize_transformer from .base_metric import MahalanobisMixin diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index fd29198b..ecbe8bcb 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -31,17 +31,19 @@ class _BaseLSML(MahalanobisMixin): _tuple_size = 4 # constraints are quadruplets - def __init__(self, tol=1e-3, max_iter=1000, prior='identity', + def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, preprocessor=None, random_state=None): """Initialize LSML. Parameters ---------- - prior : string or numpy array, optional (default='identity') + prior : None, string or numpy array, optional (default=None) Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). 'identity' An identity matrix of shape (n_features, n_features). @@ -93,14 +95,19 @@ def _fit(self, quadruplets, weights=None): self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 # if the prior is the default (identity), we raise a warning just in case - if self.prior == 'identity': - msg = ("Warning, as of version 0.5.0, the default prior is now " + if self.prior is None: + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " "set 'prior'=='covariance'. This warning will disappear in " - "v0.6.0.") + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") warnings.warn(msg, ChangedBehaviorWarning) - M, prior_inv = _initialize_metric_mahalanobis(quadruplets, self.prior, + prior = 'identity' + else: + prior = self.prior + M, prior_inv = _initialize_metric_mahalanobis(quadruplets, prior, return_inverse=True, strict_pd=True, matrix_name='prior') @@ -217,7 +224,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): metric (See function `transformer_from_metric`.) 
""" - def __init__(self, tol=1e-3, max_iter=1000, prior='identity', + def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled='deprecated', num_constraints=None, weights=None, verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `LSML`. @@ -231,11 +238,13 @@ def __init__(self, tol=1e-3, max_iter=1000, prior='identity', ---------- tol : float, optional max_iter : int, optional - prior : string or numpy array, optional (default='identity') + prior : None, string or numpy array, optional (default=None) Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). 'identity' An identity matrix of shape (n_features, n_features). diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index f596ee49..d0dd94ac 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -21,7 +21,7 @@ from sklearn.metrics import pairwise_distances from .base_metric import MahalanobisMixin -from metric_learn._util import _initialize_transformer +from ._util import _initialize_transformer EPS = np.finfo(float).eps @@ -38,7 +38,7 @@ class MLKR(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. """ - def __init__(self, num_dims=None, init='auto', A0='deprecated', + def __init__(self, num_dims=None, init=None, A0='deprecated', tol=None, max_iter=1000, verbose=False, preprocessor=None, random_state=None): """ @@ -49,10 +49,12 @@ def __init__(self, num_dims=None, init='auto', A0='deprecated', num_dims : int, optional Dimensionality of reduced space (defaults to dimension of X) - init : string or numpy array, optional (default='auto') + init : None, string or numpy array, optional (default=None) Initialization of the linear transformation. Possible options are 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). 'auto' Depending on ``num_dims``, the most reasonable initialization @@ -89,7 +91,7 @@ def __init__(self, num_dims=None, init='auto', A0='deprecated', Convergence tolerance for the optimization. max_iter: int, optional - Cap on number of congugate gradient iterations. + Cap on number of conjugate gradient iterations. verbose : bool, optional (default=False) Whether to print progress messages or not. @@ -139,13 +141,19 @@ def fit(self, X, y): if m is None: m = d # if the init is the default (identity), we raise a warning just in case - if self.init == 'auto': - msg = ("Warning, as of version 0.5.0, the default init is now " - "'auto', instead of 'pca'. If you still want to use " - "PCA as an init, set 'init'=='pca'. This warning will " - "disappear in v0.6.0.") + if self.init is None: + # TODO: + # replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of 'pca'. 
" + "If you still want to use PCA as an init, set `init`='pca'. " + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") warnings.warn(msg, ChangedBehaviorWarning) - A = _initialize_transformer(m, X, y, init=self.init, + init = 'auto' + else: + init = self.init + A = _initialize_transformer(m, X, y, init=init, random_state=self.random_state, # MLKR works on regression targets: has_classes=False) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 983446f0..c17cdea0 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -35,7 +35,7 @@ class _BaseMMC(MahalanobisMixin): _tuple_size = 2 # constraints are pairs def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - init='identity', A0='deprecated', diagonal=False, + init=None, A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): """Initialize MMC. @@ -44,15 +44,17 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, max_iter : int, optional max_proj : int, optional convergence_threshold : float, optional - init : string or numpy array, optional (default='identity') - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The (pseudo-)inverse of the covariance matrix. 'random' @@ -111,15 +113,22 @@ def _fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - msg = ("Warning, as of version 0.5.0, the default init is now " - "'identity', instead of the identity divided by a scaling factor " - "of 10. If you still want to use the same init as in previous " - "versions, set 'init' == np.eye(d)/10, where d is the dimension " - "of your input space (d=pairs.shape[1]). " - "This warning will disappear in v0.6.0.") - warnings.warn(msg, ChangedBehaviorWarning) + if self.init is None: + # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'identity', instead of the " + "identity divided by a scaling factor of 10. " + "If you still want to use the same init as in previous " + "versions, set `init`=np.eye(d)/10, where d is the dimension " + "of your input space (d=pairs.shape[1]). 
" + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") + warnings.warn(msg, ChangedBehaviorWarning) + init = 'identity' + else: + init = self.init - self.A_ = _initialize_metric_mahalanobis(pairs, self.init, + self.A_ = _initialize_metric_mahalanobis(pairs, init, random_state=self.random_state, matrix_name='init') @@ -455,7 +464,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): """ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled='deprecated', num_constraints=None, init='identity', + num_labeled='deprecated', num_constraints=None, init=None, A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): """Initialize the supervised version of `MMC`. @@ -475,10 +484,12 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, be removed in 0.6.0. num_constraints: int, optional number of constraints to generate - init : string or numpy array, optional (default='identity') - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). 'identity' An identity matrix of shape (n_features, n_features). diff --git a/metric_learn/nca.py b/metric_learn/nca.py index e3fa4447..d970d6a3 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -23,7 +23,7 @@ from sklearn.utils.fixes import logsumexp from sklearn.base import TransformerMixin -from metric_learn._util import _initialize_transformer +from ._util import _initialize_transformer from .base_metric import MahalanobisMixin EPS = np.finfo(float).eps @@ -41,16 +41,18 @@ class NCA(MahalanobisMixin, TransformerMixin): The learned linear transformation ``L``. """ - def __init__(self, init='auto', num_dims=None, max_iter=100, tol=None, + def __init__(self, init=None, num_dims=None, max_iter=100, tol=None, verbose=False, preprocessor=None, random_state=None): """Neighborhood Components Analysis Parameters ---------- - init : string or numpy array, optional (default='auto') - Initialization of the Mahalanobis matrix. Possible options are - 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). 'auto' Depending on ``num_dims``, the most reasonable initialization @@ -130,14 +132,20 @@ def fit(self, X, y): # Initialize A # if the init is the default (auto), we raise a warning just in case - if self.init == 'auto': - msg = ("Warning, as of version 0.5.0, the default init is now " - "'auto', instead of the previous scaling matrix. If you still " - "want to use the same scaling matrix as before as an init, " - "set 'init'==np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(" - "axis=0), EPS))). 
This warning will disappear in v0.6.0.") + if self.init is None: + # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of the " + "previous scaling matrix. same scaling matrix as before as an " + "init, set `init`=np.eye(X.shape[1])/" + "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning " + "will disappear in v0.6.0, and `init` parameter's default value " + "will be set to 'auto'.") warnings.warn(msg, ChangedBehaviorWarning) - A = _initialize_transformer(num_dims, X, labels, self.init, self.verbose) + init = 'auto' + else: + init = self.init + A = _initialize_transformer(num_dims, X, labels, init, self.verbose) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 35f32f68..b268903f 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -35,7 +35,7 @@ class _BaseSDML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, use_cov='deprecated', verbose=False, preprocessor=None, random_state=None): """ @@ -47,11 +47,13 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - prior : string or numpy array, optional (default='identity') + prior : None, string or numpy array, optional (default=None) Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). 'identity' An identity matrix of shape (n_features, n_features). @@ -110,14 +112,22 @@ def _fit(self, pairs, y): # set up (the inverse of) the prior M # if the prior is the default (identity), we raise a warning just in case - if self.prior == 'identity': - msg = ("Warning, as of version 0.5.0, the default prior is now " + if self.prior is None: + # TODO: + # replace prior=None by prior='identity' in v0.6.0 and remove the + # warning + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance'. " - "This warning will disappear in v0.6.0.") + "set 'prior'=='covariance'. This warning will disappear in " + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") warnings.warn(msg, ChangedBehaviorWarning) - _, prior_inv = _initialize_metric_mahalanobis(pairs, self.prior, + prior = 'identity' + else: + prior = self.prior + _, prior_inv = _initialize_metric_mahalanobis(pairs, prior, return_inverse=True, strict_pd=True, matrix_name='prior') @@ -135,7 +145,7 @@ def _fit(self, pairs, y): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. 
" "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set prior='identity'.", + "`balance_param` and/or to set `prior`='identity'.", ConvergenceWarning) w -= min_eigval # we translate the eigenvalues to make them all positive w += 1e-10 # we add a small offset to avoid definiteness problems @@ -235,7 +245,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): metric (See function `transformer_from_metric`.) """ - def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, use_cov='deprecated', num_labeled='deprecated', num_constraints=None, verbose=False, preprocessor=None, random_state=None): @@ -251,11 +261,13 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', trade off between sparsity and M0 prior sparsity_param : float, optional trade off between optimizer and sparseness (see graph_lasso) - prior : string or numpy array, optional (default='identity') - Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). + prior : None, string or numpy array, optional (default=None) + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). 'identity' An identity matrix of shape (n_features, n_features). diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c1b260cb..cb019961 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -100,11 +100,13 @@ def test_changed_behaviour_warning(self): X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) lsml_supervised = LSML_Supervised() - msg = ("Warning, as of version 0.5.0, the default prior is now " + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " "set 'prior'=='covariance'. This warning will disappear in " - "v0.6.0.") + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: lsml_supervised.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) @@ -489,7 +491,7 @@ def test_sdml_raises_warning_non_psd(self): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set prior='identity'.") + "`balance_param` and/or to set `prior`='identity'.") with pytest.warns(ConvergenceWarning) as raised_warning: try: sdml.fit(pairs, y) @@ -550,11 +552,13 @@ def test_changed_behaviour_warning(self): X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) sdml_supervised = SDML_Supervised(balance_param=1e-5) - msg = ("Warning, as of version 0.5.0, the default prior is now " + msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " + "the default prior will now be set to " "'identity', instead of 'covariance'. 
If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance'. " - "This warning will disappear in v0.6.0.") + "set 'prior'=='covariance'. This warning will disappear in " + "v0.6.0, and `prior` parameter's default value will be set to " + "'identity'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: sdml_supervised.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) @@ -738,11 +742,13 @@ def test_changed_behaviour_warning(self): X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) nca = NCA() - msg = ("Warning, as of version 0.5.0, the default init is now " - "'auto', instead of the previous scaling matrix. If you still " - "want to use the same scaling matrix as before as an init, " - "set 'init'==np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(" - "axis=0), EPS))). This warning will disappear in v0.6.0.") + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of the " + "previous scaling matrix. same scaling matrix as before as an " + "init, set `init`=np.eye(X.shape[1])/" + "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning will" + " disappear in v0.6.0, and `init` parameter's default value will " + "be set to 'auto'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: nca.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) @@ -835,10 +841,11 @@ def test_changed_behaviour_warning(self): X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([0.1, 0.2, 0.3, 0.4]) mlkr = MLKR() - msg = ("Warning, as of version 0.5.0, the default init is now " - "'auto', instead of 'pca'. If you still want to use " - "PCA as an init, set 'init'=='pca'. This warning will " - "disappear in v0.6.0.") + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'auto', instead of 'pca'. " + "If you still want to use PCA as an init, set `init`='pca'. " + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: mlkr.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) @@ -923,12 +930,14 @@ def test_changed_behaviour_warning(self): X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) mmc_supervised = MMC_Supervised() - msg = ("Warning, as of version 0.5.0, the default init is now " - "'identity', instead of the identity divided by a scaling factor " - "of 10. If you still want to use the same init as in previous " - "versions, set 'init' == np.eye(d)/10, where d is the dimension " + msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " + "the default init will now be set to 'identity', instead of the " + "identity divided by a scaling factor of 10. " + "If you still want to use the same init as in previous " + "versions, set `init`=np.eye(d)/10, where d is the dimension " "of your input space (d=pairs.shape[1]). 
" - "This warning will disappear in v0.6.0.") + "This warning will disappear in v0.6.0, and `init` parameter's" + " default value will be set to 'auto'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: mmc_supervised.fit(X, y) assert any(msg == str(wrn.message) for wrn in raised_warning) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index b8ee4803..ff4301f0 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -29,7 +29,7 @@ def test_lmnn(self): def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), remove_spaces( - "NCA(init='auto', max_iter=100, num_dims=None, " + "NCA(init=None, max_iter=100, num_dims=None, " "preprocessor=None,\n random_state=None, " "tol=None, verbose=False)")) @@ -58,26 +58,26 @@ def test_itml(self): def test_lsml(self): self.assertEqual(remove_spaces(str(metric_learn.LSML())), remove_spaces(""" -LSML(max_iter=1000, preprocessor=None, prior='identity', +LSML(max_iter=1000, preprocessor=None, prior=None, random_state=None, tol=0.001, verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), remove_spaces(""" LSML_Supervised(max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior='identity', + num_labeled='deprecated', preprocessor=None, prior=None, random_state=None, tol=0.001, verbose=False, weights=None) """)) def test_sdml(self): self.assertEqual(remove_spaces(str(metric_learn.SDML())), remove_spaces(""" -SDML(balance_param=0.5, preprocessor=None, prior='identity', random_state=None, +SDML(balance_param=0.5, preprocessor=None, prior=None, random_state=None, sparsity_param=0.01, use_cov='deprecated', verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())), remove_spaces(""" SDML_Supervised(balance_param=0.5, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior='identity', + num_labeled='deprecated', preprocessor=None, prior=None, random_state=None, sparsity_param=0.01, use_cov='deprecated', verbose=False) """)) @@ -95,7 +95,7 @@ def test_rca(self): def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), remove_spaces(""" -MLKR(A0='deprecated', init='auto', max_iter=1000, num_dims=None, +MLKR(A0='deprecated', init=None, max_iter=1000, num_dims=None, preprocessor=None, random_state=None, tol=None, verbose=False) """)) @@ -103,13 +103,13 @@ def test_mmc(self): self.assertEqual(remove_spaces(str(metric_learn.MMC())), remove_spaces(""" MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, - diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000, + diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None, random_state=None, verbose=False) """)) self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())), remove_spaces(""" MMC_Supervised(A0='deprecated', convergence_threshold=1e-06, diagonal=False, - diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000, + diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, num_constraints=None, num_labeled='deprecated', preprocessor=None, random_state=None, verbose=False) """)) diff --git a/test/test_utils.py b/test/test_utils.py index 18c176d8..3365f5d5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -11,7 +11,8 @@ make_name, preprocess_points, check_collapsed_pairs, validate_vector, _check_sdp_from_eigen, - check_y_valid_values_for_pairs) + check_y_valid_values_for_pairs, + _auto_select_init) from metric_learn import (ITML, LSML, MMC, RCA, SDML, 
Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, @@ -1107,3 +1108,23 @@ def test_check_input_pairs_learners_invalid_y(estimator, build_dataset, with pytest.raises(ValueError) as raised_error: model.fit(input_data, wrong_labels) assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('has_classes, n_features, n_samples, num_dims, ' + 'n_classes, result', + [(False, 3, 20, 3, 0, 'identity'), + (False, 3, 2, 3, 0, 'identity'), + (False, 5, 3, 4, 0, 'identity'), + (False, 4, 5, 3, 0, 'pca'), + (True, 5, 6, 3, 4, 'lda'), + (True, 6, 3, 3, 3, 'identity'), + (True, 5, 6, 4, 2, 'pca'), + (True, 2, 6, 2, 10, 'lda'), + (True, 4, 6, 2, 3, 'lda') + ]) +def test__auto_select_init(has_classes, n_features, n_samples, num_dims, + n_classes, + result): + """Checks that the auto selection of the init works as expected""" + assert (_auto_select_init(has_classes, n_features, + n_samples, num_dims, n_classes) == result) From e27d8a1b8957a91823e6a746e57e7e64642daf5f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 7 Jun 2019 15:31:25 +0200 Subject: [PATCH 50/52] TST: Fix docsting lmnn --- test/test_base_metric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 8260d045..1b312b35 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -23,9 +23,9 @@ def test_lmnn(self): str(metric_learn.LMNN()), r"(python_)?LMNN\(convergence_tol=0.001, init='auto', k=3, " r"learn_rate=1e-07,\s+" - r"max_iter=1000, min_iter=50, n_components=None,\s+" - r"num_dims='deprecated', preprocessor=None, random_state=None,\s+" - r"regularization=0.5, use_pca=True, verbose=False\)") + r"max_iter=1000, min_iter=50, n_components=None, " + r"num_dims='deprecated',\s+preprocessor=None, random_state=None, " + r"regularization=0.5,\s+use_pca=True, verbose=False\)") def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), From 4a861c8225630a2a2ed096cec0795c6a9b641133 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 7 Jun 2019 15:38:53 +0200 Subject: [PATCH 51/52] Fix warning messages --- metric_learn/lsml.py | 2 +- metric_learn/mlkr.py | 2 +- metric_learn/mmc.py | 2 +- metric_learn/nca.py | 2 +- metric_learn/sdml.py | 4 ++-- test/metric_learn_test.py | 8 ++++---- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index a8a57f00..4350b003 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -100,7 +100,7 @@ def _fit(self, quadruplets, weights=None): "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance'. This warning will disappear in " + "set prior='covariance'. This warning will disappear in " "v0.6.0, and `prior` parameter's default value will be set to " "'identity'.") warnings.warn(msg, ChangedBehaviorWarning) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 7a0e9aea..9e9cf433 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -162,7 +162,7 @@ def fit(self, X, y): # replace init=None by init='auto' in v0.6.0 and remove the warning msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " "the default init will now be set to 'auto', instead of 'pca'. " - "If you still want to use PCA as an init, set `init`='pca'. " + "If you still want to use PCA as an init, set init='pca'. 
" "This warning will disappear in v0.6.0, and `init` parameter's" " default value will be set to 'auto'.") warnings.warn(msg, ChangedBehaviorWarning) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 73c47e11..b3e6c203 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -119,7 +119,7 @@ def _fit(self, pairs, y): "the default init will now be set to 'identity', instead of the " "identity divided by a scaling factor of 10. " "If you still want to use the same init as in previous " - "versions, set `init`=np.eye(d)/10, where d is the dimension " + "versions, set init=np.eye(d)/10, where d is the dimension " "of your input space (d=pairs.shape[1]). " "This warning will disappear in v0.6.0, and `init` parameter's" " default value will be set to 'auto'.") diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 6862832f..1626e02f 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -147,7 +147,7 @@ def fit(self, X, y): msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " "the default init will now be set to 'auto', instead of the " "previous scaling matrix. same scaling matrix as before as an " - "init, set `init`=np.eye(X.shape[1])/" + "init, set init=np.eye(X.shape[1])/" "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). This warning " "will disappear in v0.6.0, and `init` parameter's default value " "will be set to 'auto'.") diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index c324a72b..b83c553d 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -120,7 +120,7 @@ def _fit(self, pairs, y): "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance'. This warning will disappear in " + "set prior='covariance'. This warning will disappear in " "v0.6.0, and `prior` parameter's default value will be set to " "'identity'.") warnings.warn(msg, ChangedBehaviorWarning) @@ -145,7 +145,7 @@ def _fit(self, pairs, y): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set `prior`='identity'.", + "`balance_param` and/or to set prior='identity'.", ConvergenceWarning) w -= min_eigval # we translate the eigenvalues to make them all positive w += 1e-10 # we add a small offset to avoid definiteness problems diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index fcd86fe7..ad97eceb 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -491,7 +491,7 @@ def test_sdml_raises_warning_non_psd(self): "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " - "`balance_param` and/or to set `prior`='identity'.") + "`balance_param` and/or to set prior='identity'.") with pytest.warns(ConvergenceWarning) as raised_warning: try: sdml.fit(pairs, y) @@ -745,7 +745,7 @@ def test_changed_behaviour_warning(self): msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " "the default init will now be set to 'auto', instead of the " "previous scaling matrix. same scaling matrix as before as an " - "init, set `init`=np.eye(X.shape[1])/" + "init, set init=np.eye(X.shape[1])/" "(np.maximum(X.max(axis=0)-X.min(axis=0), EPS))). 
This warning will" " disappear in v0.6.0, and `init` parameter's default value will " "be set to 'auto'.") @@ -900,7 +900,7 @@ def test_changed_behaviour_warning(self): mlkr = MLKR() msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " "the default init will now be set to 'auto', instead of 'pca'. " - "If you still want to use PCA as an init, set `init`='pca'. " + "If you still want to use PCA as an init, set init='pca'. " "This warning will disappear in v0.6.0, and `init` parameter's" " default value will be set to 'auto'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: @@ -1007,7 +1007,7 @@ def test_changed_behaviour_warning(self): "the default init will now be set to 'identity', instead of the " "identity divided by a scaling factor of 10. " "If you still want to use the same init as in previous " - "versions, set `init`=np.eye(d)/10, where d is the dimension " + "versions, set init=np.eye(d)/10, where d is the dimension " "of your input space (d=pairs.shape[1]). " "This warning will disappear in v0.6.0, and `init` parameter's" " default value will be set to 'auto'.") From dd2b8c7cecd000e4375e7642f3608686c5865ce3 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 7 Jun 2019 15:56:15 +0200 Subject: [PATCH 52/52] Fix warnings messages changed --- test/metric_learn_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index ad97eceb..18643363 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -104,7 +104,7 @@ def test_changed_behaviour_warning(self): "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance'. This warning will disappear in " + "set prior='covariance'. This warning will disappear in " "v0.6.0, and `prior` parameter's default value will be set to " "'identity'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: @@ -556,7 +556,7 @@ def test_changed_behaviour_warning(self): "the default prior will now be set to " "'identity', instead of 'covariance'. If you still want to use " "the inverse of the covariance matrix as a prior, " - "set 'prior'=='covariance'. This warning will disappear in " + "set prior='covariance'. This warning will disappear in " "v0.6.0, and `prior` parameter's default value will be set to " "'identity'.") with pytest.warns(ChangedBehaviorWarning) as raised_warning: