diff --git a/README.rst b/README.rst
index ceb2eb33..20850964 100644
--- a/README.rst
+++ b/README.rst
@@ -11,6 +11,7 @@ metric-learn contains efficient Python implementations of several popular superv
 -  Information Theoretic Metric Learning (ITML)
 -  Sparse Determinant Metric Learning (SDML)
 -  Least Squares Metric Learning (LSML)
+-  Sparse Compositional Metric Learning (SCML)
 -  Neighborhood Components Analysis (NCA)
 -  Local Fisher Discriminant Analysis (LFDA)
 -  Relative Components Analysis (RCA)
diff --git a/doc/conf.py b/doc/conf.py
index 796b7861..b6408d31 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -24,8 +24,8 @@
              u'Bellet and Nathalie Vauquier')
 author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and '
           u'Nathalie Vauquier')
-version = '0.5.0'
-release = '0.5.0'
+version = '0.6.0'
+release = '0.6.0'
 language = 'en'
 
 exclude_patterns = ['_build']
diff --git a/doc/modules.rst b/doc/modules.rst
new file mode 100644
index 00000000..55d5ad40
--- /dev/null
+++ b/doc/modules.rst
@@ -0,0 +1,7 @@
+metric_learn
+============
+
+.. toctree::
+   :maxdepth: 4
+
+   metric_learn
diff --git a/doc/supervised.rst b/doc/supervised.rst
index fc77287b..1b1180e9 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -50,7 +50,7 @@ classes will be large. To do so, we fit the metric learner (example:
 >>> from metric_learn import NCA
 >>> nca = NCA(random_state=42)
 >>> nca.fit(X, y)
-NCA(init=None, max_iter=100, n_components=None, num_dims='deprecated',
+NCA(init='auto', max_iter=100, n_components=None,
   preprocessor=None, random_state=42, tol=None, verbose=False)
 
 
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 82793b5b..174210b8 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -135,7 +135,7 @@ are respected.
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(tuples, y)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
-  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000,
+MMC(convergence_threshold=0.001, diagonal=False,
+  diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000,
   preprocessor=None, random_state=42, verbose=False)
 
 Or alternatively (using a preprocessor):
@@ -250,8 +250,8 @@ tuples).
 >>> y_pairs = np.array([1, -1])
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(pairs, y_pairs)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
-    diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None,
+MMC(convergence_threshold=0.001, diagonal=False,
+    diagonal_c=1.0, init='identity', max_iter=100, max_proj=10000, preprocessor=None,
     random_state=42, verbose=False)
 
 Here, we learned a metric that puts the two first points closer
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 014d9af3..71229554 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -289,7 +289,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 # - See more in the documentation of the class :py:class:`LFDA
 #   <metric_learn.LFDA>`
 
-lfda = metric_learn.LFDA(k=2, num_dims=2)
+lfda = metric_learn.LFDA(k=2, n_components=2)
 X_lfda = lfda.fit_transform(X, y)
 
 plot_tsne(X_lfda, y)
diff --git a/metric_learn/_version.py b/metric_learn/_version.py
index 2b8877c5..ef7eb44d 100644
--- a/metric_learn/_version.py
+++ b/metric_learn/_version.py
@@ -1 +1 @@
-__version__ = '0.5.0'
+__version__ = '0.6.0'
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index d1af0821..721d7ba0 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -9,7 +9,6 @@
 import numpy as np
 from abc import ABCMeta, abstractmethod
 from ._util import ArrayIndexer, check_input, validate_vector
-import warnings
 
 
 class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta):
@@ -285,15 +284,6 @@ def metric_fun(u, v, squared=False):
 
   get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__
 
-  def metric(self):
-    """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix`
-    instead"""
-    # TODO: remove this method in version 0.6.0
-    warnings.warn(("`metric` is deprecated since version 0.5.0 and will be "
-                   "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."),
-                  DeprecationWarning)
-    return self.get_mahalanobis_matrix()
-
   def get_mahalanobis_matrix(self):
     """Returns a copy of the Mahalanobis matrix learned by the metric learner.
 
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 48d5a222..43872b60 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -2,9 +2,7 @@
 Information Theoretic Metric Learning (ITML)
 """
 
-import warnings
 import numpy as np
-from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.metrics import pairwise_distances
 from sklearn.utils.validation import check_array
 from sklearn.base import TransformerMixin
@@ -19,23 +17,17 @@ class _BaseITML(MahalanobisMixin):
   _tuple_size = 2  # constraints are pairs
 
   def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
-               prior='identity', A0='deprecated', verbose=False,
+               prior='identity', verbose=False,
                preprocessor=None, random_state=None):
     self.gamma = gamma
     self.max_iter = max_iter
     self.convergence_threshold = convergence_threshold
     self.prior = prior
-    self.A0 = A0
     self.verbose = verbose
     self.random_state = random_state
     super(_BaseITML, self).__init__(preprocessor)
 
   def _fit(self, pairs, y, bounds=None):
-    if self.A0 != 'deprecated':
-      warnings.warn('"A0" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    'removed in 0.6.0. Use "prior" instead.',
-                    DeprecationWarning)
     pairs, y = self._prepare_inputs(pairs, y,
                                     type_of_inputs='tuples')
     # init bounds
@@ -155,11 +147,6 @@ class ITML(_BaseITML, _PairsClassifierMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  A0 : Not used
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'prior' instead.
-
   verbose : bool, optional (default=False)
     If True, prints information while learning
 
@@ -276,21 +263,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   convergence_threshold : float, optional (default=1e-3)
     Tolerance of the optimization procedure.
 
-  num_labeled : Not used
-    .. deprecated:: 0.5.0
-      `num_labeled` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0.
-
   num_constraints : int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
-  bounds : Not used
-    .. deprecated:: 0.5.0
-      `bounds` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Set `bounds` at fit time instead :
-      `itml_supervised.fit(X, y, bounds=...)`
-
   prior : string or numpy array, optional (default='identity')
     Initialization of the Mahalanobis matrix. Possible options are
     'identity', 'covariance', 'random', and a numpy array of shape
@@ -313,11 +289,6 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  A0 : Not used
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'prior' instead.
-
   verbose : bool, optional (default=False)
     If True, prints information while learning
 
@@ -368,18 +339,15 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   """
 
   def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3,
-               num_labeled='deprecated', num_constraints=None,
-               bounds='deprecated', prior='identity', A0='deprecated',
+               num_constraints=None, prior='identity',
                verbose=False, preprocessor=None, random_state=None):
     _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
                        convergence_threshold=convergence_threshold,
-                       A0=A0, prior=prior, verbose=verbose,
+                       prior=prior, verbose=verbose,
                        preprocessor=preprocessor, random_state=random_state)
-    self.num_labeled = num_labeled
     self.num_constraints = num_constraints
-    self.bounds = bounds
 
-  def fit(self, X, y, random_state='deprecated', bounds=None):
+  def fit(self, X, y, bounds=None):
     """Create constraints from labels and learn the ITML model.
 
 
@@ -391,12 +359,6 @@ def fit(self, X, y, random_state='deprecated', bounds=None):
     y : (n) array-like
       Data labels.
 
-    random_state : Not used
-      .. deprecated:: 0.5.0
-        `random_state` in the `fit` function was deprecated in version 0.5.0
-        and will be removed in 0.6.0. Set `random_state` at initialization
-        instead (when instantiating a new `ITML_Supervised` object).
-
     bounds : array-like of two numbers
       Bounds on similarity, aside slack variables, s.t.
       ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a``
@@ -406,28 +368,6 @@ def fit(self, X, y, random_state='deprecated', bounds=None):
       set to the 5th and 95th percentile of the pairwise distances among all
       points in the training data `X`.
     """
-    # TODO: remove these in v0.6.0
-    if self.num_labeled != 'deprecated':
-      warnings.warn('"num_labeled" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0', DeprecationWarning)
-    if self.bounds != 'deprecated':
-      warnings.warn('"bounds" parameter from initialization is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0. Use the "bounds" parameter of this '
-                    'fit method instead.', DeprecationWarning)
-    if random_state != 'deprecated':
-      warnings.warn('"random_state" parameter in the `fit` function is '
-                    'deprecated. Set `random_state` at initialization '
-                    'instead (when instantiating a new `ITML_Supervised` '
-                    'object).', DeprecationWarning)
-    else:
-      warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the '
-                    '`random_state` given at initialization to sample '
-                    'constraints, not the default `np.random` from the `fit` '
-                    'method, since this argument is now deprecated. '
-                    'This warning will disappear in v0.6.0.',
-                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py
index 2feed169..bfa3275e 100644
--- a/metric_learn/lfda.py
+++ b/metric_learn/lfda.py
@@ -27,11 +27,6 @@ class LFDA(MahalanobisMixin, TransformerMixin):
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_dims : Not used
-    .. deprecated:: 0.5.0
-      `num_dims` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use `n_components` instead.
-
   k : int, optional (default=None)
     Number of nearest neighbors used in local scaling method. If None,
     defaults to min(7, n_features - 1).
@@ -81,12 +76,11 @@ class LFDA(MahalanobisMixin, TransformerMixin):
         -discriminant-analysis-on-beer-style-clustering.html#>`_.
   '''
 
-  def __init__(self, n_components=None, num_dims='deprecated',
+  def __init__(self, n_components=None,
                k=None, embedding_type='weighted', preprocessor=None):
     if embedding_type not in ('weighted', 'orthonormalized', 'plain'):
       raise ValueError('Invalid embedding_type: %r' % embedding_type)
     self.n_components = n_components
-    self.num_dims = num_dims
     self.embedding_type = embedding_type
     self.k = k
     super(LFDA, self).__init__(preprocessor)
@@ -102,11 +96,6 @@ def fit(self, X, y):
     y : (n,) array-like
         Class labels, one per point of data.
     '''
-    if self.num_dims != 'deprecated':
-      warnings.warn('"num_dims" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0. Use "n_components" instead',
-                    DeprecationWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     unique_classes, y = np.unique(y, return_inverse=True)
     n, d = X.shape
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 12eb5ab1..8bdc4bf0 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -2,9 +2,7 @@
 Large Margin Nearest Neighbor Metric learning (LMNN)
 """
 import numpy as np
-import warnings
 from collections import Counter
-from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin
 
@@ -25,12 +23,10 @@ class LMNN(MahalanobisMixin, TransformerMixin):
 
   Parameters
   ----------
-  init : None, string or numpy array, optional (default=None)
+  init : string or numpy array, optional (default='auto')
     Initialization of the linear transformation. Possible options are
     'auto', 'pca', 'identity', 'random', and a numpy array of shape
-    (n_features_a, n_features_b). If None, will be set automatically to
-    'auto' (this option is to raise a warning if 'init' is not set, and
-    stays to its default value None, in v0.5.0).
+    (n_features_a, n_features_b).
 
     'auto'
       Depending on ``n_components``, the most reasonable initialization
@@ -83,11 +79,6 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     Tolerance of the optimization procedure. If the objective value varies
     less than `tol`, we consider the algorithm has converged and stop it.
 
-  use_pca : Not used
-    .. deprecated:: 0.5.0
-      `use_pca` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0.
-
   verbose : bool, optional (default=False)
     Whether to print the progress of the optimization procedure.
 
@@ -102,11 +93,6 @@ class LMNN(MahalanobisMixin, TransformerMixin):
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_dims : Not used
-    .. deprecated:: 0.5.0
-      `num_dims` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use `n_components` instead.
-
   random_state : int or numpy.RandomState or None, optional (default=None)
     A pseudo random number generator object or a seed for it if int. If
     ``init='random'``, ``random_state`` is used to initialize the random
@@ -142,10 +128,10 @@ class LMNN(MahalanobisMixin, TransformerMixin):
          2005.
   """
 
-  def __init__(self, init=None, k=3, min_iter=50, max_iter=1000,
+  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
                learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
-               use_pca='deprecated', verbose=False, preprocessor=None,
-               n_components=None, num_dims='deprecated', random_state=None):
+               verbose=False, preprocessor=None,
+               n_components=None, random_state=None):
     self.init = init
     self.k = k
     self.min_iter = min_iter
@@ -153,24 +139,12 @@ def __init__(self, init=None, k=3, min_iter=50, max_iter=1000,
     self.learn_rate = learn_rate
     self.regularization = regularization
     self.convergence_tol = convergence_tol
-    self.use_pca = use_pca
     self.verbose = verbose
     self.n_components = n_components
-    self.num_dims = num_dims
     self.random_state = random_state
     super(LMNN, self).__init__(preprocessor)
 
   def fit(self, X, y):
-    if self.num_dims != 'deprecated':
-      warnings.warn('"num_dims" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0. Use "n_components" instead',
-                    DeprecationWarning)
-    if self.use_pca != 'deprecated':
-      warnings.warn('"use_pca" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0.',
-                    DeprecationWarning)
     k = self.k
     reg = self.regularization
     learn_rate = self.learn_rate
@@ -184,20 +158,7 @@ def fit(self, X, y):
       raise ValueError('Must have one label per point.')
     self.labels_ = np.arange(len(unique_labels))
 
-    # if the init is the default (None), we raise a warning
-    if self.init is None:
-      # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning
-      msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-             "the default init will now be set to 'auto', instead of the "
-             "previous identity matrix. If you still want to use the identity "
-             "matrix as before, set init='identity'. This warning "
-             "will disappear in v0.6.0, and `init` parameter's default value "
-             "will be set to 'auto'.")
-      warnings.warn(msg, ChangedBehaviorWarning)
-      init = 'auto'
-    else:
-      init = self.init
-    self.components_ = _initialize_components(output_dim, X, y, init,
+    self.components_ = _initialize_components(output_dim, X, y, self.init,
                                               self.verbose,
                                               random_state=self.random_state)
     required_k = np.bincount(label_inds).min()
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 0cf9dc22..28f65ce7 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -2,11 +2,9 @@
 Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML)
 """
 
-import warnings
 import numpy as np
 import scipy.linalg
 from sklearn.base import TransformerMixin
-from sklearn.exceptions import ChangedBehaviorWarning
 
 from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints
@@ -17,7 +15,7 @@ class _BaseLSML(MahalanobisMixin):
 
   _tuple_size = 4  # constraints are quadruplets
 
-  def __init__(self, tol=1e-3, max_iter=1000, prior=None,
+  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
                verbose=False, preprocessor=None, random_state=None):
     self.prior = prior
     self.tol = tol
@@ -40,21 +38,8 @@ def _fit(self, quadruplets, weights=None):
     else:
       self.w_ = weights
     self.w_ /= self.w_.sum()  # weights must sum to 1
-    # if the prior is the default (None), we raise a warning
-    if self.prior is None:
-      msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, "
-             "the default prior will now be set to "
-             "'identity', instead of 'covariance'. If you still want to use "
-             "the inverse of the covariance matrix as a prior, "
-             "set prior='covariance'. This warning will disappear in "
-             "v0.6.0, and `prior` parameter's default value will be set to "
-             "'identity'.")
-      warnings.warn(msg, ChangedBehaviorWarning)
-      prior = 'identity'
-    else:
-      prior = self.prior
     M, prior_inv = _initialize_metric_mahalanobis(
-        quadruplets, prior,
+        quadruplets, self.prior,
         return_inverse=True, strict_pd=True, matrix_name='prior',
         random_state=self.random_state)
 
@@ -137,13 +122,11 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
 
   Parameters
   ----------
-  prior : None, string or numpy array, optional (default=None)
+  prior : string or numpy array, optional (default='identity')
     Prior to set for the metric. Possible options are
     'identity', 'covariance', 'random', and a numpy array of
     shape (n_features, n_features). For LSML, the prior should be strictly
-    positive definite (PD). If `None`, will be set
-    automatically to 'identity' (this is to raise a warning if
-    `prior` is not set, and stays to its default value (None), in v0.5.0).
+    positive definite (PD).
 
     'identity'
       An identity matrix of shape (n_features, n_features).
@@ -256,13 +239,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   max_iter : int, optional (default=1000)
     Number of maximum iterations of the optimization procedure.
 
-  prior : None, string or numpy array, optional (default=None)
+  prior : string or numpy array, optional (default='identity')
     Prior to set for the metric. Possible options are
     'identity', 'covariance', 'random', and a numpy array of
     shape (n_features, n_features). For LSML, the prior should be strictly
-    positive definite (PD). If `None`, will be set
-    automatically to 'identity' (this is to raise a warning if
-    `prior` is not set, and stays to its default value (None), in v0.5.0).
+    positive definite (PD).
 
     'identity'
       An identity matrix of shape (n_features, n_features).
@@ -280,11 +261,6 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  num_labeled : Not used
-    .. deprecated:: 0.5.0
-      `num_labeled` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0.
-
-  num_constraints: int, optional (default=None)
+  num_constraints : int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
@@ -326,17 +302,16 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     metric (See function `components_from_metric`.)
   """
 
-  def __init__(self, tol=1e-3, max_iter=1000, prior=None,
-               num_labeled='deprecated', num_constraints=None, weights=None,
+  def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
+               num_constraints=None, weights=None,
                verbose=False, preprocessor=None, random_state=None):
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
-    self.num_labeled = num_labeled
     self.num_constraints = num_constraints
     self.weights = weights
 
-  def fit(self, X, y, random_state='deprecated'):
+  def fit(self, X, y):
     """Create constraints from labels and learn the LSML model.
 
     Parameters
@@ -346,29 +321,7 @@ def fit(self, X, y, random_state='deprecated'):
 
     y : (n) array-like
       Data labels.
-
-    random_state : Not used
-      .. deprecated:: 0.5.0
-        `random_state` in the `fit` function was deprecated in version 0.5.0
-        and will be removed in 0.6.0. Set `random_state` at initialization
-        instead (when instantiating a new `LSML_Supervised` object).
     """
-    if self.num_labeled != 'deprecated':
-      warnings.warn('"num_labeled" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0', DeprecationWarning)
-    if random_state != 'deprecated':
-      warnings.warn('"random_state" parameter in the `fit` function is '
-                    'deprecated. Set `random_state` at initialization '
-                    'instead (when instantiating a new `LSML_Supervised` '
-                    'object).', DeprecationWarning)
-    else:
-      warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the '
-                    '`random_state` given at initialization to sample '
-                    'constraints, not the default `np.random` from the `fit` '
-                    'method, since this argument is now deprecated. '
-                    'This warning will disappear in v0.6.0.',
-                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 9b84dba8..01d185e7 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -8,7 +8,7 @@
 from scipy.optimize import minimize
 from scipy.special import logsumexp
 from sklearn.base import TransformerMixin
-from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.metrics import pairwise_distances
 
 from .base_metric import MahalanobisMixin
@@ -32,17 +32,10 @@ class MLKR(MahalanobisMixin, TransformerMixin):
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_dims : Not used
-    .. deprecated:: 0.5.0
-      `num_dims` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use `n_components` instead.
-
-  init : None, string or numpy array, optional (default=None)
+  init : string or numpy array, optional (default='auto')
     Initialization of the linear transformation. Possible options are
     'auto', 'pca', 'identity', 'random', and a numpy array of shape
-    (n_features_a, n_features_b). If None, will be set automatically to
-    'auto' (this option is to raise a warning if 'init' is not set,
-    and stays to its default value None, in v0.5.0).
+    (n_features_a, n_features_b).
 
     'auto'
       Depending on ``n_components``, the most reasonable initialization
@@ -70,11 +63,6 @@ class MLKR(MahalanobisMixin, TransformerMixin):
       :meth:`fit` and n_features_a must be less than or equal to that.
       If ``n_components`` is not None, n_features_a must match it.
 
-  A0 : Not used.
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'init' instead.
-
   tol : float, optional (default=None)
     Convergence tolerance for the optimization.
 
@@ -120,13 +108,11 @@ class MLKR(MahalanobisMixin, TransformerMixin):
          /weinberger07a.pdf>`_. AISTATS 2007.
   """
 
-  def __init__(self, n_components=None, num_dims='deprecated', init=None,
-               A0='deprecated', tol=None, max_iter=1000, verbose=False,
+  def __init__(self, n_components=None, init='auto',
+               tol=None, max_iter=1000, verbose=False,
                preprocessor=None, random_state=None):
     self.n_components = n_components
-    self.num_dims = num_dims
     self.init = init
-    self.A0 = A0
     self.tol = tol
     self.max_iter = max_iter
     self.verbose = verbose
@@ -142,18 +128,6 @@ def fit(self, X, y):
       X : (n x d) array of samples
       y : (n) data labels
       """
-      if self.A0 != 'deprecated':
-        warnings.warn('"A0" parameter is not used.'
-                      ' It has been deprecated in version 0.5.0 and will be'
-                      'removed in 0.6.0. Use "init" instead.',
-                      DeprecationWarning)
-
-      if self.num_dims != 'deprecated':
-        warnings.warn('"num_dims" parameter is not used.'
-                      ' It has been deprecated in version 0.5.0 and will be'
-                      ' removed in 0.6.0. Use "n_components" instead',
-                      DeprecationWarning)
-
       X, y = self._prepare_inputs(X, y, y_numeric=True,
                                   ensure_min_samples=2)
       n, d = X.shape
@@ -166,19 +140,7 @@ def fit(self, X, y):
       if m is None:
           m = d
-      # if the init is the default (None), we raise a warning
+      # initialize the linear transformation according to the `init` parameter
-      if self.init is None:
-        # TODO:
-        #  replace init=None by init='auto' in v0.6.0 and remove the warning
-        msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-               "the default init will now be set to 'auto', instead of 'pca'. "
-               "If you still want to use PCA as an init, set init='pca'. "
-               "This warning will disappear in v0.6.0, and `init` parameter's"
-               " default value will be set to 'auto'.")
-        warnings.warn(msg, ChangedBehaviorWarning)
-        init = 'auto'
-      else:
-        init = self.init
-      A = _initialize_components(m, X, y, init=init,
+      A = _initialize_components(m, X, y, init=self.init,
                                  random_state=self.random_state,
                                  # MLKR works on regression targets:
                                  has_classes=False)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 330e2113..e4f89cfe 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -1,9 +1,7 @@
 """Mahalanobis Metric for Clustering (MMC)"""
-import warnings
 import numpy as np
 from sklearn.base import TransformerMixin
 from sklearn.utils.validation import assert_all_finite
-from sklearn.exceptions import ChangedBehaviorWarning
 
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
@@ -15,14 +13,13 @@ class _BaseMMC(MahalanobisMixin):
   _tuple_size = 2  # constraints are pairs
 
   def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
-               init=None, A0='deprecated', diagonal=False,
+               init='identity', diagonal=False,
                diagonal_c=1.0, verbose=False, preprocessor=None,
                random_state=None):
     self.max_iter = max_iter
     self.max_proj = max_proj
     self.convergence_threshold = convergence_threshold
     self.init = init
-    self.A0 = A0
     self.diagonal = diagonal
     self.diagonal_c = diagonal_c
     self.verbose = verbose
@@ -30,30 +27,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     super(_BaseMMC, self).__init__(preprocessor)
 
   def _fit(self, pairs, y):
-    if self.A0 != 'deprecated':
-      warnings.warn('"A0" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    'removed in 0.6.0. Use "init" instead.',
-                    DeprecationWarning)
     pairs, y = self._prepare_inputs(pairs, y,
                                     type_of_inputs='tuples')
 
-    if self.init is None:
-      # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning
-      msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-             "the default init will now be set to 'identity', instead of the "
-             "identity divided by a scaling factor of 10. "
-             "If you still want to use the same init as in previous "
-             "versions, set init=np.eye(d)/10, where d is the dimension "
-             "of your input space (d=pairs.shape[1]). "
-             "This warning will disappear in v0.6.0, and `init` parameter's"
-             " default value will be set to 'auto'.")
-      warnings.warn(msg, ChangedBehaviorWarning)
-      init = 'identity'
-    else:
-      init = self.init
-
-    self.A_ = _initialize_metric_mahalanobis(pairs, init,
+    self.A_ = _initialize_metric_mahalanobis(pairs, self.init,
                                              random_state=self.random_state,
                                              matrix_name='init')
 
@@ -358,12 +335,10 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
   convergence_threshold : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
-  init : None, string or numpy array, optional (default=None)
+  init : string or numpy array, optional (default='identity')
     Initialization of the Mahalanobis matrix. Possible options are
     'identity', 'covariance', 'random', and a numpy array of
-    shape (n_features, n_features). If None, will be set
-    automatically to 'identity' (this is to raise a warning if
-    'init' is not set, and stays to its default value (None), in v0.5.0).
+    shape (n_features, n_features).
 
     'identity'
       An identity matrix of shape (n_features, n_features).
@@ -381,11 +356,6 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
       An SPD matrix of shape (n_features, n_features), that will
       be used as such to initialize the metric.
 
-  A0 : Not used.
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'init' instead.
-
   diagonal : bool, optional (default=False)
     If True, a diagonal metric will be learned,
     i.e., a simple scaling of dimensions. The initialization will then
@@ -502,21 +472,14 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   convergence_threshold : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
-  num_labeled : Not used
-    .. deprecated:: 0.5.0
-      `num_labeled` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0.
-
   num_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
-  init : None, string or numpy array, optional (default=None)
+  init : string or numpy array, optional (default='identity')
     Initialization of the Mahalanobis matrix. Possible options are
     'identity', 'covariance', 'random', and a numpy array of
-    shape (n_features, n_features). If None, will be set
-    automatically to 'identity' (this is to raise a warning if
-    'init' is not set, and stays to its default value (None), in v0.5.0).
+    shape (n_features, n_features).
 
     'identity'
       An identity matrix of shape (n_features, n_features).
@@ -533,11 +496,6 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
       A numpy array of shape (n_features, n_features), that will
       be used as such to initialize the metric.
 
-  A0 : Not used.
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'init' instead.
-
   diagonal : bool, optional (default=False)
     If True, a diagonal metric will be learned,
     i.e., a simple scaling of dimensions. The initialization will then
@@ -581,18 +539,17 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   """
 
   def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
-               num_labeled='deprecated', num_constraints=None, init=None,
-               A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False,
+               num_constraints=None, init='identity',
+               diagonal=False, diagonal_c=1.0, verbose=False,
                preprocessor=None, random_state=None):
     _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj,
                       convergence_threshold=convergence_threshold,
-                      init=init, A0=A0, diagonal=diagonal,
+                      init=init, diagonal=diagonal,
                       diagonal_c=diagonal_c, verbose=verbose,
                       preprocessor=preprocessor, random_state=random_state)
-    self.num_labeled = num_labeled
     self.num_constraints = num_constraints
 
-  def fit(self, X, y, random_state='deprecated'):
+  def fit(self, X, y):
     """Create constraints from labels and learn the MMC model.
 
     Parameters
@@ -602,29 +559,7 @@ def fit(self, X, y, random_state='deprecated'):
 
     y : (n) array-like
       Data labels.
-
-    random_state : Not used
-      .. deprecated:: 0.5.0
-        `random_state` in the `fit` function was deprecated in version 0.5.0
-        and will be removed in 0.6.0. Set `random_state` at initialization
-        instead (when instantiating a new `MMC_Supervised` object).
     """
-    if self.num_labeled != 'deprecated':
-      warnings.warn('"num_labeled" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0', DeprecationWarning)
-    if random_state != 'deprecated':
-      warnings.warn('"random_state" parameter in the `fit` function is '
-                    'deprecated. Set `random_state` at initialization '
-                    'instead (when instantiating a new `MMC_Supervised` '
-                    'object).', DeprecationWarning)
-    else:
-      warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the '
-                    '`random_state` given at initialization to sample '
-                    'constraints, not the default `np.random` from the `fit` '
-                    'method, since this argument is now deprecated. '
-                    'This warning will disappear in v0.6.0.',
-                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
diff --git a/metric_learn/nca.py b/metric_learn/nca.py
index 217d7d28..7b4423d3 100644
--- a/metric_learn/nca.py
+++ b/metric_learn/nca.py
@@ -9,7 +9,7 @@
 from scipy.optimize import minimize
 from scipy.special import logsumexp
 from sklearn.base import TransformerMixin
-from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.metrics import pairwise_distances
 
 from ._util import _initialize_components, _check_n_components
@@ -32,12 +32,10 @@ class NCA(MahalanobisMixin, TransformerMixin):
 
   Parameters
   ----------
-  init : None, string or numpy array, optional (default=None)
+  init : string or numpy array, optional (default='auto')
     Initialization of the linear transformation. Possible options are
     'auto', 'pca', 'identity', 'random', and a numpy array of shape
-    (n_features_a, n_features_b). If None, will be set automatically to
-    'auto' (this option is to raise a warning if 'init' is not set,
-    and stays to its default value None, in v0.5.0).
+    (n_features_a, n_features_b).
 
     'auto'
       Depending on ``n_components``, the most reasonable initialization
@@ -77,11 +75,6 @@ class NCA(MahalanobisMixin, TransformerMixin):
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_dims : Not used
-    .. deprecated:: 0.5.0
-      `num_dims` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use `n_components` instead.
-
   max_iter : int, optional (default=100)
     Maximum number of iterations done by the optimization algorithm.
 
@@ -128,12 +121,11 @@ class NCA(MahalanobisMixin, TransformerMixin):
          <https://en.wikipedia.org/wiki/Neighbourhood_components_analysis>`_
   """
 
-  def __init__(self, init=None, n_components=None, num_dims='deprecated',
+  def __init__(self, init='auto', n_components=None,
                max_iter=100, tol=None, verbose=False, preprocessor=None,
                random_state=None):
     self.n_components = n_components
     self.init = init
-    self.num_dims = num_dims
     self.max_iter = max_iter
     self.tol = tol
     self.verbose = verbose
@@ -145,11 +137,6 @@ def fit(self, X, y):
     X: data matrix, (n x d)
     y: scalar labels, (n)
     """
-    if self.num_dims != 'deprecated':
-      warnings.warn('"num_dims" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0. Use "n_components" instead',
-                    DeprecationWarning)
     X, labels = self._prepare_inputs(X, y, ensure_min_samples=2)
     n, d = X.shape
     n_components = _check_n_components(d, self.n_components)
@@ -158,22 +145,8 @@ def fit(self, X, y):
     train_time = time.time()
 
     # Initialize A
-    # if the init is the default (None), we raise a warning
-    if self.init is None:
-      # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning
-      msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-             "the default init will now be set to 'auto', instead of the "
-             "previous scaling matrix. If you still want to use the same "
-             "scaling matrix as before, set "
-             "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)"
-             ", EPS))). This warning will disappear in v0.6.0, and `init` "
-             "parameter's default value will be set to 'auto'.")
-      warnings.warn(msg, ChangedBehaviorWarning)
-      init = 'auto'
-    else:
-      init = self.init
-    A = _initialize_components(n_components, X, labels, init, self.verbose,
-                               self.random_state)
+    A = _initialize_components(n_components, X, labels, self.init,
+                               self.verbose, self.random_state)
 
     # Run NCA
     mask = labels[:, np.newaxis] == labels[np.newaxis, :]
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 2004b9d4..34f7f3ff 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -5,7 +5,6 @@
 import numpy as np
 import warnings
 from sklearn.base import TransformerMixin
-from sklearn.exceptions import ChangedBehaviorWarning
 
 from ._util import _check_n_components
 from .base_metric import MahalanobisMixin
@@ -43,16 +42,6 @@ class RCA(MahalanobisMixin, TransformerMixin):
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_dims : Not used
-    .. deprecated:: 0.5.0
-      `num_dims` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use `n_components` instead.
-
-  pca_comps : Not used
-    .. deprecated:: 0.5.0
-      `pca_comps` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0.
-
   preprocessor : array-like, shape=(n_samples, n_features) or callable
     The preprocessor to call to get tuples from indices. If array-like,
     tuples will be formed like this: X[indices].
@@ -82,11 +71,8 @@ class RCA(MahalanobisMixin, TransformerMixin):
     The learned linear transformation ``L``.
   """
 
-  def __init__(self, n_components=None, num_dims='deprecated',
-               pca_comps='deprecated', preprocessor=None):
+  def __init__(self, n_components=None, preprocessor=None):
     self.n_components = n_components
-    self.num_dims = num_dims
-    self.pca_comps = pca_comps
     super(RCA, self).__init__(preprocessor)
 
   def _check_dimension(self, rank, X):
@@ -115,29 +101,8 @@ def fit(self, X, chunks):
       When ``chunks[i] == -1``, point i doesn't belong to any chunklet.
       When ``chunks[i] == j``, point i belongs to chunklet j.
     """
-    if self.num_dims != 'deprecated':
-      warnings.warn('"num_dims" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0. Use "n_components" instead',
-                    DeprecationWarning)
-
-    if self.pca_comps != 'deprecated':
-      warnings.warn(
-          '"pca_comps" parameter is not used. '
-          'It has been deprecated in version 0.5.0 and will be'
-          'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If '
-          'you still want to do it, you could use '
-          '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.',
-          DeprecationWarning)
-
     X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2)
 
-    warnings.warn(
-        "RCA will no longer center the data before training. If you want "
-        "to do some preprocessing, you should do it manually (you can also "
-        "use an `sklearn.pipeline.Pipeline` for instance). This warning "
-        "will disappear in version 0.6.0.", ChangedBehaviorWarning)
-
     chunks = np.asanyarray(chunks, dtype=int)
     chunk_mask, chunked_data = _chunk_mean_centering(X, chunks)
 
@@ -177,11 +142,6 @@ class RCA_Supervised(RCA):
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_dims : Not used
-    .. deprecated:: 0.5.0
-      `num_dims` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use `n_components` instead.
-
   num_chunks: int, optional (default=100)
     Number of chunks to generate.
 
@@ -212,17 +172,15 @@ class RCA_Supervised(RCA):
     The learned linear transformation ``L``.
   """
 
-  def __init__(self, num_dims='deprecated', n_components=None,
-               pca_comps='deprecated', num_chunks=100, chunk_size=2,
+  def __init__(self, n_components=None, num_chunks=100, chunk_size=2,
                preprocessor=None, random_state=None):
     """Initialize the supervised version of `RCA`."""
-    RCA.__init__(self, num_dims=num_dims, n_components=n_components,
-                 pca_comps=pca_comps, preprocessor=preprocessor)
+    RCA.__init__(self, n_components=n_components, preprocessor=preprocessor)
     self.num_chunks = num_chunks
     self.chunk_size = chunk_size
     self.random_state = random_state
 
-  def fit(self, X, y, random_state='deprecated'):
+  def fit(self, X, y):
     """Create constraints from labels and learn the RCA model.
     Needs num_constraints specified in constructor.
 
@@ -232,25 +190,7 @@ def fit(self, X, y, random_state='deprecated'):
       each row corresponds to a single instance
 
     y : (n) data labels
-
-    random_state : Not used
-      .. deprecated:: 0.5.0
-        `random_state` in the `fit` function was deprecated in version 0.5.0
-        and will be removed in 0.6.0. Set `random_state` at initialization
-        instead (when instantiating a new `RCA_Supervised` object).
     """
-    if random_state != 'deprecated':
-      warnings.warn('"random_state" parameter in the `fit` function is '
-                    'deprecated. Set `random_state` at initialization '
-                    'instead (when instantiating a new `RCA_Supervised` '
-                    'object).', DeprecationWarning)
-    else:
-      warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the '
-                    '`random_state` given at initialization to sample '
-                    'constraints, not the default `np.random` from the `fit` '
-                    'method, since this argument is now deprecated. '
-                    'This warning will disappear in v0.6.0.',
-                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     chunks = Constraints(y).chunks(num_chunks=self.num_chunks,
                                    chunk_size=self.chunk_size,
diff --git a/metric_learn/scml.py b/metric_learn/scml.py
index 7bbd101a..c3fde272 100644
--- a/metric_learn/scml.py
+++ b/metric_learn/scml.py
@@ -308,6 +308,10 @@ class SCML(_BaseSCML, _TripletsClassifierMixin):
 
   Read more in the :ref:`User Guide <scml>`.
 
+  .. warning::
+    SCML is still experimental. Please report any failure or unexpected
+    behavior.
+
   Parameters
   ----------
   beta: float (default=1e-5)
@@ -413,6 +417,10 @@ class SCML_Supervised(_BaseSCML, TransformerMixin):
 
   Read more in the :ref:`User Guide <scml>`.
 
+  .. warning::
+    SCML is still experimental. Please report any failure or unexpected
+    behavior.
+
   Parameters
   ----------
   beta: float (default=1e-5)
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index f7c801e8..a0736ffa 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -7,7 +7,7 @@
 from sklearn.base import TransformerMixin
 from scipy.linalg import pinvh
 from sklearn.covariance import graphical_lasso
-from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning
+from sklearn.exceptions import ConvergenceWarning
 
 from .base_metric import MahalanobisMixin, _PairsClassifierMixin
 from .constraints import Constraints, wrap_pairs
@@ -24,23 +24,17 @@ class _BaseSDML(MahalanobisMixin):
 
   _tuple_size = 2  # constraints are pairs
 
-  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None,
-               use_cov='deprecated', verbose=False, preprocessor=None,
+  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
+               verbose=False, preprocessor=None,
                random_state=None):
     self.balance_param = balance_param
     self.sparsity_param = sparsity_param
     self.prior = prior
-    self.use_cov = use_cov
     self.verbose = verbose
     self.random_state = random_state
     super(_BaseSDML, self).__init__(preprocessor)
 
   def _fit(self, pairs, y):
-    if self.use_cov != 'deprecated':
-      warnings.warn('"use_cov" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    'removed in 0.6.0. Use "prior" instead.',
-                    DeprecationWarning)
     if not HAS_SKGGM:
       if self.verbose:
         print("SDML will use scikit-learn's graphical lasso solver.")
@@ -52,23 +46,8 @@ def _fit(self, pairs, y):
 
     # set up (the inverse of) the prior M
     # if the prior is the default (None), we raise a warning
-    if self.prior is None:
-      # TODO:
-      #  replace prior=None by prior='identity' in v0.6.0 and remove the
-      #  warning
-      msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, "
-             "the default prior will now be set to "
-             "'identity', instead of 'covariance'. If you still want to use "
-             "the inverse of the covariance matrix as a prior, "
-             "set prior='covariance'. This warning will disappear in "
-             "v0.6.0, and `prior` parameter's default value will be set to "
-             "'identity'.")
-      warnings.warn(msg, ChangedBehaviorWarning)
-      prior = 'identity'
-    else:
-      prior = self.prior
     _, prior_inv = _initialize_metric_mahalanobis(
-        pairs, prior,
+        pairs, self.prior,
         return_inverse=True, strict_pd=True, matrix_name='prior',
         random_state=self.random_state)
     diff = pairs[:, 0] - pairs[:, 1]
@@ -147,13 +126,11 @@ class SDML(_BaseSDML, _PairsClassifierMixin):
   sparsity_param : float, optional  (default=0.01)
     Trade off between optimizer and sparseness (see graph_lasso).
 
-  prior : None, string or numpy array, optional (default=None)
+  prior : string or numpy array, optional (default='identity')
     Prior to set for the metric. Possible options are
     'identity', 'covariance', 'random', and a numpy array of
     shape (n_features, n_features). For SDML, the prior should be strictly
-    positive definite (PD). If `None`, will be set
-    automatically to 'identity' (this is to raise a warning if
-    `prior` is not set, and stays to its default value (None), in v0.5.0).
+    positive definite (PD).
 
     'identity'
       An identity matrix of shape (n_features, n_features).
@@ -171,11 +148,6 @@ class SDML(_BaseSDML, _PairsClassifierMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  use_cov : Not used.
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'prior' instead.
-
   verbose : bool, optional (default=False)
     If True, prints information while learning.
 
@@ -268,13 +240,11 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
   sparsity_param : float, optional (default=0.01)
     Trade off between optimizer and sparseness (see graph_lasso).
 
-  prior : None, string or numpy array, optional (default=None)
+  prior : string or numpy array, optional (default='identity')
     Prior to set for the metric. Possible options are
     'identity', 'covariance', 'random', and a numpy array of
     shape (n_features, n_features). For SDML, the prior should be strictly
-    positive definite (PD). If `None`, will be set
-    automatically to 'identity' (this is to raise a warning if
-    `prior` is not set, and stays to its default value (None), in v0.5.0).
+    positive definite (PD).
 
     'identity'
       An identity matrix of shape (n_features, n_features).
@@ -292,16 +262,6 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  use_cov : Not used.
-    .. deprecated:: 0.5.0
-      `A0` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0. Use 'prior' instead.
-
-  num_labeled : Not used
-    .. deprecated:: 0.5.0
-      `num_labeled` was deprecated in version 0.5.0 and will
-      be removed in 0.6.0.
-
   num_constraints : int, optional (default=None)
     Number of constraints to generate. If None, defaults to `20 *
     num_classes**2`.
@@ -332,18 +292,16 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
     that describes the supervised version of weakly supervised estimators.
   """
 
-  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None,
-               use_cov='deprecated', num_labeled='deprecated',
+  def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
                num_constraints=None, verbose=False, preprocessor=None,
                random_state=None):
     _BaseSDML.__init__(self, balance_param=balance_param,
                        sparsity_param=sparsity_param, prior=prior,
-                       use_cov=use_cov, verbose=verbose,
+                       verbose=verbose,
                        preprocessor=preprocessor, random_state=random_state)
-    self.num_labeled = num_labeled
     self.num_constraints = num_constraints
 
-  def fit(self, X, y, random_state='deprecated'):
+  def fit(self, X, y):
     """Create constraints from labels and learn the SDML model.
 
     Parameters
@@ -354,33 +312,11 @@ def fit(self, X, y, random_state='deprecated'):
     y : array-like, shape (n,)
       data labels, one for each instance
 
-    random_state : Not used
-      .. deprecated:: 0.5.0
-        `random_state` in the `fit` function was deprecated in version 0.5.0
-        and will be removed in 0.6.0. Set `random_state` at initialization
-        instead (when instantiating a new `SDML_Supervised` object).
-
     Returns
     -------
     self : object
       Returns the instance.
     """
-    if self.num_labeled != 'deprecated':
-      warnings.warn('"num_labeled" parameter is not used.'
-                    ' It has been deprecated in version 0.5.0 and will be'
-                    ' removed in 0.6.0', DeprecationWarning)
-    if random_state != 'deprecated':
-      warnings.warn('"random_state" parameter in the `fit` function is '
-                    'deprecated. Set `random_state` at initialization '
-                    'instead (when instantiating a new `SDML_Supervised` '
-                    'object).', DeprecationWarning)
-    else:
-      warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the '
-                    '`random_state` given at initialization to sample '
-                    'constraints, not the default `np.random` from the `fit` '
-                    'method, since this argument is now deprecated. '
-                    'This warning will disappear in v0.6.0.',
-                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index b6b9eea2..4db0a1fc 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -10,7 +10,7 @@
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_allclose)
 from sklearn.utils.testing import assert_warns_message
-from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.validation import check_X_y
 from sklearn.preprocessing import StandardScaler
 try:
@@ -23,7 +23,7 @@
 from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC,
                           SCML_Supervised, LSML_Supervised,
                           ITML_Supervised, SDML_Supervised, RCA_Supervised,
-                          MMC_Supervised, SDML, RCA, ITML, LSML, SCML)
+                          MMC_Supervised, SDML, RCA, ITML, SCML)
 # Import this specially for testing.
 from metric_learn.constraints import wrap_pairs, Constraints
 from metric_learn.lmnn import _sum_outer_products
@@ -313,74 +313,6 @@ def test_iris(self):
     csep = class_separation(lsml.transform(self.iris_points), self.iris_labels)
     self.assertLess(csep, 0.8)  # it's pretty terrible
 
-  def test_deprecation_num_labeled(self):
-    # test that a deprecation message is thrown if num_labeled is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lsml_supervised = LSML_Supervised(num_labeled=np.inf)
-    msg = ('"num_labeled" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           ' removed in 0.6.0')
-    assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y)
-
-  def test_changed_behaviour_warning(self):
-    # test that a ChangedBehavior warning is thrown about the init, if the
-    # default parameters are used.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lsml_supervised = LSML_Supervised()
-    msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, "
-           "the default prior will now be set to "
-           "'identity', instead of 'covariance'. If you still want to use "
-           "the inverse of the covariance matrix as a prior, "
-           "set prior='covariance'. This warning will disappear in "
-           "v0.6.0, and `prior` parameter's default value will be set to "
-           "'identity'.")
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      lsml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    pairs = np.array([[[-10., 0.], [10., 0.], [-5., 3.], [5., 0.]],
-                      [[0., 50.], [0., -60], [-10., 0.], [10., 0.]]])
-    lsml = LSML()
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      lsml.fit(pairs)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_deprecation_random_state(self):
-    # test that a deprecation message is thrown if random_state is set at
-    # fit time
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lsml_supervised = LSML_Supervised()
-    msg = ('"random_state" parameter in the `fit` function is '
-           'deprecated. Set `random_state` at initialization '
-           'instead (when instantiating a new `LSML_Supervised` '
-           'object).')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      lsml_supervised.fit(X, y, random_state=np.random)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning_random_state(self):
-    # test that a ChangedBehavior warning is thrown if the random_state is
-    # not set in fit.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lsml_supervised = LSML_Supervised()
-    msg = ('As of v0.5.0, `LSML_Supervised` now uses the '
-           '`random_state` given at initialization to sample '
-           'constraints, not the default `np.random` from the `fit` '
-           'method, since this argument is now deprecated. '
-           'This warning will disappear in v0.6.0.')
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      lsml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
 
 class TestITML(MetricTestCase):
   def test_iris(self):
@@ -390,83 +322,6 @@ def test_iris(self):
     csep = class_separation(itml.transform(self.iris_points), self.iris_labels)
     self.assertLess(csep, 0.2)
 
-  def test_deprecation_num_labeled(self):
-    # test that a deprecation message is thrown if num_labeled is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    itml_supervised = ITML_Supervised(num_labeled=np.inf)
-    msg = ('"num_labeled" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           ' removed in 0.6.0')
-    assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y)
-
-  def test_deprecation_bounds(self):
-    # test that a deprecation message is thrown if bounds is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    itml_supervised = ITML_Supervised(bounds=None)
-    msg = ('"bounds" parameter from initialization is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           ' removed in 0.6.0. Use the "bounds" parameter of this '
-           'fit method instead.')
-    assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y)
-
-  def test_deprecation_A0(self):
-    # test that a deprecation message is thrown if A0 is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    itml_supervised = ITML_Supervised(A0=np.ones_like(X))
-    msg = ('"A0" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. Use "prior" instead.')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      itml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
-    y_pairs = [1, -1]
-    itml = ITML(A0=np.ones_like(X))
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      itml.fit(pairs, y_pairs)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_deprecation_random_state(self):
-    # test that a deprecation message is thrown if random_state is set at
-    # fit time
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    itml_supervised = ITML_Supervised()
-    msg = ('"random_state" parameter in the `fit` function is '
-           'deprecated. Set `random_state` at initialization '
-           'instead (when instantiating a new `ITML_Supervised` '
-           'object).')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      itml_supervised.fit(X, y, random_state=np.random)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning_random_state(self):
-    # test that a ChangedBehavior warning is thrown if the random_state is
-    # not set in fit.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    itml_supervised = ITML_Supervised()
-    msg = ('As of v0.5.0, `ITML_Supervised` now uses the '
-           '`random_state` given at initialization to sample '
-           'constraints, not the default `np.random` from the `fit` '
-           'method, since this argument is now deprecated. '
-           'This warning will disappear in v0.6.0.')
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      itml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
 
 @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.],
                                     np.array([20., 100.]),
@@ -557,35 +412,6 @@ def grad(x):
                 np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon)))
     np.testing.assert_almost_equal(rel_diff, 0., decimal=5)
 
-  def test_changed_behaviour_warning(self):
-    # test that a ChangedBehavior warning is thrown about the init, if the
-    # default parameters are used.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lmnn = LMNN(k=2)
-    msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-           "the default init will now be set to 'auto', instead of the "
-           "previous identity matrix. If you still want to use the identity "
-           "matrix as before, set init='identity'. This warning "
-           "will disappear in v0.6.0, and `init` parameter's default value "
-           "will be set to 'auto'.")
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      lmnn.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_deprecation_use_pca(self):
-    # test that a DeprecationWarning is thrown about use_pca, if the
-    # default parameters are used.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    lmnn = LMNN(k=2, use_pca=True)
-    msg = ('"use_pca" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           ' removed in 0.6.0.')
-    assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y)
-
 
 def test_loss_func(capsys):
   """Test the loss function (and its gradient) on a simple example,
@@ -803,8 +629,7 @@ def test_sdml_supervised_raises_warning_msg_not_installed_skggm(self):
     # load_iris: dataset where we know scikit-learn's graphical lasso fails
     # with a Floating Point error
     X, y = load_iris(return_X_y=True)
-    sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True,
-                                      sparsity_param=0.01)
+    sdml_supervised = SDML_Supervised(balance_param=0.5, sparsity_param=0.01)
     msg = ("There was a problem in SDML when using scikit-learn's graphical "
            "lasso solver. skggm's graphical lasso can sometimes converge on "
            "non SPD cases where scikit-learn's graphical lasso fails to "
@@ -907,24 +732,12 @@ def test_iris(self):
     rs = np.random.RandomState(5555)
 
     sdml = SDML_Supervised(num_constraints=1500, prior='identity',
-                           balance_param=5e-5)
-    sdml.fit(self.iris_points, self.iris_labels, random_state=rs)
+                           balance_param=5e-5, random_state=rs)
+    sdml.fit(self.iris_points, self.iris_labels)
     csep = class_separation(sdml.transform(self.iris_points),
                             self.iris_labels)
     self.assertLess(csep, 0.22)
 
-  def test_deprecation_num_labeled(self):
-    # test that a deprecation message is thrown if num_labeled is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X, y = make_classification(random_state=42)
-    sdml_supervised = SDML_Supervised(num_labeled=np.inf, prior='identity',
-                                      balance_param=5e-5)
-    msg = ('"num_labeled" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           ' removed in 0.6.0')
-    assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y)
-
   def test_sdml_raises_warning_non_psd(self):
     """Tests that SDML raises a warning on a toy example where we know the
     pseudo-covariance matrix is not PSD"""
@@ -967,83 +780,6 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self):
                            random_state=np.random.RandomState(42))
     sdml.fit(X, y)
 
-  def test_deprecation_use_cov(self):
-    # test that a deprecation message is thrown if use_cov  is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    sdml_supervised = SDML_Supervised(use_cov=np.ones_like(X),
-                                      balance_param=1e-5)
-    msg = ('"use_cov" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. Use "prior" instead.')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      sdml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
-    y_pairs = [1, -1]
-    sdml = SDML(use_cov=np.ones_like(X), balance_param=1e-5)
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      sdml.fit(pairs, y_pairs)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning(self):
-    # test that a ChangedBehavior warning is thrown about the init, if the
-    # default parameters are used (except for the balance_param that we need
-    # to set for the algorithm to not diverge)
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    sdml_supervised = SDML_Supervised(balance_param=1e-5)
-    msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, "
-           "the default prior will now be set to "
-           "'identity', instead of 'covariance'. If you still want to use "
-           "the inverse of the covariance matrix as a prior, "
-           "set prior='covariance'. This warning will disappear in "
-           "v0.6.0, and `prior` parameter's default value will be set to "
-           "'identity'.")
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      sdml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
-    y_pairs = [1, -1]
-    sdml = SDML(balance_param=1e-5)
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      sdml.fit(pairs, y_pairs)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_deprecation_random_state(self):
-    # test that a deprecation message is thrown if random_state is set at
-    # fit time
-    # TODO: remove in v.0.6
-    X, y = load_iris(return_X_y=True)
-    sdml_supervised = SDML_Supervised(balance_param=5e-5)
-    msg = ('"random_state" parameter in the `fit` function is '
-           'deprecated. Set `random_state` at initialization '
-           'instead (when instantiating a new `SDML_Supervised` '
-           'object).')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      sdml_supervised.fit(X, y, random_state=np.random)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning_random_state(self):
-    # test that a ChangedBehavior warning is thrown if the random_state is
-    # not set in fit.
-    # TODO: remove in v.0.6
-    X, y = load_iris(return_X_y=True)
-    sdml_supervised = SDML_Supervised(balance_param=5e-5)
-    msg = ('As of v0.5.0, `SDML_Supervised` now uses the '
-           '`random_state` given at initialization to sample '
-           'constraints, not the default `np.random` from the `fit` '
-           'method, since this argument is now deprecated. '
-           'This warning will disappear in v0.6.0.')
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      sdml_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
 
 @pytest.mark.skipif(not HAS_SKGGM,
                     reason='The message should be printed only if skggm is '
@@ -1209,40 +945,6 @@ def test_one_class(self):
       nca.fit(X, y)
       assert_array_equal(nca.components_, A)
 
-  def test_changed_behaviour_warning(self):
-    # test that a ChangedBehavior warning is thrown about the init, if the
-    # default parameters are used.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    nca = NCA()
-    msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-           "the default init will now be set to 'auto', instead of the "
-           "previous scaling matrix. If you still want to use the same "
-           "scaling matrix as before, set "
-           "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)"
-           ", EPS))). This warning will disappear in v0.6.0, and `init` "
-           "parameter's default value will be set to 'auto'.")
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      nca.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-
-@pytest.mark.parametrize('num_dims', [None, 2])
-def test_deprecation_num_dims_nca(num_dims):
-  # test that a deprecation message is thrown if num_dims is set at
-  # initialization
-  # TODO: remove in v.0.6
-  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-  y = np.array([1, 0, 1, 0])
-  nca = NCA(num_dims=num_dims)
-  msg = ('"num_dims" parameter is not used.'
-         ' It has been deprecated in version 0.5.0 and will be'
-         ' removed in 0.6.0. Use "n_components" instead')
-  with pytest.warns(DeprecationWarning) as raised_warning:
-    nca.fit(X, y)
-  assert (str(raised_warning[0].message) == msg)
-
 
 class TestLFDA(MetricTestCase):
   def test_iris(self):
@@ -1256,22 +958,6 @@ def test_iris(self):
     self.assertEqual(lfda.components_.shape, (2, 4))
 
 
-@pytest.mark.parametrize('num_dims', [None, 2])
-def test_deprecation_num_dims_lfda(num_dims):
-  # test that a deprecation message is thrown if num_dims is set at
-  # initialization
-  # TODO: remove in v.0.6
-  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-  y = np.array([1, 0, 1, 0])
-  lfda = LFDA(num_dims=num_dims)
-  msg = ('"num_dims" parameter is not used.'
-         ' It has been deprecated in version 0.5.0 and will be'
-         ' removed in 0.6.0. Use "n_components" instead')
-  with pytest.warns(DeprecationWarning) as raised_warning:
-    lfda.fit(X, y)
-  assert (str(raised_warning[0].message) == msg)
-
-
 class TestRCA(MetricTestCase):
   def test_iris(self):
     rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
@@ -1279,46 +965,6 @@ def test_iris(self):
     csep = class_separation(rca.transform(self.iris_points), self.iris_labels)
     self.assertLess(csep, 0.29)
 
-  def test_deprecation_pca_comps(self):
-    # test that a deprecation message is thrown if pca_comps is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X, y = make_classification(random_state=42, n_samples=100)
-    rca_supervised = RCA_Supervised(pca_comps=X.shape[1], num_chunks=20)
-    msg = ('"pca_comps" parameter is not used. '
-           'It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If '
-           'you still want to do it, you could use '
-           '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.')
-    with pytest.warns(ChangedBehaviorWarning) as expected_msg:
-      rca_supervised.fit(X, y)
-    assert any(str(w.message) == msg for w in expected_msg)
-
-    rca = RCA(pca_comps=X.shape[1])
-    with pytest.warns(ChangedBehaviorWarning) as expected_msg:
-      rca.fit(X, y)
-    assert any(str(w.message) == msg for w in expected_msg)
-
-  def test_changedbehaviorwarning_preprocessing(self):
-    # test that a ChangedBehaviorWarning is thrown when using RCA
-    # TODO: remove in v.0.6
-
-    msg = ("RCA will no longer center the data before training. If you want "
-           "to do some preprocessing, you should do it manually (you can also "
-           "use an `sklearn.pipeline.Pipeline` for instance). This warning "
-           "will disappear in version 0.6.0.")
-
-    X, y = make_classification(random_state=42, n_samples=100)
-    rca_supervised = RCA_Supervised(num_chunks=20)
-    with pytest.warns(ChangedBehaviorWarning) as expected_msg:
-      rca_supervised.fit(X, y)
-    assert any(str(w.message) == msg for w in expected_msg)
-
-    rca = RCA()
-    with pytest.warns(ChangedBehaviorWarning) as expected_msg:
-      rca.fit(X, y)
-    assert any(str(w.message) == msg for w in expected_msg)
-
   def test_rank_deficient_returns_warning(self):
     """Checks that if the covariance matrix is not invertible, we raise a
     warning message advising to use PCA"""
@@ -1338,35 +984,6 @@ def test_rank_deficient_returns_warning(self):
       rca.fit(X, y)
     assert any(str(w.message) == msg for w in raised_warnings)
 
-  def test_deprecation_random_state(self):
-    # test that a deprecation message is thrown if random_state is set at
-    # fit time
-    # TODO: remove in v.0.6
-    X, y = make_classification(random_state=42, n_samples=100)
-    rca_supervised = RCA_Supervised(num_chunks=20)
-    msg = ('"random_state" parameter in the `fit` function is '
-           'deprecated. Set `random_state` at initialization '
-           'instead (when instantiating a new `RCA_Supervised` '
-           'object).')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      rca_supervised.fit(X, y, random_state=np.random)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning_random_state(self):
-    # test that a ChangedBehavior warning is thrown if the random_state is
-    # not set in fit.
-    # TODO: remove in v.0.6
-    X, y = make_classification(random_state=42, n_samples=100)
-    rca_supervised = RCA_Supervised(num_chunks=20)
-    msg = ('As of v0.5.0, `RCA_Supervised` now uses the '
-           '`random_state` given at initialization to sample '
-           'constraints, not the default `np.random` from the `fit` '
-           'method, since this argument is now deprecated. '
-           'This warning will disappear in v0.6.0.')
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      rca_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
   def test_unknown_labels(self):
     n = 200
     num_chunks = 50
@@ -1403,30 +1020,6 @@ def test_bad_parameters(self):
     assert any(str(w.message) == msg for w in raised_warning)
 
 
-@pytest.mark.parametrize('num_dims', [None, 2])
-def test_deprecation_num_dims_rca(num_dims):
-  # test that a deprecation message is thrown if num_dims is set at
-  # initialization
-  # TODO: remove in v.0.6
-  X, y = load_iris(return_X_y=True)
-  rca = RCA(num_dims=num_dims)
-  msg = ('"num_dims" parameter is not used.'
-         ' It has been deprecated in version 0.5.0 and will be'
-         ' removed in 0.6.0. Use "n_components" instead')
-  with pytest.warns(DeprecationWarning) as raised_warning:
-    rca.fit(X, y)
-  assert any(str(w.message) == msg for w in raised_warning)
-
-  # we take a small number of chunks so that RCA works on iris
-  rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10)
-  msg = ('"num_dims" parameter is not used.'
-         ' It has been deprecated in version 0.5.0 and will be'
-         ' removed in 0.6.0. Use "n_components" instead')
-  with pytest.warns(DeprecationWarning) as raised_warning:
-    rca_supervised.fit(X, y)
-  assert any(str(w.message) == msg for w in raised_warning)
-
-
 class TestMLKR(MetricTestCase):
   def test_iris(self):
     mlkr = MLKR()
@@ -1457,52 +1050,6 @@ def grad_fn(M):
     rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M))
     np.testing.assert_almost_equal(rel_diff, 0.)
 
-  def test_deprecation_A0(self):
-    # test that a deprecation message is thrown if A0 is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    mlkr = MLKR(A0=np.ones_like(X))
-    msg = ('"A0" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. Use "init" instead.')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      mlkr.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning(self):
-    # test that a ChangedBehavior warning is thrown about the init, if the
-    # default parameters are used.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([0.1, 0.2, 0.3, 0.4])
-    mlkr = MLKR()
-    msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-           "the default init will now be set to 'auto', instead of 'pca'. "
-           "If you still want to use PCA as an init, set init='pca'. "
-           "This warning will disappear in v0.6.0, and `init` parameter's"
-           " default value will be set to 'auto'.")
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      mlkr.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-
-@pytest.mark.parametrize('num_dims', [None, 2])
-def test_deprecation_num_dims_mlkr(num_dims):
-  # test that a deprecation message is thrown if num_dims is set at
-  # initialization
-  # TODO: remove in v.0.6
-  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-  y = np.array([1, 0, 1, 0])
-  mlkr = MLKR(num_dims=num_dims)
-  msg = ('"num_dims" parameter is not used.'
-         ' It has been deprecated in version 0.5.0 and will be'
-         ' removed in 0.6.0. Use "n_components" instead')
-  with pytest.warns(DeprecationWarning) as raised_warning:
-    mlkr.fit(X, y)
-  assert (str(raised_warning[0].message) == msg)
-
 
 class TestMMC(MetricTestCase):
   def test_iris(self):
@@ -1543,96 +1090,6 @@ def test_iris(self):
     csep = class_separation(mmc.transform(self.iris_points), self.iris_labels)
     self.assertLess(csep, 0.2)
 
-  def test_deprecation_num_labeled(self):
-    # test that a deprecation message is thrown if num_labeled is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    mmc_supervised = MMC_Supervised(num_labeled=np.inf)
-    msg = ('"num_labeled" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           ' removed in 0.6.0')
-    assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y)
-
-  def test_deprecation_A0(self):
-    # test that a deprecation message is thrown if A0 is set at
-    # initialization
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    mmc_supervised = MMC_Supervised(A0=np.ones_like(X))
-    msg = ('"A0" parameter is not used.'
-           ' It has been deprecated in version 0.5.0 and will be'
-           'removed in 0.6.0. Use "init" instead.')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      mmc_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
-    y_pairs = [1, -1]
-    mmc = MMC(A0=np.ones_like(X))
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      mmc.fit(pairs, y_pairs)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning(self):
-    # test that a ChangedBehavior warning is thrown about the init, if the
-    # default parameters are used.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    mmc_supervised = MMC_Supervised()
-    msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, "
-           "the default init will now be set to 'identity', instead of the "
-           "identity divided by a scaling factor of 10. "
-           "If you still want to use the same init as in previous "
-           "versions, set init=np.eye(d)/10, where d is the dimension "
-           "of your input space (d=pairs.shape[1]). "
-           "This warning will disappear in v0.6.0, and `init` parameter's"
-           " default value will be set to 'auto'.")
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      mmc_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
-    y_pairs = [1, -1]
-    mmc = MMC()
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      mmc.fit(pairs, y_pairs)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_deprecation_random_state(self):
-    # test that a deprecation message is thrown if random_state is set at
-    # fit time
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    mmc_supervised = MMC_Supervised()
-    msg = ('"random_state" parameter in the `fit` function is '
-           'deprecated. Set `random_state` at initialization '
-           'instead (when instantiating a new `MMC_Supervised` '
-           'object).')
-    with pytest.warns(DeprecationWarning) as raised_warning:
-      mmc_supervised.fit(X, y, random_state=np.random)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
-  def test_changed_behaviour_warning_random_state(self):
-    # test that a ChangedBehavior warning is thrown if the random_state is
-    # not set in fit.
-    # TODO: remove in v.0.6
-    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
-    y = np.array([1, 0, 1, 0])
-    mmc_supervised = MMC_Supervised()
-    msg = ('As of v0.5.0, `MMC_Supervised` now uses the '
-           '`random_state` given at initialization to sample '
-           'constraints, not the default `np.random` from the `fit` '
-           'method, since this argument is now deprecated. '
-           'This warning will disappear in v0.6.0.')
-    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
-      mmc_supervised.fit(X, y)
-    assert any(msg == str(wrn.message) for wrn in raised_warning)
-
 
 @pytest.mark.parametrize(('algo_class', 'dataset'),
                          [(NCA, make_classification()),
diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py
index d1e2acf4..b9da87ed 100644
--- a/test/test_components_metric_conversion.py
+++ b/test/test_components_metric_conversion.py
@@ -37,8 +37,8 @@ def test_lsml_supervised(self):
 
   def test_itml_supervised(self):
     seed = np.random.RandomState(1234)
-    itml = ITML_Supervised(num_constraints=200)
-    itml.fit(self.X, self.y, random_state=seed)
+    itml = ITML_Supervised(num_constraints=200, random_state=seed)
+    itml.fit(self.X, self.y)
     L = itml.components_
     assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix())
 
@@ -70,9 +70,10 @@ def test_lfda(self):
     assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix())
 
   def test_rca_supervised(self):
     seed = np.random.RandomState(1234)
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
-    rca.fit(self.X, self.y, random_state=seed)
+    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2,
+                         random_state=seed)
+    rca.fit(self.X, self.y)
     L = rca.components_
     L = rca.components_
     assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix())
 
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index 2e3c3ef4..ab7e972d 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -226,24 +226,6 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset):
             np.isclose(metric(a, c), metric(a, b) + metric(b, c), rtol=1e-20))
 
 
-@pytest.mark.parametrize('estimator, build_dataset', metric_learners,
-                         ids=ids_metric_learners)
-def test_metric_raises_deprecation_warning(estimator, build_dataset):
-  """assert that a deprecation warning is raised if someones wants to call
-  the `metric` function"""
-  # TODO: remove this method in version 0.6.0
-  input_data, labels, _, X = build_dataset()
-  model = clone(estimator)
-  set_random_state(model)
-  model.fit(*remove_y(estimator, input_data, labels))
-
-  with pytest.warns(DeprecationWarning) as raised_warning:
-    model.metric()
-  assert (str(raised_warning[0].message) ==
-          ("`metric` is deprecated since version 0.5.0 and will be removed "
-           "in 0.6.0. Use `get_mahalanobis_matrix` instead."))
-
-
 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
                          ids=ids_metric_learners)
 def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset):
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
index 7f7d7037..e18eb7f4 100644
--- a/test/test_sklearn_compat.py
+++ b/test/test_sklearn_compat.py
@@ -28,23 +28,23 @@
 
 class Stable_RCA_Supervised(RCA_Supervised):
 
-  def __init__(self, n_components=None, pca_comps=None,
+  def __init__(self, n_components=None,
                chunk_size=2, preprocessor=None, random_state=None):
     # this init makes RCA stable for scikit-learn examples.
     super(Stable_RCA_Supervised, self).__init__(
-        num_chunks=2, n_components=n_components, pca_comps=pca_comps,
+        num_chunks=2, n_components=n_components,
         chunk_size=chunk_size, preprocessor=preprocessor,
         random_state=random_state)
 
 
 class Stable_SDML_Supervised(SDML_Supervised):
 
-  def __init__(self, sparsity_param=0.01, num_labeled='deprecated',
+  def __init__(self, sparsity_param=0.01,
                num_constraints=None, verbose=False, preprocessor=None,
                random_state=None):
     # this init makes SDML stable for scikit-learn examples.
     super(Stable_SDML_Supervised, self).__init__(
-        sparsity_param=sparsity_param, num_labeled=num_labeled,
+        sparsity_param=sparsity_param,
         num_constraints=num_constraints, verbose=verbose,
         preprocessor=preprocessor, balance_param=1e-5, prior='identity',
         random_state=random_state)