diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py
index 5973f7b8..05035085 100644
--- a/bench/benchmarks/iris.py
+++ b/bench/benchmarks/iris.py
@@ -5,15 +5,15 @@
 CLASSES = {
     'Covariance': metric_learn.Covariance(),
-    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
+    'ITML_Supervised': metric_learn.ITML_Supervised(n_constraints=200),
     'LFDA': metric_learn.LFDA(k=2, dim=2),
-    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
-    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
+    'LMNN': metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False),
+    'LSML_Supervised': metric_learn.LSML_Supervised(n_constraints=200),
    'MLKR': metric_learn.MLKR(),
     'NCA': metric_learn.NCA(max_iter=700, n_components=2),
-    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
+    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, n_chunks=30,
                                                   chunk_size=2),
-    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500)
+    'SDML_Supervised': metric_learn.SDML_Supervised(n_constraints=1500)
 }
diff --git a/doc/supervised.rst b/doc/supervised.rst
index c6d8b68b..81c51e27 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -152,7 +152,7 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes,
     X = iris_data['data']
     Y = iris_data['target']
 
-    lmnn = LMNN(k=5, learn_rate=1e-6)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
     lmnn.fit(X, Y, verbose=False)
 
 .. topic:: References:
@@ -393,8 +393,8 @@ are similar (+1) or dissimilar (-1)), are sampled with the function
 (of label +1), this method will look at all the samples from the same label and
 sample randomly a pair among them. To sample negative pairs (of label -1), this
 method will look at all the samples from a different class and sample randomly
-a pair among them. The method will try to build `num_constraints` positive
-pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+a pair among them. The method will try to build `n_constraints` positive
+pairs and `n_constraints` negative pairs, but sometimes it cannot find enough
 of one of those, so forcing `same_length=True` will return both times the
-minimum of the two lenghts.
+minimum of the two lengths.
 
@@ -416,5 +416,5 @@ last points should be less similar than the two first points).
     X = iris_data['data']
     Y = iris_data['target']
 
-    mmc = MMC_Supervised(num_constraints=200)
+    mmc = MMC_Supervised(n_constraints=200)
     mmc.fit(X, Y)
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 174210b8..59d0dae6 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -134,7 +134,7 @@ are respected.
 >>> from metric_learn import MMC
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(tuples, y)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+MMC(A0='deprecated', tol=0.001, diagonal=False,
     diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
     preprocessor=None, random_state=42, verbose=False)
 
@@ -250,7 +250,7 @@ tuples).
 >>> y_pairs = np.array([1, -1])
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(pairs, y_pairs)
-MMC(convergence_threshold=0.001, diagonal=False,
+MMC(tol=0.001, diagonal=False,
     diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
     preprocessor=None, random_state=42, verbose=False)
 
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 71229554..fe19166b 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -139,7 +139,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 #
 
 # setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6)
 
 # fit the data!
 lmnn.fit(X, y)
@@ -310,7 +310,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 # - See more in the documentation of the class :py:class:`RCA
 #   <metric_learn.RCA>`
 
-rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)
+rca = metric_learn.RCA_Supervised(n_chunks=30, chunk_size=2)
 X_rca = rca.fit_transform(X, y)
 
 plot_tsne(X_rca, y)
diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py
index d5856667..264f6fd5 100644
--- a/examples/plot_sandwich.py
+++ b/examples/plot_sandwich.py
@@ -28,9 +28,9 @@ def sandwich_demo():
 
   mls = [
       LMNN(),
-      ITML_Supervised(num_constraints=200),
-      SDML_Supervised(num_constraints=200, balance_param=0.001),
-      LSML_Supervised(num_constraints=200),
+      ITML_Supervised(n_constraints=200),
+      SDML_Supervised(n_constraints=200, balance_param=0.001),
+      LSML_Supervised(n_constraints=200),
   ]
 
   for ax_num, ml in enumerate(mls, start=3):
diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index 2d86b819..210b0718 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -7,6 +7,7 @@
 from sklearn.utils import check_random_state
 from sklearn.neighbors import NearestNeighbors
 
+
 __all__ = ['Constraints']
 
 
@@ -31,21 +32,21 @@ def __init__(self, partial_labels):
     partial_labels = np.asanyarray(partial_labels, dtype=int)
     self.partial_labels = partial_labels
 
-  def positive_negative_pairs(self, num_constraints, same_length=False,
-                              random_state=None):
+  def positive_negative_pairs(self, n_constraints, same_length=False,
+                              random_state=None, num_constraints='deprecated'):
     """
     Generates positive pairs and negative pairs from labeled data.
 
-    Positive pairs are formed by randomly drawing ``num_constraints`` pairs of
+    Positive pairs are formed by randomly drawing ``n_constraints`` pairs of
     points with the same label. Negative pairs are formed by randomly drawing
-    ``num_constraints`` pairs of points with different label.
+    ``n_constraints`` pairs of points with different label.
 
     In the case where it is not possible to generate enough positive or
     negative pairs, a smaller number of pairs will be returned with a warning.
 
     Parameters
     ----------
-    num_constraints : int
+    n_constraints : int
      Number of positive and negative constraints to generate.
 
     same_length : bool, optional (default=False)
@@ -55,6 +56,8 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int.
 
+    num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
     Returns
     -------
     a : array-like, shape=(n_constraints,)
       1D array of indicators for the left elements of positive pairs.
 
     b : array-like, shape=(n_constraints,)
       1D array of indicators for the right elements of positive pairs.
 
@@ -69,10 +72,16 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
     d : array-like, shape=(n_constraints,)
       1D array of indicators for the right elements of negative pairs.
""" + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints random_state = check_random_state(random_state) - a, b = self._pairs(num_constraints, same_label=True, + a, b = self._pairs(n_constraints, same_label=True, random_state=random_state) - c, d = self._pairs(num_constraints, same_label=False, + c, d = self._pairs(n_constraints, same_label=False, random_state=random_state) if same_length and len(a) != len(c): n = min(len(a), len(c)) @@ -188,15 +199,15 @@ def generate_knntriplets(self, X, k_genuine, k_impostor): return triplets - def _pairs(self, num_constraints, same_label=True, max_iter=10, + def _pairs(self, n_constraints, same_label=True, max_iter=10, random_state=np.random): known_label_idx, = np.where(self.partial_labels >= 0) known_labels = self.partial_labels[known_label_idx] num_labels = len(known_labels) ab = set() it = 0 - while it < max_iter and len(ab) < num_constraints: - nc = num_constraints - len(ab) + while it < max_iter and len(ab) < n_constraints: + nc = n_constraints - len(ab) for aidx in random_state.randint(num_labels, size=nc): if same_label: mask = known_labels[aidx] == known_labels @@ -207,25 +218,26 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, if len(b_choices) > 0: ab.add((aidx, random_state.choice(b_choices))) it += 1 - if len(ab) < num_constraints: + if len(ab) < n_constraints: warnings.warn("Only generated %d %s constraints (requested %d)" % ( - len(ab), 'positive' if same_label else 'negative', num_constraints)) - ab = np.array(list(ab)[:num_constraints], dtype=int) + len(ab), 'positive' if same_label else 'negative', n_constraints)) + ab = np.array(list(ab)[:n_constraints], dtype=int) return known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2, random_state=None): + def chunks(self, n_chunks=100, chunk_size=2, random_state=None, + num_chunks='deprecated'): """ Generates chunks from labeled data. - Each of ``num_chunks`` chunks is composed of ``chunk_size`` points from + Each of ``n_chunks`` chunks is composed of ``chunk_size`` points from the same class drawn at random. Each point can belong to at most 1 chunk. - In the case where there is not enough points to generate ``num_chunks`` + In the case where there is not enough points to generate ``n_chunks`` chunks of size ``chunk_size``, a ValueError will be raised. Parameters ---------- - num_chunks : int, optional (default=100) + n_chunks : int, optional (default=100) Number of chunks to generate. chunk_size : int, optional (default=2) @@ -234,12 +246,20 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. + num_chunks : Renamed to n_chunks. Will be deprecated in 0.7.0 + Returns ------- chunks : array-like, shape=(n_samples,) 1D array of chunk indicators, where -1 indicates that the point does not belong to any chunk. """ + if num_chunks != 'deprecated': + warnings.warn('"num_chunks" parameter has been renamed to' + ' "n_chunks". 
It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks random_state = check_random_state(random_state) chunks = -np.ones_like(self.partial_labels, dtype=int) uniq, lookup = np.unique(self.partial_labels, return_inverse=True) @@ -247,13 +267,13 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq)) if c not in unknown_uniq] max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) - if max_chunks < num_chunks: + if max_chunks < n_chunks: raise ValueError(('Not enough possible chunks of %d elements in each' ' class to form expected %d chunks - maximum number' ' of chunks is %d' - ) % (chunk_size, num_chunks, max_chunks)) + ) % (chunk_size, n_chunks, max_chunks)) idx = 0 - while idx < num_chunks and all_inds: + while idx < n_chunks and all_inds: if len(all_inds) == 1: c = 0 else: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 43872b60..29b48daa 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -9,6 +9,7 @@ from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs from ._util import components_from_metric, _initialize_metric_mahalanobis +import warnings class _BaseITML(MahalanobisMixin): @@ -16,12 +17,20 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, + def __init__(self, gamma=1., max_iter=1000, tol=1e-3, prior='identity', verbose=False, - preprocessor=None, random_state=None): + preprocessor=None, random_state=None, + convergence_threshold='deprecated'): + if convergence_threshold != 'deprecated': + warnings.warn('"convergence_threshold" parameter has been ' + ' renamed to "tol". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + tol = convergence_threshold + self.convergence_threshold = 'deprecated' # Avoid errors self.gamma = gamma self.max_iter = max_iter - self.convergence_threshold = convergence_threshold + self.tol = tol self.prior = prior self.verbose = verbose self.random_state = random_state @@ -86,7 +95,7 @@ def _fit(self, pairs, y, bounds=None): conv = np.inf break conv = np.abs(lambdaold - _lambda).sum() / normsum - if conv < self.convergence_threshold: + if conv < self.tol: break lambdaold = _lambda.copy() if self.verbose: @@ -122,7 +131,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): max_iter : int, optional (default=1000) Maximum number of iteration of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Convergence tolerance. prior : string or numpy array, optional (default='identity') @@ -158,6 +167,8 @@ class ITML(_BaseITML, _PairsClassifierMixin): A pseudo random number generator object or a seed for it if int. If ``prior='random'``, ``random_state`` is used to set the prior. + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -260,10 +271,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin): max_iter : int, optional (default=1000) Maximum number of iterations of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Tolerance of the optimization procedure. 
-  num_constraints : int, optional (default=None)
+  n_constraints : int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -302,6 +313,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
     case, `random_state` is also used to randomly sample constraints from
     labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
 
   Attributes
   ----------
@@ -328,7 +342,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> itml = ITML_Supervised(num_constraints=200)
+  >>> itml = ITML_Supervised(n_constraints=200)
   >>> itml.fit(X, Y)
 
   See Also
   --------
@@ -338,14 +352,26 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   that describes the supervised version of weakly supervised estimators.
   """
 
-  def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3,
-               num_constraints=None, prior='identity',
-               verbose=False, preprocessor=None, random_state=None):
+  def __init__(self, gamma=1.0, max_iter=1000, tol=1e-3,
+               n_constraints=None, prior='identity',
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated',
+               convergence_threshold='deprecated'):
     _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
-                       convergence_threshold=convergence_threshold,
+                       tol=tol,
                        prior=prior, verbose=verbose,
-                       preprocessor=preprocessor, random_state=random_state)
-    self.num_constraints = num_constraints
+                       preprocessor=preprocessor,
+                       random_state=random_state,
+                       convergence_threshold=convergence_threshold)
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_constraints = num_constraints
+    self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y, bounds=None):
     """Create constraints from labels and learn the ITML model.
@@ -369,13 +395,13 @@ def fit(self, X, y, bounds=None):
       points in the training data `X`.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseITML._fit(self, pairs, y, bounds=bounds)
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 8bdc4bf0..47bb065f 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -5,6 +5,7 @@
 from collections import Counter
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin
+import warnings
 
 from ._util import _initialize_components, _check_n_components
 from .base_metric import MahalanobisMixin
@@ -63,7 +64,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     :meth:`fit` and n_features_a must be less than or equal to that.
     If ``n_components`` is not None, n_features_a must match it.
 
-  k : int, optional (default=3)
+  n_neighbors : int, optional (default=3)
     Number of neighbors to consider, not including self-edges.
 
   min_iter : int, optional (default=50)
@@ -99,6 +100,8 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     transformation. If ``init='pca'``, ``random_state`` is passed as an
     argument to PCA when initializing the transformation.
 
+  k : Renamed to n_neighbors. Will be removed in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -116,7 +119,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lmnn = LMNN(k=5, learn_rate=1e-6)
+  >>> lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
   >>> lmnn.fit(X, Y, verbose=False)
 
   References
   ----------
@@ -128,12 +131,19 @@ class LMNN(MahalanobisMixin, TransformerMixin):
        2005.
   """
 
-  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+  def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
                learn_rate=1e-7, regularization=0.5,
                convergence_tol=0.001, verbose=False, preprocessor=None,
-               n_components=None, random_state=None):
+               n_components=None, random_state=None, k='deprecated'):
     self.init = init
-    self.k = k
+    if k != 'deprecated':
+      warnings.warn('"k" parameter has been renamed to'
+                    ' "n_neighbors". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_neighbors = k
+    self.k = 'deprecated'  # Kept so get_params still works
+    self.n_neighbors = n_neighbors
     self.min_iter = min_iter
     self.max_iter = max_iter
     self.learn_rate = learn_rate
@@ -145,7 +155,7 @@ def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
     super(LMNN, self).__init__(preprocessor)
 
   def fit(self, X, y):
-    k = self.k
+    k = self.n_neighbors
     reg = self.regularization
     learn_rate = self.learn_rate
 
@@ -162,7 +172,7 @@ def fit(self, X, y):
                                         self.verbose,
                                         random_state=self.random_state)
     required_k = np.bincount(label_inds).min()
-    if self.k > required_k:
+    if self.n_neighbors > required_k:
       raise ValueError('not enough class labels for specified k'
                        ' (smallest class has %d)' % required_k)
 
@@ -275,12 +285,12 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
     return 2 * G, objective, total_active
 
   def _select_targets(self, X, label_inds):
-    target_neighbors = np.empty((X.shape[0], self.k), dtype=int)
+    target_neighbors = np.empty((X.shape[0], self.n_neighbors), dtype=int)
     for label in self.labels_:
       inds, = np.nonzero(label_inds == label)
       dd = euclidean_distances(X[inds], squared=True)
       np.fill_diagonal(dd, np.inf)
-      nn = np.argsort(dd)[..., :self.k]
+      nn = np.argsort(dd)[..., :self.n_neighbors]
       target_neighbors[inds] = inds[nn]
     return target_neighbors
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 28f65ce7..af7fa95b 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -9,6 +9,7 @@
 from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseLSML(MahalanobisMixin):
@@ -261,11 +262,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     (n_features, n_features), that will be used as such to set the
     prior.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
-  weights : (num_constraints,) array of floats, optional (default=None)
+  weights : (n_constraints,) array of floats, optional (default=None)
     Relative weight given to each constraint. If None, defaults to uniform
     weights.
 
@@ -282,6 +283,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     prior. In any case, `random_state` is also used to randomly sample
     constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import LSML_Supervised
@@ -289,7 +292,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lsml = LSML_Supervised(num_constraints=200)
+  >>> lsml = LSML_Supervised(n_constraints=200)
   >>> lsml.fit(X, Y)
 
   Attributes
@@ -303,12 +306,22 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   """
 
   def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
-               num_constraints=None, weights=None,
-               verbose=False, preprocessor=None, random_state=None):
+               n_constraints=None, weights=None,
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated'):
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
-    self.num_constraints = num_constraints
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
     self.weights = weights
 
   def fit(self, X, y):
@@ -323,13 +336,13 @@ def fit(self, X, y):
       Data labels.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
+    pos_neg = c.positive_negative_pairs(n_constraints, same_length=True,
                                         random_state=self.random_state)
     return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                           weights=self.weights)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 1ff30b1e..5cf166fd 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -6,19 +6,28 @@
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseMMC(MahalanobisMixin):
 
   _tuple_size = 2  # constraints are pairs
 
-  def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
+  def __init__(self, max_iter=100, max_proj=10000, tol=1e-3,
                init='identity', diagonal=False,
                diagonal_c=1.0, verbose=False, preprocessor=None,
-               random_state=None):
+               random_state=None,
+               convergence_threshold='deprecated'):
+    if convergence_threshold != 'deprecated':
+      warnings.warn('"convergence_threshold" parameter has been'
+                    ' renamed to "tol". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      tol = convergence_threshold
+    self.convergence_threshold = 'deprecated'  # Kept so get_params still works
     self.max_iter = max_iter
     self.max_proj = max_proj
-    self.convergence_threshold = convergence_threshold
+    self.tol = tol
     self.init = init
     self.diagonal = diagonal
     self.diagonal_c = diagonal_c
@@ -145,13 +154,13 @@ def _fit_full(self, pairs, y):
         A[:] = A_old + alpha * M
 
       delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old)
-      if delta < self.convergence_threshold:
+      if delta < self.tol:
         break
       if self.verbose:
         print('mmc iter: %d, conv = %f, projections = %d' %
               (cycle, delta, it + 1))
 
-    if delta > self.convergence_threshold:
+    if delta > self.tol:
       self.converged_ = False
       if self.verbose:
         print('mmc did not converge, conv = %f' % (delta,))
@@ -185,7 +194,7 @@ def _fit_diag(self, pairs, y):
     reduction = 2.0
     w = np.diag(self.A_).copy()
 
-    while error > self.convergence_threshold and it < self.max_iter:
+    while error > self.tol and it < self.max_iter:
 
       fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w)
       obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
@@ -332,7 +341,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
   max_proj : int, optional (default=10000)
     Maximum number of projection steps.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
   init : string or numpy array, optional (default='identity')
@@ -377,6 +386,8 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
     ``init='random'``, ``random_state`` is used to initialize the random
     transformation.
 
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -469,10 +480,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   max_proj : int, optional (default=10000)
     Maximum number of projection steps.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -518,6 +529,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     Mahalanobis matrix. In any case, `random_state` is also used to
     randomly sample constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import MMC_Supervised
@@ -525,7 +540,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> mmc = MMC_Supervised(num_constraints=200)
+  >>> mmc = MMC_Supervised(n_constraints=200)
   >>> mmc.fit(X, Y)
 
   Attributes
@@ -538,16 +553,29 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     metric (See function `components_from_metric`.)
""" - def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_constraints=None, init='identity', + def __init__(self, max_iter=100, max_proj=10000, tol=1e-6, + n_constraints=None, init='identity', diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None, random_state=None): + preprocessor=None, random_state=None, + num_constraints='deprecated', + convergence_threshold='deprecated'): _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, - convergence_threshold=convergence_threshold, + tol=tol, init=init, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose, - preprocessor=preprocessor, random_state=random_state) - self.num_constraints = num_constraints + preprocessor=preprocessor, + random_state=random_state, + convergence_threshold=convergence_threshold) + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' def fit(self, X, y): """Create constraints from labels and learn the MMC model. @@ -561,13 +589,13 @@ def fit(self, X, y): Data labels. """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 34f7f3ff..aa726761 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -13,13 +13,13 @@ # mean center each chunklet separately def _chunk_mean_centering(data, chunks): - num_chunks = chunks.max() + 1 + n_chunks = chunks.max() + 1 chunk_mask = chunks != -1 # We need to ensure the data is float so that we can substract the # mean on it chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask] - for c in range(num_chunks): + for c in range(n_chunks): mask = chunk_labels == c chunk_data[mask] -= chunk_data[mask].mean(axis=0) @@ -135,14 +135,14 @@ class RCA_Supervised(RCA): `RCA_Supervised` creates chunks of similar points by first sampling a class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. + `n_chunks` times. Parameters ---------- n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_chunks: int, optional (default=100) + n_chunks: int, optional (default=100) Number of chunks to generate. chunk_size: int, optional (default=2) @@ -156,6 +156,8 @@ class RCA_Supervised(RCA): A pseudo random number generator object or a seed for it if int. It is used to randomly sample constraints from labels. + num_chunks : Renamed to n_chunks. 
Will be deprecated in 0.7.0 + Examples -------- >>> from metric_learn import RCA_Supervised @@ -163,7 +165,7 @@ class RCA_Supervised(RCA): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) + >>> rca = RCA_Supervised(n_chunks=30, chunk_size=2) >>> rca.fit(X, Y) Attributes @@ -172,17 +174,25 @@ class RCA_Supervised(RCA): The learned linear transformation ``L``. """ - def __init__(self, n_components=None, num_chunks=100, chunk_size=2, - preprocessor=None, random_state=None): + def __init__(self, n_components=None, n_chunks=100, chunk_size=2, + preprocessor=None, random_state=None, + num_chunks='deprecated'): """Initialize the supervised version of `RCA`.""" RCA.__init__(self, n_components=n_components, preprocessor=preprocessor) - self.num_chunks = num_chunks + if num_chunks != 'deprecated': + warnings.warn('"num_chunks" parameter has been renamed to' + ' "n_chunks". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks + self.num_chunks = 'deprecated' # To avoid no_attribute error + self.n_chunks = n_chunks self.chunk_size = chunk_size self.random_state = random_state def fit(self, X, y): """Create constraints from labels and learn the RCA model. - Needs num_constraints specified in constructor. + Needs n_constraints specified in constructor. (Not true?) Parameters ---------- @@ -192,11 +202,11 @@ def fit(self, X, y): y : (n) data labels """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - chunks = Constraints(y).chunks(num_chunks=self.num_chunks, + chunks = Constraints(y).chunks(n_chunks=self.n_chunks, chunk_size=self.chunk_size, random_state=self.random_state) - if self.num_chunks * (self.chunk_size - 1) < X.shape[1]: + if self.n_chunks * (self.chunk_size - 1) < X.shape[1]: warnings.warn('Due to the parameters of RCA_Supervised, ' 'the inner covariance matrix is not invertible, ' 'so the transformation matrix will contain Nan values. ' diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index a0736ffa..93f3f441 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -177,7 +177,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> sdml = SDML_Supervised(num_constraints=200) + >>> sdml = SDML_Supervised(n_constraints=200) >>> sdml.fit(X, Y) References @@ -262,7 +262,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): (n_features, n_features), that will be used as such to set the prior. - num_constraints : int, optional (default=None) + n_constraints : int, optional (default=None) Number of constraints to generate. If None, defaults to `20 * num_classes**2`. @@ -279,6 +279,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): prior. In any case, `random_state` is also used to randomly sample constraints from labels. + num_constraints : Renamed to n_constraints. 
Will be deprecated in 0.7.0 + Attributes ---------- components_ : `numpy.ndarray`, shape=(n_features, n_features) @@ -293,13 +295,22 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): """ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', - num_constraints=None, verbose=False, preprocessor=None, - random_state=None): + n_constraints=None, verbose=False, preprocessor=None, + random_state=None, num_constraints='deprecated'): _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_constraints = num_constraints + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' def fit(self, X, y): """Create constraints from labels and learn the SDML model. @@ -318,13 +329,13 @@ def fit(self, X, y): Returns the instance. """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseSDML._fit(self, pairs, y) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 2debe426..68be84c2 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -312,7 +312,7 @@ def test_large_output_iter(self): class TestLSML(MetricTestCase): def test_iris(self): - lsml = LSML_Supervised(num_constraints=200) + lsml = LSML_Supervised(n_constraints=200) lsml.fit(self.iris_points, self.iris_labels) csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) @@ -321,7 +321,7 @@ def test_iris(self): class TestITML(MetricTestCase): def test_iris(self): - itml = ITML_Supervised(num_constraints=200) + itml = ITML_Supervised(n_constraints=200) itml.fit(self.iris_points, self.iris_labels) csep = class_separation(itml.transform(self.iris_points), self.iris_labels) @@ -367,7 +367,7 @@ def test_bounds_parameters_invalid(bounds): class TestLMNN(MetricTestCase): def test_iris(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.iris_points, self.iris_labels) csep = class_separation(lmnn.transform(self.iris_points), @@ -384,7 +384,7 @@ def test_loss_grad_lbfgs(self): L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1]) lmnn = LMNN() - k = lmnn.k + k = lmnn.n_neighbors reg = lmnn.regularization X, y = lmnn._prepare_inputs(X, y, dtype=float, @@ -560,9 +560,9 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): def test_toy_ex_lmnn(X, y, loss): """Test that the loss give the right result on a toy example""" L = np.array([[1]]) - lmnn = LMNN(k=1, regularization=0.5) + lmnn = LMNN(n_neighbors=1, regularization=0.5) - k = lmnn.k + k = lmnn.n_neighbors reg = lmnn.regularization X, y = lmnn._prepare_inputs(X, y, dtype=float, @@ -736,7 +736,7 @@ def 
test_iris(self): # TODO: un-flake it! rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=5e-5, random_state=rs) sdml.fit(self.iris_points, self.iris_labels) csep = class_separation(sdml.transform(self.iris_points), @@ -965,7 +965,7 @@ def test_iris(self): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.29) @@ -991,15 +991,15 @@ def test_rank_deficient_returns_warning(self): def test_unknown_labels(self): n = 200 - num_chunks = 50 + n_chunks = 50 X, y = make_classification(random_state=42, n_samples=2 * n, n_features=6, n_informative=6, n_redundant=0) y2 = np.concatenate((y[:n], -np.ones(n))) - rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) rca.fit(X[:n], y[:n]) - rca2 = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca2 = RCA_Supervised(n_chunks=n_chunks, random_state=42) rca2.fit(X, y2) assert not np.any(np.isnan(rca.components_)) @@ -1009,11 +1009,11 @@ def test_unknown_labels(self): def test_bad_parameters(self): n = 200 - num_chunks = 3 + n_chunks = 3 X, y = make_classification(random_state=42, n_samples=n, n_features=6, n_informative=6, n_redundant=0) - rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) msg = ('Due to the parameters of RCA_Supervised, ' 'the inner covariance matrix is not invertible, ' 'so the transformation matrix will contain Nan values. 
' @@ -1067,7 +1067,7 @@ def test_iris(self): # Full metric n_features = self.iris_points.shape[1] - mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10) + mmc = MMC(tol=0.01, init=np.eye(n_features) / 10) mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [[+0.000514, +0.000868, -0.001195, -0.001703], [+0.000868, +0.001468, -0.002021, -0.002879], diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 67f9b6a0..e13455be 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -36,15 +36,16 @@ def test_covariance(self): remove_spaces(f"Covariance({merged_kwargs})")) def test_lmnn(self): - def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'k': 3, + def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'n_neighbors': 3, 'learn_rate': 1e-07, 'max_iter': 1000, 'min_iter': 50, 'n_components': None, 'preprocessor': None, 'random_state': None, 'regularization': 0.5, 'verbose': False} - nndef_kwargs = {'convergence_tol': 0.01, 'k': 6} + nndef_kwargs = {'convergence_tol': 0.01, 'n_neighbors': 6} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, k=6))), + remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, + n_neighbors=6))), remove_spaces(f"LMNN({merged_kwargs})")) def test_nca(self): @@ -65,21 +66,21 @@ def test_lfda(self): remove_spaces(f"LFDA({merged_kwargs})")) def test_itml(self): - def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, + def_kwargs = {'tol': 0.001, 'gamma': 1.0, 'max_iter': 1000, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'verbose': False} nndef_kwargs = {'gamma': 0.5} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))), remove_spaces(f"ITML({merged_kwargs})")) - def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, - 'max_iter': 1000, 'num_constraints': None, + def_kwargs = {'tol': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'verbose': False} - nndef_kwargs = {'num_constraints': 7} + nndef_kwargs = {'n_constraints': 7} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.ITML_Supervised(num_constraints=7))), + remove_spaces(str(metric_learn.ITML_Supervised(n_constraints=7))), remove_spaces(f"ITML_Supervised({merged_kwargs})")) def test_lsml(self): @@ -89,7 +90,7 @@ def test_lsml(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))), remove_spaces(f"LSML({merged_kwargs})")) - def_kwargs = {'max_iter': 1000, 'num_constraints': None, + def_kwargs = {'max_iter': 1000, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'tol': 0.001, 'verbose': False, 'weights': None} @@ -107,7 +108,7 @@ def test_sdml(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))), remove_spaces(f"SDML({merged_kwargs})")) - def_kwargs = {'balance_param': 0.5, 'num_constraints': None, + def_kwargs = {'balance_param': 0.5, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'sparsity_param': 0.01, 'verbose': False} @@ -123,12 +124,12 @@ def test_rca(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))), 
remove_spaces(f"RCA({merged_kwargs})")) - def_kwargs = {'chunk_size': 2, 'n_components': None, 'num_chunks': 100, + def_kwargs = {'chunk_size': 2, 'n_components': None, 'n_chunks': 100, 'preprocessor': None, 'random_state': None} - nndef_kwargs = {'num_chunks': 5} + nndef_kwargs = {'n_chunks': 5} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.RCA_Supervised(num_chunks=5))), + remove_spaces(str(metric_learn.RCA_Supervised(n_chunks=5))), remove_spaces(f"RCA_Supervised({merged_kwargs})")) def test_mlkr(self): @@ -141,7 +142,7 @@ def test_mlkr(self): remove_spaces(f"MLKR({merged_kwargs})")) def test_mmc(self): - def_kwargs = {'convergence_threshold': 0.001, 'diagonal': False, + def_kwargs = {'tol': 0.001, 'diagonal': False, 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, 'max_proj': 10000, 'preprocessor': None, 'random_state': None, 'verbose': False} @@ -149,9 +150,9 @@ def test_mmc(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))), remove_spaces(f"MMC({merged_kwargs})")) - def_kwargs = {'convergence_threshold': 1e-06, 'diagonal': False, + def_kwargs = {'tol': 1e-06, 'diagonal': False, 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, - 'max_proj': 10000, 'num_constraints': None, + 'max_proj': 10000, 'n_constraints': None, 'preprocessor': None, 'random_state': None, 'verbose': False} nndef_kwargs = {'max_iter': 1} diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index 5502ad90..c6113957 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -29,27 +29,27 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) L = lsml.components_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) L = itml.components_ assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) L = lmnn.components_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=1e-5, random_state=seed) sdml.fit(self.X, self.y) L = sdml.components_ @@ -69,7 +69,7 @@ def test_lfda(self): assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.X, self.y) L = rca.components_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) diff --git a/test/test_constraints.py b/test/test_constraints.py index 92876779..9d01f3d3 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -7,14 +7,14 @@ SEED = 42 -def gen_labels_for_chunks(num_chunks, chunk_size, +def 
gen_labels_for_chunks(n_chunks, chunk_size,
                           n_classes=10, n_unknown_labels=5):
-  """Generates num_chunks*chunk_size labels that split in num_chunks chunks,
-  that are homogeneous in the label."""
+  """Generates n_chunks*chunk_size labels that split into n_chunks chunks
+  that are homogeneous in the label."""
-  assert min(num_chunks, chunk_size) > 0
+  assert min(n_chunks, chunk_size) > 0
   classes = shuffle(np.arange(n_classes), random_state=SEED)
-  n_per_class = chunk_size * (num_chunks // n_classes)
-  n_maj_class = chunk_size * num_chunks - n_per_class * (n_classes - 1)
+  n_per_class = chunk_size * (n_chunks // n_classes)
+  n_maj_class = chunk_size * n_chunks - n_per_class * (n_classes - 1)
 
   first_labels = classes[0] * np.ones(n_maj_class, dtype=int)
   remaining_labels = np.concatenate([k * np.ones(n_per_class, dtype=int)
@@ -25,48 +25,48 @@ def gen_labels_for_chunks(num_chunks, chunk_size,
 
   return shuffle(labels, random_state=SEED)
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_exact_num_points_for_chunks(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_exact_num_points_for_chunks(n_chunks, chunk_size):
   """Checks that the chunk generation works well with just enough points."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   constraints = Constraints(labels)
-  chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+  chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                               random_state=SEED)
 
   chunk_no, size_each_chunk = np.unique(chunks[chunks >= 0],
                                         return_counts=True)
 
   np.testing.assert_array_equal(size_each_chunk, chunk_size)
-  assert chunk_no.shape[0] == num_chunks
+  assert chunk_no.shape[0] == n_chunks
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_chunk_case_one_miss_point(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_chunk_case_one_miss_point(n_chunks, chunk_size):
   """Checks that the chunk generation breaks when one point is missing."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   assert len(labels) >= 1
   constraints = Constraints(labels[1:])
   with pytest.raises(ValueError) as e:
-    constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+    constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                        random_state=SEED)
   expected_message = (('Not enough possible chunks of %d elements in each'
                        ' class to form expected %d chunks - maximum number'
                        ' of chunks is %d'
-                       ) % (chunk_size, num_chunks, num_chunks - 1))
+                       ) % (chunk_size, n_chunks, n_chunks - 1))
 
   assert str(e.value) == expected_message
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_unknown_labels_not_in_chunks(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_unknown_labels_not_in_chunks(n_chunks, chunk_size):
   """Checks that unknown labels are not assigned to any chunk."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   constraints = Constraints(labels)
-  chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+  chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                               random_state=SEED)
 
   assert np.all(chunks[labels < 0] < 0)
diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py
index d4d4bfe0..246223b0 100644
--- 
a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -29,47 +29,47 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) res_1 = lmnn.transform(self.X) - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) res_2 = lmnn.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) res_2 = sdml.fit_transform(self.X, self.y) @@ -99,13 +99,13 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) res_2 = rca.fit_transform(self.X, self.y) @@ -123,12 +123,12 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e3d981a4..cc12788c 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -417,7 +417,7 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, random_state=rng) # To make the test work for LMNN: if 'LMNN' in model_base.__class__.__name__: - model_base.set_params(k=1) + model_base.set_params(n_neighbors=1) # To make the test faster 
for estimators that have a max_iter: if hasattr(model_base, 'max_iter'): model_base.set_params(max_iter=1) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 3ad69712..c56f43f2 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -29,7 +29,7 @@ def __init__(self, n_components=None, chunk_size=2, preprocessor=None, random_state=None): # this init makes RCA stable for scikit-learn examples. super(Stable_RCA_Supervised, self).__init__( - num_chunks=2, n_components=n_components, + n_chunks=2, n_components=n_components, chunk_size=chunk_size, preprocessor=preprocessor, random_state=random_state) @@ -37,12 +37,12 @@ def __init__(self, n_components=None, class Stable_SDML_Supervised(SDML_Supervised): def __init__(self, sparsity_param=0.01, - num_constraints=None, verbose=False, preprocessor=None, + n_constraints=None, verbose=False, preprocessor=None, random_state=None): # this init makes SDML stable for scikit-learn examples. super(Stable_SDML_Supervised, self).__init__( sparsity_param=sparsity_param, - num_constraints=num_constraints, verbose=verbose, + n_constraints=n_constraints, verbose=verbose, preprocessor=preprocessor, balance_param=1e-5, prior='identity', random_state=random_state) diff --git a/test/test_utils.py b/test/test_utils.py index 072b94c5..a32ee563 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -60,11 +60,11 @@ def build_regression(with_preprocessor=False): def build_data(): input_data, labels = load_iris(return_X_y=True) X, y = shuffle(input_data, labels, random_state=SEED) - num_constraints = 50 + n_constraints = 50 constraints = Constraints(y) pairs = ( constraints - .positive_negative_pairs(num_constraints, same_length=True, + .positive_negative_pairs(n_constraints, same_length=True, random_state=check_random_state(SEED))) return X, pairs @@ -137,7 +137,7 @@ def build_quadruplets(with_preprocessor=False): (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=5), build_classification), + (RCA_Supervised(n_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), build_classification), (SCML_Supervised(), build_classification)]