diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py
index 5973f7b8..05035085 100644
--- a/bench/benchmarks/iris.py
+++ b/bench/benchmarks/iris.py
@@ -5,15 +5,15 @@
 CLASSES = {
     'Covariance': metric_learn.Covariance(),
-    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
+    'ITML_Supervised': metric_learn.ITML_Supervised(n_constraints=200),
     'LFDA': metric_learn.LFDA(k=2, dim=2),
-    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
-    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
+    'LMNN': metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False),
+    'LSML_Supervised': metric_learn.LSML_Supervised(n_constraints=200),
    'MLKR': metric_learn.MLKR(),
     'NCA': metric_learn.NCA(max_iter=700, n_components=2),
-    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
+    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, n_chunks=30,
                                                   chunk_size=2),
-    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500)
+    'SDML_Supervised': metric_learn.SDML_Supervised(n_constraints=1500)
 }
diff --git a/doc/supervised.rst b/doc/supervised.rst
index c6d8b68b..81c51e27 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -152,7 +152,7 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes,
     X = iris_data['data']
     Y = iris_data['target']
 
-    lmnn = LMNN(k=5, learn_rate=1e-6)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
     lmnn.fit(X, Y, verbose=False)
 
 .. topic:: References:
@@ -393,8 +393,8 @@ are similar (+1) or dissimilar (-1)), are sampled with the function
 (of label +1), this method will look at all the samples from the same label and
 sample randomly a pair among them. To sample negative pairs (of label -1), this
 method will look at all the samples from a different class and sample randomly
-a pair among them. The method will try to build `num_constraints` positive
-pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+a pair among them. The method will try to build `n_constraints` positive
+pairs and `n_constraints` negative pairs, but sometimes it cannot find enough
 of one of those, so forcing `same_length=True` will return both times the
-minimum of the two lenghts.
+minimum of the two lengths.
 
@@ -416,5 +416,5 @@ last points should be less similar than the two first points).
     X = iris_data['data']
     Y = iris_data['target']
 
-    mmc = MMC_Supervised(num_constraints=200)
+    mmc = MMC_Supervised(n_constraints=200)
     mmc.fit(X, Y)
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 174210b8..59d0dae6 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -134,7 +134,7 @@ are respected.
 >>> from metric_learn import MMC
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(tuples, y)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+MMC(A0='deprecated', tol=0.001, diagonal=False,
     diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
     preprocessor=None, random_state=42, verbose=False)
 
@@ -250,7 +250,7 @@ tuples).
 >>> y_pairs = np.array([1, -1])
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(pairs, y_pairs)
-MMC(convergence_threshold=0.001, diagonal=False,
+MMC(tol=0.001, diagonal=False,
     diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
     preprocessor=None, random_state=42, verbose=False)
 
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 71229554..fe19166b 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -139,7 +139,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 #
 
 # setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6)
 
 # fit the data!
 lmnn.fit(X, y)
@@ -310,7 +310,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 # - See more in the documentation of the class :py:class:`RCA
 #   <metric_learn.RCA>`
 
-rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)
+rca = metric_learn.RCA_Supervised(n_chunks=30, chunk_size=2)
 X_rca = rca.fit_transform(X, y)
 
 plot_tsne(X_rca, y)
diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py
index d5856667..264f6fd5 100644
--- a/examples/plot_sandwich.py
+++ b/examples/plot_sandwich.py
@@ -28,9 +28,9 @@ def sandwich_demo():
 
   mls = [
       LMNN(),
-      ITML_Supervised(num_constraints=200),
-      SDML_Supervised(num_constraints=200, balance_param=0.001),
-      LSML_Supervised(num_constraints=200),
+      ITML_Supervised(n_constraints=200),
+      SDML_Supervised(n_constraints=200, balance_param=0.001),
+      LSML_Supervised(n_constraints=200),
   ]
 
   for ax_num, ml in enumerate(mls, start=3):
diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index 2d86b819..210b0718 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -7,6 +7,7 @@
 from sklearn.utils import check_random_state
 from sklearn.neighbors import NearestNeighbors
 
+
 __all__ = ['Constraints']
 
 
@@ -31,21 +32,21 @@ def __init__(self, partial_labels):
     partial_labels = np.asanyarray(partial_labels, dtype=int)
     self.partial_labels = partial_labels
 
-  def positive_negative_pairs(self, num_constraints, same_length=False,
-                              random_state=None):
+  def positive_negative_pairs(self, n_constraints, same_length=False,
+                              random_state=None, num_constraints='deprecated'):
     """
     Generates positive pairs and negative pairs from labeled data.
 
-    Positive pairs are formed by randomly drawing ``num_constraints`` pairs of
+    Positive pairs are formed by randomly drawing ``n_constraints`` pairs of
     points with the same label. Negative pairs are formed by randomly drawing
-    ``num_constraints`` pairs of points with different label.
+    ``n_constraints`` pairs of points with different label.
 
     In the case where it is not possible to generate enough positive or
     negative pairs, a smaller number of pairs will be returned with a warning.
 
     Parameters
     ----------
-    num_constraints : int
+    n_constraints : int
      Number of positive and negative constraints to generate.
 
     same_length : bool, optional (default=False)
@@ -55,6 +56,8 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int.
 
+    num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
     Returns
     -------
     a : array-like, shape=(n_constraints,)
       1D array of indicators for the left elements of positive pairs.
 
     b : array-like, shape=(n_constraints,)
       1D array of indicators for the right elements of positive pairs.
 
@@ -69,10 +72,16 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
     d : array-like, shape=(n_constraints,)
       1D array of indicators for the right elements of negative pairs.
""" + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints random_state = check_random_state(random_state) - a, b = self._pairs(num_constraints, same_label=True, + a, b = self._pairs(n_constraints, same_label=True, random_state=random_state) - c, d = self._pairs(num_constraints, same_label=False, + c, d = self._pairs(n_constraints, same_label=False, random_state=random_state) if same_length and len(a) != len(c): n = min(len(a), len(c)) @@ -188,15 +199,15 @@ def generate_knntriplets(self, X, k_genuine, k_impostor): return triplets - def _pairs(self, num_constraints, same_label=True, max_iter=10, + def _pairs(self, n_constraints, same_label=True, max_iter=10, random_state=np.random): known_label_idx, = np.where(self.partial_labels >= 0) known_labels = self.partial_labels[known_label_idx] num_labels = len(known_labels) ab = set() it = 0 - while it < max_iter and len(ab) < num_constraints: - nc = num_constraints - len(ab) + while it < max_iter and len(ab) < n_constraints: + nc = n_constraints - len(ab) for aidx in random_state.randint(num_labels, size=nc): if same_label: mask = known_labels[aidx] == known_labels @@ -207,25 +218,26 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, if len(b_choices) > 0: ab.add((aidx, random_state.choice(b_choices))) it += 1 - if len(ab) < num_constraints: + if len(ab) < n_constraints: warnings.warn("Only generated %d %s constraints (requested %d)" % ( - len(ab), 'positive' if same_label else 'negative', num_constraints)) - ab = np.array(list(ab)[:num_constraints], dtype=int) + len(ab), 'positive' if same_label else 'negative', n_constraints)) + ab = np.array(list(ab)[:n_constraints], dtype=int) return known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2, random_state=None): + def chunks(self, n_chunks=100, chunk_size=2, random_state=None, + num_chunks='deprecated'): """ Generates chunks from labeled data. - Each of ``num_chunks`` chunks is composed of ``chunk_size`` points from + Each of ``n_chunks`` chunks is composed of ``chunk_size`` points from the same class drawn at random. Each point can belong to at most 1 chunk. - In the case where there is not enough points to generate ``num_chunks`` + In the case where there is not enough points to generate ``n_chunks`` chunks of size ``chunk_size``, a ValueError will be raised. Parameters ---------- - num_chunks : int, optional (default=100) + n_chunks : int, optional (default=100) Number of chunks to generate. chunk_size : int, optional (default=2) @@ -234,12 +246,20 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. + num_chunks : Renamed to n_chunks. Will be deprecated in 0.7.0 + Returns ------- chunks : array-like, shape=(n_samples,) 1D array of chunk indicators, where -1 indicates that the point does not belong to any chunk. """ + if num_chunks != 'deprecated': + warnings.warn('"num_chunks" parameter has been renamed to' + ' "n_chunks". 
It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks random_state = check_random_state(random_state) chunks = -np.ones_like(self.partial_labels, dtype=int) uniq, lookup = np.unique(self.partial_labels, return_inverse=True) @@ -247,13 +267,13 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq)) if c not in unknown_uniq] max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) - if max_chunks < num_chunks: + if max_chunks < n_chunks: raise ValueError(('Not enough possible chunks of %d elements in each' ' class to form expected %d chunks - maximum number' ' of chunks is %d' - ) % (chunk_size, num_chunks, max_chunks)) + ) % (chunk_size, n_chunks, max_chunks)) idx = 0 - while idx < num_chunks and all_inds: + while idx < n_chunks and all_inds: if len(all_inds) == 1: c = 0 else: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 43872b60..29b48daa 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -9,6 +9,7 @@ from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs from ._util import components_from_metric, _initialize_metric_mahalanobis +import warnings class _BaseITML(MahalanobisMixin): @@ -16,12 +17,20 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, + def __init__(self, gamma=1., max_iter=1000, tol=1e-3, prior='identity', verbose=False, - preprocessor=None, random_state=None): + preprocessor=None, random_state=None, + convergence_threshold='deprecated'): + if convergence_threshold != 'deprecated': + warnings.warn('"convergence_threshold" parameter has been ' + ' renamed to "tol". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + tol = convergence_threshold + self.convergence_threshold = 'deprecated' # Avoid errors self.gamma = gamma self.max_iter = max_iter - self.convergence_threshold = convergence_threshold + self.tol = tol self.prior = prior self.verbose = verbose self.random_state = random_state @@ -86,7 +95,7 @@ def _fit(self, pairs, y, bounds=None): conv = np.inf break conv = np.abs(lambdaold - _lambda).sum() / normsum - if conv < self.convergence_threshold: + if conv < self.tol: break lambdaold = _lambda.copy() if self.verbose: @@ -122,7 +131,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): max_iter : int, optional (default=1000) Maximum number of iteration of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Convergence tolerance. prior : string or numpy array, optional (default='identity') @@ -158,6 +167,8 @@ class ITML(_BaseITML, _PairsClassifierMixin): A pseudo random number generator object or a seed for it if int. If ``prior='random'``, ``random_state`` is used to set the prior. + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -260,10 +271,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin): max_iter : int, optional (default=1000) Maximum number of iterations of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) + tol : float, optional (default=1e-3) Tolerance of the optimization procedure. 
-  num_constraints : int, optional (default=None)
+  n_constraints : int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -302,6 +313,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
     case, `random_state` is also used to randomly sample constraints from
     labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
 
   Attributes
   ----------
@@ -328,7 +342,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> itml = ITML_Supervised(num_constraints=200)
+  >>> itml = ITML_Supervised(n_constraints=200)
   >>> itml.fit(X, Y)
 
   See Also
   --------
@@ -338,14 +352,26 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   that describes the supervised version of weakly supervised estimators.
   """
 
-  def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3,
-               num_constraints=None, prior='identity',
-               verbose=False, preprocessor=None, random_state=None):
+  def __init__(self, gamma=1.0, max_iter=1000, tol=1e-3,
+               n_constraints=None, prior='identity',
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated',
+               convergence_threshold='deprecated'):
     _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
-                       convergence_threshold=convergence_threshold,
+                       tol=tol,
                        prior=prior, verbose=verbose,
-                       preprocessor=preprocessor, random_state=random_state)
-    self.num_constraints = num_constraints
+                       preprocessor=preprocessor,
+                       random_state=random_state,
+                       convergence_threshold=convergence_threshold)
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_constraints = num_constraints
+    self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y, bounds=None):
     """Create constraints from labels and learn the ITML model.
@@ -369,13 +395,13 @@ def fit(self, X, y, bounds=None):
       points in the training data `X`.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseITML._fit(self, pairs, y, bounds=bounds)
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 8bdc4bf0..47bb065f 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -5,6 +5,7 @@
 from collections import Counter
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin
+import warnings
 
 from ._util import _initialize_components, _check_n_components
 from .base_metric import MahalanobisMixin
@@ -63,7 +64,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     :meth:`fit` and n_features_a must be less than or equal to that.
     If ``n_components`` is not None, n_features_a must match it.
 
-  k : int, optional (default=3)
+  n_neighbors : int, optional (default=3)
     Number of neighbors to consider, not including self-edges.
 
   min_iter : int, optional (default=50)
@@ -99,6 +100,8 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     transformation. If ``init='pca'``, ``random_state`` is passed as an
     argument to PCA when initializing the transformation.
 
+  k : Renamed to n_neighbors. Will be removed in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -116,7 +119,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lmnn = LMNN(k=5, learn_rate=1e-6)
+  >>> lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
   >>> lmnn.fit(X, Y, verbose=False)
 
   References
   ----------
@@ -128,12 +131,19 @@ class LMNN(MahalanobisMixin, TransformerMixin):
        2005.
   """
 
-  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+  def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
                learn_rate=1e-7, regularization=0.5,
                convergence_tol=0.001, verbose=False, preprocessor=None,
-               n_components=None, random_state=None):
+               n_components=None, random_state=None, k='deprecated'):
     self.init = init
-    self.k = k
+    if k != 'deprecated':
+      warnings.warn('"k" parameter has been renamed to'
+                    ' "n_neighbors". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_neighbors = k
+    self.k = 'deprecated'  # Kept so get_params still works
+    self.n_neighbors = n_neighbors
     self.min_iter = min_iter
     self.max_iter = max_iter
     self.learn_rate = learn_rate
@@ -145,7 +155,7 @@ def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
     super(LMNN, self).__init__(preprocessor)
 
   def fit(self, X, y):
-    k = self.k
+    k = self.n_neighbors
     reg = self.regularization
     learn_rate = self.learn_rate
 
@@ -162,7 +172,7 @@ def fit(self, X, y):
                                         self.verbose,
                                         random_state=self.random_state)
     required_k = np.bincount(label_inds).min()
-    if self.k > required_k:
+    if self.n_neighbors > required_k:
       raise ValueError('not enough class labels for specified k'
                        ' (smallest class has %d)' % required_k)
 
@@ -275,12 +285,12 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
     return 2 * G, objective, total_active
 
   def _select_targets(self, X, label_inds):
-    target_neighbors = np.empty((X.shape[0], self.k), dtype=int)
+    target_neighbors = np.empty((X.shape[0], self.n_neighbors), dtype=int)
     for label in self.labels_:
       inds, = np.nonzero(label_inds == label)
       dd = euclidean_distances(X[inds], squared=True)
       np.fill_diagonal(dd, np.inf)
-      nn = np.argsort(dd)[..., :self.k]
+      nn = np.argsort(dd)[..., :self.n_neighbors]
       target_neighbors[inds] = inds[nn]
     return target_neighbors
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 28f65ce7..af7fa95b 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -9,6 +9,7 @@
 from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseLSML(MahalanobisMixin):
@@ -261,11 +262,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     (n_features, n_features), that will be used as such to set the
     prior.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
-  weights : (num_constraints,) array of floats, optional (default=None)
+  weights : (n_constraints,) array of floats, optional (default=None)
     Relative weight given to each constraint. If None, defaults to uniform
     weights.
 
@@ -282,6 +283,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     prior. In any case, `random_state` is also used to randomly sample
     constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import LSML_Supervised
@@ -289,7 +292,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lsml = LSML_Supervised(num_constraints=200)
+  >>> lsml = LSML_Supervised(n_constraints=200)
   >>> lsml.fit(X, Y)
 
   Attributes
@@ -303,12 +306,22 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   """
 
   def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
-               num_constraints=None, weights=None,
-               verbose=False, preprocessor=None, random_state=None):
+               n_constraints=None, weights=None,
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated'):
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
-    self.num_constraints = num_constraints
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
     self.weights = weights
 
   def fit(self, X, y):
@@ -323,13 +336,13 @@ def fit(self, X, y):
       Data labels.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
+    pos_neg = c.positive_negative_pairs(n_constraints, same_length=True,
                                         random_state=self.random_state)
     return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                           weights=self.weights)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 1ff30b1e..5cf166fd 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -6,19 +6,28 @@
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseMMC(MahalanobisMixin):
 
   _tuple_size = 2  # constraints are pairs
 
-  def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
+  def __init__(self, max_iter=100, max_proj=10000, tol=1e-3,
                init='identity', diagonal=False,
                diagonal_c=1.0, verbose=False, preprocessor=None,
-               random_state=None):
+               random_state=None,
+               convergence_threshold='deprecated'):
+    if convergence_threshold != 'deprecated':
+      warnings.warn('"convergence_threshold" parameter has been'
+                    ' renamed to "tol". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      tol = convergence_threshold
+    self.convergence_threshold = 'deprecated'  # Kept so get_params still works
     self.max_iter = max_iter
     self.max_proj = max_proj
-    self.convergence_threshold = convergence_threshold
+    self.tol = tol
     self.init = init
     self.diagonal = diagonal
     self.diagonal_c = diagonal_c
@@ -145,13 +154,13 @@ def _fit_full(self, pairs, y):
         A[:] = A_old + alpha * M
 
       delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old)
-      if delta < self.convergence_threshold:
+      if delta < self.tol:
         break
       if self.verbose:
         print('mmc iter: %d, conv = %f, projections = %d' %
               (cycle, delta, it + 1))
 
-    if delta > self.convergence_threshold:
+    if delta > self.tol:
       self.converged_ = False
       if self.verbose:
         print('mmc did not converge, conv = %f' % (delta,))
@@ -185,7 +194,7 @@ def _fit_diag(self, pairs, y):
     reduction = 2.0
     w = np.diag(self.A_).copy()
 
-    while error > self.convergence_threshold and it < self.max_iter:
+    while error > self.tol and it < self.max_iter:
 
       fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w)
       obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
@@ -332,7 +341,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
   max_proj : int, optional (default=10000)
     Maximum number of projection steps.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
   init : string or numpy array, optional (default='identity')
@@ -377,6 +386,8 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
     ``init='random'``, ``random_state`` is used to initialize the random
     transformation.
 
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -469,10 +480,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   max_proj : int, optional (default=10000)
     Maximum number of projection steps.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -518,6 +529,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     Mahalanobis matrix. In any case, `random_state` is also used to
     randomly sample constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import MMC_Supervised
@@ -525,7 +540,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> mmc = MMC_Supervised(num_constraints=200)
+  >>> mmc = MMC_Supervised(n_constraints=200)
   >>> mmc.fit(X, Y)
 
   Attributes
@@ -538,16 +553,29 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     metric (See function `components_from_metric`.)
""" - def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_constraints=None, init='identity', + def __init__(self, max_iter=100, max_proj=10000, tol=1e-6, + n_constraints=None, init='identity', diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None, random_state=None): + preprocessor=None, random_state=None, + num_constraints='deprecated', + convergence_threshold='deprecated'): _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, - convergence_threshold=convergence_threshold, + tol=tol, init=init, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose, - preprocessor=preprocessor, random_state=random_state) - self.num_constraints = num_constraints + preprocessor=preprocessor, + random_state=random_state, + convergence_threshold=convergence_threshold) + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' def fit(self, X, y): """Create constraints from labels and learn the MMC model. @@ -561,13 +589,13 @@ def fit(self, X, y): Data labels. """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 34f7f3ff..aa726761 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -13,13 +13,13 @@ # mean center each chunklet separately def _chunk_mean_centering(data, chunks): - num_chunks = chunks.max() + 1 + n_chunks = chunks.max() + 1 chunk_mask = chunks != -1 # We need to ensure the data is float so that we can substract the # mean on it chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask] - for c in range(num_chunks): + for c in range(n_chunks): mask = chunk_labels == c chunk_data[mask] -= chunk_data[mask].mean(axis=0) @@ -135,14 +135,14 @@ class RCA_Supervised(RCA): `RCA_Supervised` creates chunks of similar points by first sampling a class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. + `n_chunks` times. Parameters ---------- n_components : int or None, optional (default=None) Dimensionality of reduced space (if None, defaults to dimension of X). - num_chunks: int, optional (default=100) + n_chunks: int, optional (default=100) Number of chunks to generate. chunk_size: int, optional (default=2) @@ -156,6 +156,8 @@ class RCA_Supervised(RCA): A pseudo random number generator object or a seed for it if int. It is used to randomly sample constraints from labels. + num_chunks : Renamed to n_chunks. 
Will be deprecated in 0.7.0 + Examples -------- >>> from metric_learn import RCA_Supervised @@ -163,7 +165,7 @@ class RCA_Supervised(RCA): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) + >>> rca = RCA_Supervised(n_chunks=30, chunk_size=2) >>> rca.fit(X, Y) Attributes @@ -172,17 +174,25 @@ class RCA_Supervised(RCA): The learned linear transformation ``L``. """ - def __init__(self, n_components=None, num_chunks=100, chunk_size=2, - preprocessor=None, random_state=None): + def __init__(self, n_components=None, n_chunks=100, chunk_size=2, + preprocessor=None, random_state=None, + num_chunks='deprecated'): """Initialize the supervised version of `RCA`.""" RCA.__init__(self, n_components=n_components, preprocessor=preprocessor) - self.num_chunks = num_chunks + if num_chunks != 'deprecated': + warnings.warn('"num_chunks" parameter has been renamed to' + ' "n_chunks". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks + self.num_chunks = 'deprecated' # To avoid no_attribute error + self.n_chunks = n_chunks self.chunk_size = chunk_size self.random_state = random_state def fit(self, X, y): """Create constraints from labels and learn the RCA model. - Needs num_constraints specified in constructor. + Needs n_constraints specified in constructor. (Not true?) Parameters ---------- @@ -192,11 +202,11 @@ def fit(self, X, y): y : (n) data labels """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - chunks = Constraints(y).chunks(num_chunks=self.num_chunks, + chunks = Constraints(y).chunks(n_chunks=self.n_chunks, chunk_size=self.chunk_size, random_state=self.random_state) - if self.num_chunks * (self.chunk_size - 1) < X.shape[1]: + if self.n_chunks * (self.chunk_size - 1) < X.shape[1]: warnings.warn('Due to the parameters of RCA_Supervised, ' 'the inner covariance matrix is not invertible, ' 'so the transformation matrix will contain Nan values. ' diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index a0736ffa..93f3f441 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -177,7 +177,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> sdml = SDML_Supervised(num_constraints=200) + >>> sdml = SDML_Supervised(n_constraints=200) >>> sdml.fit(X, Y) References @@ -262,7 +262,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): (n_features, n_features), that will be used as such to set the prior. - num_constraints : int, optional (default=None) + n_constraints : int, optional (default=None) Number of constraints to generate. If None, defaults to `20 * num_classes**2`. @@ -279,6 +279,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): prior. In any case, `random_state` is also used to randomly sample constraints from labels. + num_constraints : Renamed to n_constraints. 
Will be deprecated in 0.7.0 + Attributes ---------- components_ : `numpy.ndarray`, shape=(n_features, n_features) @@ -293,13 +295,22 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): """ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', - num_constraints=None, verbose=False, preprocessor=None, - random_state=None): + n_constraints=None, verbose=False, preprocessor=None, + random_state=None, num_constraints='deprecated'): _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_constraints = num_constraints + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' def fit(self, X, y): """Create constraints from labels and learn the SDML model. @@ -318,13 +329,13 @@ def fit(self, X, y): Returns the instance. """ X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseSDML._fit(self, pairs, y) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 2debe426..68be84c2 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -312,7 +312,7 @@ def test_large_output_iter(self): class TestLSML(MetricTestCase): def test_iris(self): - lsml = LSML_Supervised(num_constraints=200) + lsml = LSML_Supervised(n_constraints=200) lsml.fit(self.iris_points, self.iris_labels) csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) @@ -321,7 +321,7 @@ def test_iris(self): class TestITML(MetricTestCase): def test_iris(self): - itml = ITML_Supervised(num_constraints=200) + itml = ITML_Supervised(n_constraints=200) itml.fit(self.iris_points, self.iris_labels) csep = class_separation(itml.transform(self.iris_points), self.iris_labels) @@ -367,7 +367,7 @@ def test_bounds_parameters_invalid(bounds): class TestLMNN(MetricTestCase): def test_iris(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.iris_points, self.iris_labels) csep = class_separation(lmnn.transform(self.iris_points), @@ -384,7 +384,7 @@ def test_loss_grad_lbfgs(self): L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1]) lmnn = LMNN() - k = lmnn.k + k = lmnn.n_neighbors reg = lmnn.regularization X, y = lmnn._prepare_inputs(X, y, dtype=float, @@ -560,9 +560,9 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): def test_toy_ex_lmnn(X, y, loss): """Test that the loss give the right result on a toy example""" L = np.array([[1]]) - lmnn = LMNN(k=1, regularization=0.5) + lmnn = LMNN(n_neighbors=1, regularization=0.5) - k = lmnn.k + k = lmnn.n_neighbors reg = lmnn.regularization X, y = lmnn._prepare_inputs(X, y, dtype=float, @@ -736,7 +736,7 @@ def 
test_iris(self): # TODO: un-flake it! rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=5e-5, random_state=rs) sdml.fit(self.iris_points, self.iris_labels) csep = class_separation(sdml.transform(self.iris_points), @@ -965,7 +965,7 @@ def test_iris(self): class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.29) @@ -991,15 +991,15 @@ def test_rank_deficient_returns_warning(self): def test_unknown_labels(self): n = 200 - num_chunks = 50 + n_chunks = 50 X, y = make_classification(random_state=42, n_samples=2 * n, n_features=6, n_informative=6, n_redundant=0) y2 = np.concatenate((y[:n], -np.ones(n))) - rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) rca.fit(X[:n], y[:n]) - rca2 = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca2 = RCA_Supervised(n_chunks=n_chunks, random_state=42) rca2.fit(X, y2) assert not np.any(np.isnan(rca.components_)) @@ -1009,11 +1009,11 @@ def test_unknown_labels(self): def test_bad_parameters(self): n = 200 - num_chunks = 3 + n_chunks = 3 X, y = make_classification(random_state=42, n_samples=n, n_features=6, n_informative=6, n_redundant=0) - rca = RCA_Supervised(num_chunks=num_chunks, random_state=42) + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) msg = ('Due to the parameters of RCA_Supervised, ' 'the inner covariance matrix is not invertible, ' 'so the transformation matrix will contain Nan values. 
' @@ -1067,7 +1067,7 @@ def test_iris(self): # Full metric n_features = self.iris_points.shape[1] - mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10) + mmc = MMC(tol=0.01, init=np.eye(n_features) / 10) mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [[+0.000514, +0.000868, -0.001195, -0.001703], [+0.000868, +0.001468, -0.002021, -0.002879], diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 67f9b6a0..e13455be 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -36,15 +36,16 @@ def test_covariance(self): remove_spaces(f"Covariance({merged_kwargs})")) def test_lmnn(self): - def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'k': 3, + def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'n_neighbors': 3, 'learn_rate': 1e-07, 'max_iter': 1000, 'min_iter': 50, 'n_components': None, 'preprocessor': None, 'random_state': None, 'regularization': 0.5, 'verbose': False} - nndef_kwargs = {'convergence_tol': 0.01, 'k': 6} + nndef_kwargs = {'convergence_tol': 0.01, 'n_neighbors': 6} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, k=6))), + remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, + n_neighbors=6))), remove_spaces(f"LMNN({merged_kwargs})")) def test_nca(self): @@ -65,21 +66,21 @@ def test_lfda(self): remove_spaces(f"LFDA({merged_kwargs})")) def test_itml(self): - def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, + def_kwargs = {'tol': 0.001, 'gamma': 1.0, 'max_iter': 1000, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'verbose': False} nndef_kwargs = {'gamma': 0.5} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))), remove_spaces(f"ITML({merged_kwargs})")) - def_kwargs = {'convergence_threshold': 0.001, 'gamma': 1.0, - 'max_iter': 1000, 'num_constraints': None, + def_kwargs = {'tol': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'verbose': False} - nndef_kwargs = {'num_constraints': 7} + nndef_kwargs = {'n_constraints': 7} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.ITML_Supervised(num_constraints=7))), + remove_spaces(str(metric_learn.ITML_Supervised(n_constraints=7))), remove_spaces(f"ITML_Supervised({merged_kwargs})")) def test_lsml(self): @@ -89,7 +90,7 @@ def test_lsml(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))), remove_spaces(f"LSML({merged_kwargs})")) - def_kwargs = {'max_iter': 1000, 'num_constraints': None, + def_kwargs = {'max_iter': 1000, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'tol': 0.001, 'verbose': False, 'weights': None} @@ -107,7 +108,7 @@ def test_sdml(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))), remove_spaces(f"SDML({merged_kwargs})")) - def_kwargs = {'balance_param': 0.5, 'num_constraints': None, + def_kwargs = {'balance_param': 0.5, 'n_constraints': None, 'preprocessor': None, 'prior': 'identity', 'random_state': None, 'sparsity_param': 0.01, 'verbose': False} @@ -123,12 +124,12 @@ def test_rca(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))), 
remove_spaces(f"RCA({merged_kwargs})")) - def_kwargs = {'chunk_size': 2, 'n_components': None, 'num_chunks': 100, + def_kwargs = {'chunk_size': 2, 'n_components': None, 'n_chunks': 100, 'preprocessor': None, 'random_state': None} - nndef_kwargs = {'num_chunks': 5} + nndef_kwargs = {'n_chunks': 5} merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.RCA_Supervised(num_chunks=5))), + remove_spaces(str(metric_learn.RCA_Supervised(n_chunks=5))), remove_spaces(f"RCA_Supervised({merged_kwargs})")) def test_mlkr(self): @@ -141,7 +142,7 @@ def test_mlkr(self): remove_spaces(f"MLKR({merged_kwargs})")) def test_mmc(self): - def_kwargs = {'convergence_threshold': 0.001, 'diagonal': False, + def_kwargs = {'tol': 0.001, 'diagonal': False, 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, 'max_proj': 10000, 'preprocessor': None, 'random_state': None, 'verbose': False} @@ -149,9 +150,9 @@ def test_mmc(self): merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))), remove_spaces(f"MMC({merged_kwargs})")) - def_kwargs = {'convergence_threshold': 1e-06, 'diagonal': False, + def_kwargs = {'tol': 1e-06, 'diagonal': False, 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, - 'max_proj': 10000, 'num_constraints': None, + 'max_proj': 10000, 'n_constraints': None, 'preprocessor': None, 'random_state': None, 'verbose': False} nndef_kwargs = {'max_iter': 1} diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index 5502ad90..c6113957 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -29,27 +29,27 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) L = lsml.components_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) L = itml.components_ assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) L = lmnn.components_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=1e-5, random_state=seed) sdml.fit(self.X, self.y) L = sdml.components_ @@ -69,7 +69,7 @@ def test_lfda(self): assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.X, self.y) L = rca.components_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) diff --git a/test/test_constraints.py b/test/test_constraints.py index 92876779..9d01f3d3 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -7,14 +7,14 @@ SEED = 42 -def gen_labels_for_chunks(num_chunks, chunk_size, +def 
gen_labels_for_chunks(n_chunks, chunk_size,
                           n_classes=10, n_unknown_labels=5):
-  """Generates num_chunks*chunk_size labels that split in num_chunks chunks,
-  that are homogeneous in the label."""
+  """Generates n_chunks*chunk_size labels that split into n_chunks chunks
+  that are homogeneous in the label."""
-  assert min(num_chunks, chunk_size) > 0
+  assert min(n_chunks, chunk_size) > 0
   classes = shuffle(np.arange(n_classes), random_state=SEED)
-  n_per_class = chunk_size * (num_chunks // n_classes)
-  n_maj_class = chunk_size * num_chunks - n_per_class * (n_classes - 1)
+  n_per_class = chunk_size * (n_chunks // n_classes)
+  n_maj_class = chunk_size * n_chunks - n_per_class * (n_classes - 1)
 
   first_labels = classes[0] * np.ones(n_maj_class, dtype=int)
   remaining_labels = np.concatenate([k * np.ones(n_per_class, dtype=int)
@@ -25,48 +25,48 @@ def gen_labels_for_chunks(num_chunks, chunk_size,
 
   return shuffle(labels, random_state=SEED)
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_exact_num_points_for_chunks(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_exact_num_points_for_chunks(n_chunks, chunk_size):
   """Checks that the chunk generation works well with just enough points."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   constraints = Constraints(labels)
-  chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+  chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                               random_state=SEED)
 
   chunk_no, size_each_chunk = np.unique(chunks[chunks >= 0],
                                         return_counts=True)
 
   np.testing.assert_array_equal(size_each_chunk, chunk_size)
-  assert chunk_no.shape[0] == num_chunks
+  assert chunk_no.shape[0] == n_chunks
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_chunk_case_one_miss_point(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_chunk_case_one_miss_point(n_chunks, chunk_size):
   """Checks that the chunk generation breaks when one point is missing."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   assert len(labels) >= 1
   constraints = Constraints(labels[1:])
   with pytest.raises(ValueError) as e:
-    constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+    constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                        random_state=SEED)
   expected_message = (('Not enough possible chunks of %d elements in each'
                        ' class to form expected %d chunks - maximum number'
                        ' of chunks is %d'
-                       ) % (chunk_size, num_chunks, num_chunks - 1))
+                       ) % (chunk_size, n_chunks, n_chunks - 1))
 
   assert str(e.value) == expected_message
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_unknown_labels_not_in_chunks(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_unknown_labels_not_in_chunks(n_chunks, chunk_size):
   """Checks that unknown labels are not assigned to any chunk."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   constraints = Constraints(labels)
-  chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+  chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                               random_state=SEED)
 
   assert np.all(chunks[labels < 0] < 0)
diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py
index d4d4bfe0..246223b0 100644
--- 
a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -29,47 +29,47 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) res_1 = lmnn.transform(self.X) - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) res_2 = lmnn.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) res_2 = sdml.fit_transform(self.X, self.y) @@ -99,13 +99,13 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) res_2 = rca.fit_transform(self.X, self.y) @@ -123,12 +123,12 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e3d981a4..cc12788c 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -417,7 +417,7 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, random_state=rng) # To make the test work for LMNN: if 'LMNN' in model_base.__class__.__name__: - model_base.set_params(k=1) + model_base.set_params(n_neighbors=1) # To make the test faster 
for estimators that have a max_iter: if hasattr(model_base, 'max_iter'): model_base.set_params(max_iter=1) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 3ad69712..c56f43f2 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -29,7 +29,7 @@ def __init__(self, n_components=None, chunk_size=2, preprocessor=None, random_state=None): # this init makes RCA stable for scikit-learn examples. super(Stable_RCA_Supervised, self).__init__( - num_chunks=2, n_components=n_components, + n_chunks=2, n_components=n_components, chunk_size=chunk_size, preprocessor=preprocessor, random_state=random_state) @@ -37,12 +37,12 @@ def __init__(self, n_components=None, class Stable_SDML_Supervised(SDML_Supervised): def __init__(self, sparsity_param=0.01, - num_constraints=None, verbose=False, preprocessor=None, + n_constraints=None, verbose=False, preprocessor=None, random_state=None): # this init makes SDML stable for scikit-learn examples. super(Stable_SDML_Supervised, self).__init__( sparsity_param=sparsity_param, - num_constraints=num_constraints, verbose=verbose, + n_constraints=n_constraints, verbose=verbose, preprocessor=preprocessor, balance_param=1e-5, prior='identity', random_state=random_state) diff --git a/test/test_utils.py b/test/test_utils.py index 072b94c5..a32ee563 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -60,11 +60,11 @@ def build_regression(with_preprocessor=False): def build_data(): input_data, labels = load_iris(return_X_y=True) X, y = shuffle(input_data, labels, random_state=SEED) - num_constraints = 50 + n_constraints = 50 constraints = Constraints(y) pairs = ( constraints - .positive_negative_pairs(num_constraints, same_length=True, + .positive_negative_pairs(n_constraints, same_length=True, random_state=check_random_state(SEED))) return X, pairs @@ -137,7 +137,7 @@ def build_quadruplets(with_preprocessor=False): (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=5), build_classification), + (RCA_Supervised(n_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), build_classification), (SCML_Supervised(), build_classification)]