From f2cb2e95d9ddb08c44d47ede93f521a29e83fdc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Tue, 23 May 2017 08:51:57 +0200 Subject: [PATCH 1/7] Implementation of PGDM --- README.rst | 1 + metric_learn/__init__.py | 1 + metric_learn/pgdm.py | 436 ++++++++++++++++++++++++++++++++++++ test/metric_learn_test.py | 47 +++- test/test_base_metric.py | 11 + test/test_fit_transform.py | 14 +- test/test_sklearn_compat.py | 9 +- 7 files changed, 515 insertions(+), 4 deletions(-) create mode 100644 metric_learn/pgdm.py diff --git a/README.rst b/README.rst index 9bb762b4..af692623 100644 --- a/README.rst +++ b/README.rst @@ -15,6 +15,7 @@ Metric Learning algorithms in Python. - Local Fisher Discriminant Analysis (LFDA) - Relative Components Analysis (RCA) - Metric Learning for Kernel Regression (MLKR) +- Probabilistic Global Distance Metric Learning (PGDM) **Dependencies** diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index 5a7508c0..adc5307e 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -10,3 +10,4 @@ from .lfda import LFDA from .rca import RCA, RCA_Supervised from .mlkr import MLKR +from .pgdm import PGDM, PGDM_Supervised diff --git a/metric_learn/pgdm.py b/metric_learn/pgdm.py new file mode 100644 index 00000000..09cc8ab5 --- /dev/null +++ b/metric_learn/pgdm.py @@ -0,0 +1,436 @@ +""" +Probabilistic Global Distance Metric Learning, Xing et al., NIPS 2002 + +PGDM minimizes the sum of squared distances between similar examples, +while enforcing the sum of distances between dissimilar examples to be +greater than a certain margin. +This leads to a convex and, thus, local-minima-free optimization problem +that can be solved efficiently. +However, the algorithm involves the computation of eigenvalues, which is the +main speed-bottleneck. +Since it has initially been designed for clustering applications, one of the +implicit assumptions of PGDM is that all classes form a compact set, i.e., +follow a unimodal distribution, which restricts the possible use-cases of +this method. However, it is one of the earliest and a still often cited technique. + +Adapted from Matlab code at http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz +""" + +from __future__ import print_function, absolute_import +import numpy as np +from six.moves import xrange +from sklearn.metrics import pairwise_distances +from sklearn.utils.validation import check_array, check_X_y + +from .base_metric import BaseMetricLearner +from .constraints import Constraints + + +# hack around lack of axis kwarg in older numpy versions +try: + np.linalg.norm([[4]], axis=1) +except TypeError: + def _vector_norm(X): + return np.apply_along_axis(np.linalg.norm, 1, X) +else: + def _vector_norm(X): + return np.linalg.norm(X, axis=1) + + +class PGDM(BaseMetricLearner): + """Probabilistic Global Distance Metric Learning (PGDM)""" + def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, + A0=None, diagonal=False, diagonal_c=1, verbose=False): + """Initialize PGDM. 
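As a sketch of the problem stated in the module docstring above (notation follows Xing et al.; S and D are the sets of similar and dissimilar index pairs, and ||x||_A = \sqrt{x' A x}):

    \min_A    \sum_{(i,j) \in S} ||x_i - x_j||_A^2
    s.t.      \sum_{(i,j) \in D} ||x_i - x_j||_A >= 1
              A \succeq 0   (positive semi-definite)

Keeping non-squared distances in the constraint rules out the trivial solution A = 0, and the problem remains convex.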
+ Parameters + ---------- + max_iter : int, optional + max_proj : int, optional + convergence_threshold : float, optional + A0 : (d x d) matrix, optional + initial metric, defaults to identity + only the main diagonal is taken if `diagonal == True` + diagonal : bool, optional + if True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions + diagonal_c : float, optional + weight of the dissimilarity constraint for diagonal + metric learning + verbose : bool, optional + if True, prints information while learning + """ + self.max_iter = max_iter + self.max_proj = max_proj + self.convergence_threshold = convergence_threshold + self.A0 = A0 + self.diagonal = diagonal + self.diagonal_c = diagonal_c + self.verbose = verbose + + def fit(self, X, constraints): + """Learn the PGDM model. + Parameters + ---------- + X : (n x d) data matrix + each row corresponds to a single instance + constraints : 4-tuple of arrays + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs + """ + constraints = self._process_inputs(X, constraints) + if self.diagonal: + return self._fit_diag(X, constraints) + else: + return self._fit_full(X, constraints) + + def _process_inputs(self, X, constraints): + + self.X_ = X = check_array(X) + + # check to make sure that no two constrained vectors are identical + a,b,c,d = constraints + ident = _vector_norm(X[a] - X[b]) > 1e-9 + a, b = a[ident], b[ident] + ident = _vector_norm(X[c] - X[d]) > 1e-9 + c, d = c[ident], d[ident] + + # init metric + if self.A0 is None: + self.A_ = np.identity(X.shape[1]) + if not self.diagonal: + # Don't know why division by 10... it's in the original code + # and seems to affect the overall scale of the learned metric. + self.A_ /= 10 + else: + self.A_ = check_array(self.A0) + + return a,b,c,d + + def _fit_full(self, X, constraints): + """Learn full metric using PGDM. + Parameters + ---------- + X : (n x d) data matrix + each row corresponds to a single instance + constraints : 4-tuple of arrays + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs + """ + a,b,c,d = constraints + num_pos = len(a) + num_neg = len(c) + num_samples, num_dim = X.shape + + error1 = error2 = 1e10 + eps = 0.01 # error-bound of iterative projection on C1 and C2 + A = self.A_ + + # Create weight vector from similar samples + pos_diff = X[a] - X[b] + w = np.einsum('ij,ik->jk', pos_diff, pos_diff).ravel() + # `w` is the sum of all outer products of the rows in `pos_diff`. 
+ # The above `einsum` is equivalent to the much more inefficient: + # w = np.apply_along_axis( + # lambda x: np.outer(x,x).ravel(), + # 1, + # X[a] - X[b] + # ).sum(axis = 0) + t = w.dot(A.ravel() / 100.0) + + w1 = w / np.linalg.norm(w) # make `w` a unit vector + t1 = t / np.linalg.norm(w) # distance from origin to `w^T*x=t` plane + + cycle = 1 + alpha = 0.1 # initial step size along gradient + + grad1 = self._fS1(X, a, b, A) # gradient of similarity constraint function + grad2 = self._fD1(X, c, d, A) # gradient of dissimilarity constraint function + M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 + + A_old = A.copy() + + for cycle in xrange(self.max_iter): + + # projection of constraints C1 and C2 + satisfy = False + + for it in xrange(self.max_proj): + + # First constraint: + # f(A) = \sum_{i,j \in S} d_ij' A d_ij <= t (1) + # (1) can be rewritten as a linear constraint: w^T x = t, + # where x is the unrolled matrix of A, + # w is also an unrolled matrix of W where + # W_{kl}= \sum_{i,j \in S}d_ij^k * d_ij^l + x0 = A.ravel() + if w.dot(x0) <= t: + x = x0 + else: + x = x0 + (t1 - w1.dot(x0)) * w1 + A[:] = x.reshape(num_dim, num_dim) + + # Second constraint: + # PSD constraint A >= 0 + # project A onto domain A>0 + l, V = np.linalg.eigh((A + A.T) / 2) + A[:] = np.dot(V * np.maximum(0, l[None,:]), V.T) + + fDC2 = w.dot(A.ravel()) + error2 = (fDC2 - t) / t + if error2 < eps: + satisfy = True + break + + # third constraint: gradient ascent + # max: g(A) >= 1 + # here we suppose g(A) = fD(A) = \sum_{I,J \in D} sqrt(d_ij' A d_ij) + + obj_previous = self._fD(X, c, d, A_old) # g(A_old) + obj = self._fD(X, c, d, A) # g(A) + + if ((obj > obj_previous) or (cycle == 0)) and (satisfy): + + # If projection of 1 and 2 is successful, and such projection + # imprives objective function, slightly increase learning rate + # and update from the current A. + alpha *= 1.05 + A_old[:] = A + grad2 = self._fS1(X, a, b, A) + grad1 = self._fD1(X, c, d, A) + M = self._grad_projection(grad1, grad2) + A += alpha * M + + else: + + # If projection of 1 and 2 failed, or obj <= obj_previous due + # to projection of 1 and 2, shrink learning rate and re-update + # from the previous A. + alpha /= 2 + A[:] = A_old + alpha * M + + delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old) + if delta < self.convergence_threshold: + break + if self.verbose: + print('pgdm iter: %d, conv = %f, projections = %d' % (cycle, delta, it+1)) + + if delta > self.convergence_threshold: + self.converged_ = False + if self.verbose: + print('pgdm did not converge, conv = %f' % (delta,)) + else: + self.converged_ = True + if self.verbose: + print('pgdm converged at iter %d, conv = %f' % (cycle, delta)) + self.A_[:] = A_old + self.n_iter_ = cycle + return self + + def _fit_diag(self, X, constraints): + """Learn diagonal metric using PGDM. 
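The two projections inside the inner loop of `_fit_full` above can be hard to read in the unrolled-matrix form; the following is a minimal standalone sketch on a toy 2x2 matrix (toy values, not taken from the patch):

    import numpy as np

    A = np.array([[1.0, 0.9],
                  [0.9, 0.2]])
    w = np.array([1.0, 0.0, 0.0, 1.0])   # stands in for the unrolled \sum d_ij' d_ij
    t = 0.5
    w1 = w / np.linalg.norm(w)
    t1 = t / np.linalg.norm(w)

    # C1: project the unrolled A onto the half-space {x : w'x <= t}
    x = A.ravel()
    if w.dot(x) > t:
        x = x + (t1 - w1.dot(x)) * w1
    A = x.reshape(2, 2)

    # C2: project onto the PSD cone by clipping negative eigenvalues
    lam, V = np.linalg.eigh((A + A.T) / 2)
    A = np.dot(V * np.maximum(lam, 0), V.T)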
+ Parameters + ---------- + X : (n x d) data matrix + each row corresponds to a single instance + constraints : 4-tuple of arrays + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs + """ + a,b,c,d = constraints + num_pos = len(a) + num_neg = len(c) + num_samples, num_dim = X.shape + + s_sum = np.sum((X[a] - X[b]) ** 2, axis = 0) + + it = 0 + error = 1 + eps = 1e-6 + reduction = 2 + w = np.diag(self.A_).copy() + + while error > self.convergence_threshold: + + fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w) + obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 + fS_1st_d = s_sum # first derivative of the similarity constraints + + gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective + hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective + step = np.dot(np.linalg.inv(hessian), gradient); + + # Newton-Rapshon update + # search over optimal lambda + lambd = 1 # initial step-size + w_tmp = np.maximum(0, w - lambd * step) + + obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) + obj_previous = obj * 1.1 # just to get the while-loop started + + inner_it = 0 + while obj < obj_previous: + obj_previous = obj + w_previous = w_tmp.copy() + lambd /= reduction + w_tmp = np.maximum(0, w - lambd * step) + obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) + inner_it += 1 + + w[:] = w_previous + error = np.abs((obj_previous - obj_initial) / obj_previous) + if self.verbose: + print('pgdm iter: %d, conv = %f' % (it, error)) + it += 1 + + self.A_ = np.diag(w) + return self + + def _fD(self, X, c, d, A): + """The value of the dissimilarity constraint function. + + f = f(\sum_{ij \in D} distance(x_i, x_j)) + i.e. distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'} + """ + diff = X[c] - X[d] + return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis = 1))) + 1e-6) + + def _fD1(self, X, c, d, A): + """The gradient of the dissimilarity constraint function w.r.t. A. + + For example, let distance by L1 norm: + f = f(\sum_{ij \in D} \sqrt{(x_i-x_j)A(x_i-x_j)'}) + df/dA_{kl} = f'* d(\sum_{ij \in D} \sqrt{(x_i-x_j)^k*(x_i-x_j)^l})/dA_{kl} + + Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) + so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij + df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)}) + * 0.5*(\sum_{ij \in D} (1/sqrt{tr(d_ij'*d_ij*A)})*(d_ij'*d_ij)) + """ + dim = X.shape[1] + diff = X[c] - X[d] + M = np.einsum('ij,ik->ijk', diff, diff) # outer products of all rows in `diff` + dist = np.sqrt(M.dot(A).trace(axis1 = 1, axis2 = 2)) + sum_deri = np.sum(0.5 * (M / (dist[:,None,None] + 1e-6)), axis = 0) + sum_dist = dist.sum() + return sum_deri / (sum_dist + 1e-6) + + def _fS1(self, X, a, b, A): + """The gradient of the similarity constraint function w.r.t. A. 
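The `einsum` calls in `_fD1` above and `_fS1` below are compact; a small numeric check (random toy data, illustrative only) spells out the "outer products of rows" reading used in the comments:

    import numpy as np

    rng = np.random.RandomState(0)
    diff = rng.randn(5, 3)

    per_row = np.einsum('ij,ik->ijk', diff, diff)   # one outer product per row
    summed = np.einsum('ij,ik->jk', diff, diff)     # sum of all outer products

    assert np.allclose(per_row[2], np.outer(diff[2], diff[2]))
    assert np.allclose(summed, sum(np.outer(row, row) for row in diff))
    assert np.allclose(per_row.sum(axis=0), summed)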
+ + f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij' + df/dA = d(d_ij*A*d_ij')/dA + + Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) + so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij + """ + dim = X.shape[1] + diff = X[a] - X[b] + return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` + + def _grad_projection(self, grad1, grad2): + grad2 = grad2 / np.linalg.norm(grad2) + gtemp = grad1 - np.sum(grad1 * grad2) * grad2 + gtemp /= np.linalg.norm(gtemp) + return gtemp + + def _D_objective(self, X, c, d, w): + return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis = 1) + 1e-6))) + + def _D_constraint(self, X, c, d, w): + """Compute the value, 1st derivative, second derivative (Hessian) of + a dissimilarity constraint function gF(sum_ij distance(d_ij A d_ij)) + where A is a diagonal matrix (in the form of a column vector 'w'). + """ + diff = X[c] - X[d] + diff_sq = diff * diff + dist = np.sqrt(diff_sq.dot(w)) + sum_deri1 = np.sum(diff_sq / (2 * np.maximum(dist, 1e-6))[:,None], axis = 0) + sum_deri2 = np.sum( + np.einsum('ij,ik->ijk', diff_sq, diff_sq) / (-4 * np.maximum(1e-6, dist**3)[:,None,None]), + axis = 0 + ) + sum_dist = dist.sum() + return ( + np.log(sum_dist), + sum_deri1 / sum_dist, + sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) + ) + + def metric(self): + return self.A_ + + def transformer(self): + """Computes the transformation matrix from the Mahalanobis matrix. + L = V.T * w^(-1/2), with A = V*w*V.T being the eigenvector decomposition of A with + the eigenvalues in the diagonal matrix w and the columns of V being the eigenvectors. + + The Cholesky decomposition cannot be applied here, since PGDM learns only a positive + *semi*-definite Mahalanobis matrix. + + Returns + ------- + L : (d x d) matrix + """ + if self.diagonal: + return np.sqrt(self.A_) + else: + w, V = np.linalg.eigh(self.A_) + return V.T * np.sqrt(np.maximum(0, w[:,None])) + + +class PGDM_Supervised(PGDM): + """Probabilistic Global Distance Metric Learning (PGDM)""" + def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, + num_labeled=np.inf, num_constraints=None, + A0=None, diagonal=False, diagonal_c=1, verbose=False): + """Initialize the learner. + Parameters + ---------- + max_iter : int, optional + max_proj : int, optional + convergence_threshold : float, optional + num_labeled : int, optional + number of labels to preserve for training + num_constraints: int, optional + number of constraints to generate + A0 : (d x d) matrix, optional + initial metric, defaults to identity + only the main diagonal is taken if `diagonal == True` + diagonal : bool, optional + if True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions + diagonal_c : float, optional + weight of the dissimilarity constraint for diagonal + metric learning + verbose : bool, optional + if True, prints information while learning + """ + PGDM.__init__(self, max_iter=max_iter, max_proj=max_proj, + convergence_threshold=convergence_threshold, + A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, + verbose=verbose) + self.num_labeled = num_labeled + self.num_constraints = num_constraints + + def fit(self, X, y, random_state=np.random): + """Create constraints from labels and learn the PGDM model. + Parameters + ---------- + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + y : (n) array-like + Data labels. 
+ random_state : numpy.random.RandomState, optional + If provided, controls random number generation. + """ + X, y = check_X_y(X, y) + num_constraints = self.num_constraints + if num_constraints is None: + num_classes = len(np.unique(y)) + num_constraints = 20 * num_classes**2 + + c = Constraints.random_subset(y, self.num_labeled, + random_state=random_state) + pos_neg = c.positive_negative_pairs(num_constraints, + random_state=random_state) + return PGDM.fit(self, X, pos_neg) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1e7f31fe..9694ec02 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,8 +6,8 @@ from numpy.testing import assert_array_almost_equal from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LMNN, NCA, LFDA, Covariance, MLKR, PGDM, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, PGDM_Supervised) # Import this specially for testing. from metric_learn.lmnn import python_LMNN @@ -149,5 +149,48 @@ def test_iris(self): self.assertLess(csep, 0.25) +class TestPGDM(MetricTestCase): + def test_iris(self): + + # Generate full set of constraints for comparison with reference implementation + n = self.iris_points.shape[0] + a, b, c, d = [], [], [], [] + for i in range(n): + for j in range(i+1, n): + if self.iris_labels[i] == self.iris_labels[j]: + a.append(i) + b.append(j) + else: + c.append(i) + d.append(j) + + # Full metric + pgdm = PGDM(convergence_threshold = 0.01) + pgdm.fit(self.iris_points, [np.asarray(x) for x in [a,b,c,d]]) + expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265], + [+0.00083371, +0.00149466, -0.00200719, -0.00296284], + [-0.00111959, -0.00200719, +0.00269546, +0.00397881], + [-0.00165265, -0.00296284, +0.00397881, +0.00587320]] + assert_array_almost_equal(expected, pgdm.metric(), decimal=6) + + # Diagonal metric + pgdm = PGDM(diagonal = True) + pgdm.fit(self.iris_points, [np.asarray(x) for x in [a,b,c,d]]) + expected = [0, 0, 1.21045968, 1.22552608] + assert_array_almost_equal(np.diag(expected), pgdm.metric(), decimal=6) + + # Supervised Full + pgdm = PGDM_Supervised() + pgdm.fit(self.iris_points, self.iris_labels) + csep = class_separation(pgdm.transform(), self.iris_labels) + self.assertLess(csep, 0.15) + + # Supervised Diagonal + pgdm = PGDM_Supervised(diagonal = True) + pgdm.fit(self.iris_points, self.iris_labels) + csep = class_separation(pgdm.transform(), self.iris_labels) + self.assertLess(csep, 0.2) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_base_metric.py b/test/test_base_metric.py index d73138cd..6b35dd78 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -63,5 +63,16 @@ def test_mlkr(self): "MLKR(A0=None, alpha=0.0001, epsilon=0.01, " "max_iter=1000, num_dims=None)") + def test_pgdm(self): + self.assertEqual(str(metric_learn.PGDM()), """ +PGDM(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1, + max_iter=100, max_proj=10000, verbose=False) +""".strip('\n')) + self.assertEqual(str(metric_learn.PGDM_Supervised()), """ +PGDM_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, + diagonal_c=1, max_iter=100, max_proj=10000, num_constraints=None, + num_labeled=inf, verbose=False) +""".strip('\n')) + if __name__ == '__main__': unittest.main() diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index eff8fa01..9e687a63 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -5,7 +5,7 
@@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, PGDM_Supervised) class TestFitTransform(unittest.TestCase): @@ -118,6 +118,18 @@ def test_mlkr(self): assert_array_almost_equal(res_1, res_2) + def test_pgdm_supervised(self): + seed = np.random.RandomState(1234) + pgdm = PGDM_Supervised(num_constraints=200) + pgdm.fit(self.X, self.y, random_state=seed) + res_1 = pgdm.transform() + + seed = np.random.RandomState(1234) + pgdm = PGDM_Supervised(num_constraints=200) + res_2 = pgdm.fit_transform(self.X, self.y, random_state=seed) + + assert_array_almost_equal(res_1, res_2) + if __name__ == '__main__': unittest.main() diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 58c7cd05..156a6c63 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -4,7 +4,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, PGDM_Supervised) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -22,6 +22,10 @@ class dITML(deterministic_mixin, ITML_Supervised): pass +class dPGDM(deterministic_mixin, PGDM_Supervised): + pass + + class dSDML(deterministic_mixin, SDML_Supervised): pass @@ -52,6 +56,9 @@ def test_lsml(self): def test_itml(self): check_estimator(dITML) + def test_pgdm(self): + check_estimator(dPGDM) + # This fails due to a FloatingPointError # def test_sdml(self): # check_estimator(dSDML) From fc1d026bc630fb0ca9b76e872cb1ca7b0ac109b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Tue, 23 May 2017 11:40:10 +0200 Subject: [PATCH 2/7] Python2 compatibility --- metric_learn/pgdm.py | 10 +++++----- test/test_base_metric.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/metric_learn/pgdm.py b/metric_learn/pgdm.py index 09cc8ab5..4b6c6ce5 100644 --- a/metric_learn/pgdm.py +++ b/metric_learn/pgdm.py @@ -40,7 +40,7 @@ def _vector_norm(X): class PGDM(BaseMetricLearner): """Probabilistic Global Distance Metric Learning (PGDM)""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - A0=None, diagonal=False, diagonal_c=1, verbose=False): + A0=None, diagonal=False, diagonal_c=1.0, verbose=False): """Initialize PGDM. Parameters ---------- @@ -100,7 +100,7 @@ def _process_inputs(self, X, constraints): if not self.diagonal: # Don't know why division by 10... it's in the original code # and seems to affect the overall scale of the learned metric. - self.A_ /= 10 + self.A_ /= 10.0 else: self.A_ = check_array(self.A0) @@ -244,9 +244,9 @@ def _fit_diag(self, X, constraints): s_sum = np.sum((X[a] - X[b]) ** 2, axis = 0) it = 0 - error = 1 + error = 1.0 eps = 1e-6 - reduction = 2 + reduction = 2.0 w = np.diag(self.A_).copy() while error > self.convergence_threshold: @@ -382,7 +382,7 @@ class PGDM_Supervised(PGDM): """Probabilistic Global Distance Metric Learning (PGDM)""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, num_labeled=np.inf, num_constraints=None, - A0=None, diagonal=False, diagonal_c=1, verbose=False): + A0=None, diagonal=False, diagonal_c=1.0, verbose=False): """Initialize the learner. 
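The float literals introduced by this patch guard against Python 2's truncating integer division (illustrative values; assumes no `from __future__ import division`):

    lambd, reduction = 1, 2
    lambd /= reduction        # Python 2: 1 / 2 == 0, so the line-search step collapses
    lambd, reduction = 1, 2.0
    lambd /= reduction        # 0.5 on both Python 2 and Python 3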
Parameters ---------- diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 6b35dd78..3292260a 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -65,12 +65,12 @@ def test_mlkr(self): def test_pgdm(self): self.assertEqual(str(metric_learn.PGDM()), """ -PGDM(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1, +PGDM(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, verbose=False) """.strip('\n')) self.assertEqual(str(metric_learn.PGDM_Supervised()), """ PGDM_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, - diagonal_c=1, max_iter=100, max_proj=10000, num_constraints=None, + diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, num_labeled=inf, verbose=False) """.strip('\n')) From 23067e44c5df92d4adbba8c6ac022c3e5d728932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Tue, 23 May 2017 13:32:40 +0200 Subject: [PATCH 3/7] Speed up PGDM on high-dimensional data --- metric_learn/pgdm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/pgdm.py b/metric_learn/pgdm.py index 4b6c6ce5..6b48bfed 100644 --- a/metric_learn/pgdm.py +++ b/metric_learn/pgdm.py @@ -309,8 +309,8 @@ def _fD1(self, X, c, d, A): dim = X.shape[1] diff = X[c] - X[d] M = np.einsum('ij,ik->ijk', diff, diff) # outer products of all rows in `diff` - dist = np.sqrt(M.dot(A).trace(axis1 = 1, axis2 = 2)) - sum_deri = np.sum(0.5 * (M / (dist[:,None,None] + 1e-6)), axis = 0) + dist = np.sqrt(np.sum(M * A[None,:,:], axis = (1,2))) + sum_deri = np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis = 0) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) From 758276179ac79e24b0e23a3ea1b09be8ea516987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Thu, 25 May 2017 10:34:23 +0200 Subject: [PATCH 4/7] Optimized some summations using `np.einsum` --- metric_learn/pgdm.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/metric_learn/pgdm.py b/metric_learn/pgdm.py index 6b48bfed..89d0e451 100644 --- a/metric_learn/pgdm.py +++ b/metric_learn/pgdm.py @@ -308,9 +308,9 @@ def _fD1(self, X, c, d, A): """ dim = X.shape[1] diff = X[c] - X[d] - M = np.einsum('ij,ik->ijk', diff, diff) # outer products of all rows in `diff` - dist = np.sqrt(np.sum(M * A[None,:,:], axis = (1,2))) - sum_deri = np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis = 0) + M = np.einsum('ij,ik->ijk', diff, diff) # outer products of all rows in `diff` + dist = np.sqrt(np.einsum('ijk,jk', M, A)) # equivalent to: np.sqrt(np.sum(M * A[None,:,:], axis = (1,2))) + sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) # equivalent to: np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis = 0) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) @@ -344,10 +344,11 @@ def _D_constraint(self, X, c, d, w): diff = X[c] - X[d] diff_sq = diff * diff dist = np.sqrt(diff_sq.dot(w)) - sum_deri1 = np.sum(diff_sq / (2 * np.maximum(dist, 1e-6))[:,None], axis = 0) - sum_deri2 = np.sum( - np.einsum('ij,ik->ijk', diff_sq, diff_sq) / (-4 * np.maximum(1e-6, dist**3)[:,None,None]), - axis = 0 + sum_deri1 = np.einsum('ij,i', diff_sq, 0.5 / np.maximum(dist, 1e-6)) + sum_deri2 = np.einsum( + 'ijk,i', + np.einsum('ij,ik->ijk', diff_sq, diff_sq), + -0.25 / np.maximum(1e-6, dist**3) ) sum_dist = dist.sum() return ( From 9a29405b32a02d464d1f0854761fdfb99e09cf4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Thu, 25 May 2017 11:29:21 +0200 Subject: [PATCH 
5/7] Addressed requests from review by perimosocordiae --- metric_learn/_util.py | 12 +++++++ metric_learn/itml.py | 19 +++-------- metric_learn/pgdm.py | 70 ++++++++++++++++++--------------------- test/metric_learn_test.py | 22 +++++------- 4 files changed, 58 insertions(+), 65 deletions(-) create mode 100644 metric_learn/_util.py diff --git a/metric_learn/_util.py b/metric_learn/_util.py new file mode 100644 index 00000000..b34860d6 --- /dev/null +++ b/metric_learn/_util.py @@ -0,0 +1,12 @@ +import numpy as np + + +# hack around lack of axis kwarg in older numpy versions +try: + np.linalg.norm([[4]], axis=1) +except TypeError: + def vector_norm(X): + return np.apply_along_axis(np.linalg.norm, 1, X) +else: + def vector_norm(X): + return np.linalg.norm(X, axis=1) \ No newline at end of file diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 4c154ad4..7169fb36 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -21,6 +21,7 @@ from .base_metric import BaseMetricLearner from .constraints import Constraints +from ._util import vector_norm class ITML(BaseMetricLearner): @@ -54,10 +55,10 @@ def _process_inputs(self, X, constraints, bounds): self.X_ = X = check_array(X) # check to make sure that no two constrained vectors are identical a,b,c,d = constraints - ident = _vector_norm(X[a] - X[b]) > 1e-9 - a, b = a[ident], b[ident] - ident = _vector_norm(X[c] - X[d]) > 1e-9 - c, d = c[ident], d[ident] + no_ident = vector_norm(X[a] - X[b]) > 1e-9 + a, b = a[no_ident], b[no_ident] + no_ident = vector_norm(X[c] - X[d]) > 1e-9 + c, d = c[no_ident], d[no_ident] # init bounds if bounds is None: self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) @@ -138,16 +139,6 @@ def fit(self, X, constraints, bounds=None): def metric(self): return self.A_ -# hack around lack of axis kwarg in older numpy versions -try: - np.linalg.norm([[4]], axis=1) -except TypeError: - def _vector_norm(X): - return np.apply_along_axis(np.linalg.norm, 1, X) -else: - def _vector_norm(X): - return np.linalg.norm(X, axis=1) - class ITML_Supervised(ITML): """Information Theoretic Metric Learning (ITML)""" diff --git a/metric_learn/pgdm.py b/metric_learn/pgdm.py index 89d0e451..8e2e02a0 100644 --- a/metric_learn/pgdm.py +++ b/metric_learn/pgdm.py @@ -16,7 +16,7 @@ Adapted from Matlab code at http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz """ -from __future__ import print_function, absolute_import +from __future__ import print_function, absolute_import, division import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances @@ -24,18 +24,9 @@ from .base_metric import BaseMetricLearner from .constraints import Constraints +from ._util import vector_norm -# hack around lack of axis kwarg in older numpy versions -try: - np.linalg.norm([[4]], axis=1) -except TypeError: - def _vector_norm(X): - return np.apply_along_axis(np.linalg.norm, 1, X) -else: - def _vector_norm(X): - return np.linalg.norm(X, axis=1) - class PGDM(BaseMetricLearner): """Probabilistic Global Distance Metric Learning (PGDM)""" @@ -89,10 +80,14 @@ def _process_inputs(self, X, constraints): # check to make sure that no two constrained vectors are identical a,b,c,d = constraints - ident = _vector_norm(X[a] - X[b]) > 1e-9 - a, b = a[ident], b[ident] - ident = _vector_norm(X[c] - X[d]) > 1e-9 - c, d = c[ident], d[ident] + no_ident = vector_norm(X[a] - X[b]) > 1e-9 + a, b = a[no_ident], b[no_ident] + no_ident = vector_norm(X[c] - X[d]) > 1e-9 + c, d = c[no_ident], d[no_ident] + if len(a) == 
0: + raise RuntimeError('No similarity constraints given for PGDM.') + if len(c) == 0: + raise RuntimeError('No dissimilarity constraints given for PGDM.') # init metric if self.A0 is None: @@ -135,17 +130,18 @@ def _fit_full(self, X, constraints): # 1, # X[a] - X[b] # ).sum(axis = 0) - t = w.dot(A.ravel() / 100.0) + t = w.dot(A.ravel()) / 100.0 - w1 = w / np.linalg.norm(w) # make `w` a unit vector - t1 = t / np.linalg.norm(w) # distance from origin to `w^T*x=t` plane + w_norm = np.linalg.norm(w) + w1 = w / w_norm # make `w` a unit vector + t1 = t / w_norm # distance from origin to `w^T*x=t` plane cycle = 1 - alpha = 0.1 # initial step size along gradient + alpha = 0.1 # initial step size along gradient - grad1 = self._fS1(X, a, b, A) # gradient of similarity constraint function - grad2 = self._fD1(X, c, d, A) # gradient of dissimilarity constraint function - M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 + grad1 = self._fS1(X, a, b, A) # gradient of similarity constraint function + grad2 = self._fD1(X, c, d, A) # gradient of dissimilarity constraint function + M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 A_old = A.copy() @@ -185,13 +181,13 @@ def _fit_full(self, X, constraints): # max: g(A) >= 1 # here we suppose g(A) = fD(A) = \sum_{I,J \in D} sqrt(d_ij' A d_ij) - obj_previous = self._fD(X, c, d, A_old) # g(A_old) - obj = self._fD(X, c, d, A) # g(A) + obj_previous = self._fD(X, c, d, A_old) # g(A_old) + obj = self._fD(X, c, d, A) # g(A) - if ((obj > obj_previous) or (cycle == 0)) and (satisfy): + if satisfy and (obj > obj_previous or cycle == 0): # If projection of 1 and 2 is successful, and such projection - # imprives objective function, slightly increase learning rate + # improves objective function, slightly increase learning rate # and update from the current A. alpha *= 1.05 A_old[:] = A @@ -241,7 +237,7 @@ def _fit_diag(self, X, constraints): num_neg = len(c) num_samples, num_dim = X.shape - s_sum = np.sum((X[a] - X[b]) ** 2, axis = 0) + s_sum = np.sum((X[a] - X[b]) ** 2, axis=0) it = 0 error = 1.0 @@ -253,19 +249,19 @@ def _fit_diag(self, X, constraints): fD0, fD_1st_d, fD_2nd_d = self._D_constraint(X, c, d, w) obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 - fS_1st_d = s_sum # first derivative of the similarity constraints + fS_1st_d = s_sum # first derivative of the similarity constraints - gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective - hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective + gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective + hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective step = np.dot(np.linalg.inv(hessian), gradient); # Newton-Rapshon update # search over optimal lambda - lambd = 1 # initial step-size + lambd = 1 # initial step-size w_tmp = np.maximum(0, w - lambd * step) obj = np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(X, c, d, w_tmp) - obj_previous = obj * 1.1 # just to get the while-loop started + obj_previous = obj * 1.1 # just to get the while-loop started inner_it = 0 while obj < obj_previous: @@ -292,7 +288,7 @@ def _fD(self, X, c, d, A): i.e. 
distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'} """ diff = X[c] - X[d] - return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis = 1))) + 1e-6) + return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6) def _fD1(self, X, c, d, A): """The gradient of the dissimilarity constraint function w.r.t. A. @@ -309,8 +305,8 @@ def _fD1(self, X, c, d, A): dim = X.shape[1] diff = X[c] - X[d] M = np.einsum('ij,ik->ijk', diff, diff) # outer products of all rows in `diff` - dist = np.sqrt(np.einsum('ijk,jk', M, A)) # equivalent to: np.sqrt(np.sum(M * A[None,:,:], axis = (1,2))) - sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) # equivalent to: np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis = 0) + dist = np.sqrt(np.einsum('ijk,jk', M, A)) # equivalent to: np.sqrt(np.sum(M * A[None,:,:], axis=(1,2))) + sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) # equivalent to: np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis=0) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) @@ -325,7 +321,7 @@ def _fS1(self, X, a, b, A): """ dim = X.shape[1] diff = X[a] - X[b] - return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` + return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` def _grad_projection(self, grad1, grad2): grad2 = grad2 / np.linalg.norm(grad2) @@ -334,7 +330,7 @@ def _grad_projection(self, grad1, grad2): return gtemp def _D_objective(self, X, c, d, w): - return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis = 1) + 1e-6))) + return np.log(np.sum(np.sqrt(np.sum(((X[c] - X[d]) ** 2) * w[None,:], axis=1) + 1e-6))) def _D_constraint(self, X, c, d, w): """Compute the value, 1st derivative, second derivative (Hessian) of diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 9694ec02..2aa37687 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -154,19 +154,13 @@ def test_iris(self): # Generate full set of constraints for comparison with reference implementation n = self.iris_points.shape[0] - a, b, c, d = [], [], [], [] - for i in range(n): - for j in range(i+1, n): - if self.iris_labels[i] == self.iris_labels[j]: - a.append(i) - b.append(j) - else: - c.append(i) - d.append(j) + mask = (self.iris_labels[None] == self.iris_labels[:,None]) + a, b = np.nonzero(np.triu(mask, k=1)) + c, d = np.nonzero(np.triu(~mask, k=1)) # Full metric - pgdm = PGDM(convergence_threshold = 0.01) - pgdm.fit(self.iris_points, [np.asarray(x) for x in [a,b,c,d]]) + pgdm = PGDM(convergence_threshold=0.01) + pgdm.fit(self.iris_points, [a,b,c,d]) expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265], [+0.00083371, +0.00149466, -0.00200719, -0.00296284], [-0.00111959, -0.00200719, +0.00269546, +0.00397881], @@ -174,8 +168,8 @@ def test_iris(self): assert_array_almost_equal(expected, pgdm.metric(), decimal=6) # Diagonal metric - pgdm = PGDM(diagonal = True) - pgdm.fit(self.iris_points, [np.asarray(x) for x in [a,b,c,d]]) + pgdm = PGDM(diagonal=True) + pgdm.fit(self.iris_points, [a,b,c,d]) expected = [0, 0, 1.21045968, 1.22552608] assert_array_almost_equal(np.diag(expected), pgdm.metric(), decimal=6) @@ -186,7 +180,7 @@ def test_iris(self): self.assertLess(csep, 0.15) # Supervised Diagonal - pgdm = PGDM_Supervised(diagonal = True) + pgdm = PGDM_Supervised(diagonal=True) pgdm.fit(self.iris_points, self.iris_labels) csep = class_separation(pgdm.transform(), self.iris_labels) self.assertLess(csep, 0.2) From e9893c3c0dd5b81ef017086e88646f0a46122d2a Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Thu, 25 May 2017 11:46:08 +0200 Subject: [PATCH 6/7] Renamed PGDM to MMC --- README.rst | 2 +- metric_learn/__init__.py | 2 +- metric_learn/{pgdm.py => mmc.py} | 48 ++++++++++++++++---------------- test/metric_learn_test.py | 30 ++++++++++---------- test/test_base_metric.py | 12 ++++---- test/test_fit_transform.py | 14 +++++----- test/test_sklearn_compat.py | 8 +++--- 7 files changed, 58 insertions(+), 58 deletions(-) rename metric_learn/{pgdm.py => mmc.py} (91%) diff --git a/README.rst b/README.rst index af692623..1e8adbe7 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ Metric Learning algorithms in Python. - Local Fisher Discriminant Analysis (LFDA) - Relative Components Analysis (RCA) - Metric Learning for Kernel Regression (MLKR) -- Probabilistic Global Distance Metric Learning (PGDM) +- Mahalanobis Metric for Clustering (MMC) **Dependencies** diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index adc5307e..b86c10e1 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -10,4 +10,4 @@ from .lfda import LFDA from .rca import RCA, RCA_Supervised from .mlkr import MLKR -from .pgdm import PGDM, PGDM_Supervised +from .mmc import MMC, MMC_Supervised diff --git a/metric_learn/pgdm.py b/metric_learn/mmc.py similarity index 91% rename from metric_learn/pgdm.py rename to metric_learn/mmc.py index 8e2e02a0..36c16812 100644 --- a/metric_learn/pgdm.py +++ b/metric_learn/mmc.py @@ -1,7 +1,7 @@ """ -Probabilistic Global Distance Metric Learning, Xing et al., NIPS 2002 +Mahalanobis Metric Learning with Application for Clustering with Side-Information, Xing et al., NIPS 2002 -PGDM minimizes the sum of squared distances between similar examples, +MMC minimizes the sum of squared distances between similar examples, while enforcing the sum of distances between dissimilar examples to be greater than a certain margin. This leads to a convex and, thus, local-minima-free optimization problem @@ -9,7 +9,7 @@ However, the algorithm involves the computation of eigenvalues, which is the main speed-bottleneck. Since it has initially been designed for clustering applications, one of the -implicit assumptions of PGDM is that all classes form a compact set, i.e., +implicit assumptions of MMC is that all classes form a compact set, i.e., follow a unimodal distribution, which restricts the possible use-cases of this method. However, it is one of the earliest and a still often cited technique. @@ -28,11 +28,11 @@ -class PGDM(BaseMetricLearner): - """Probabilistic Global Distance Metric Learning (PGDM)""" +class MMC(BaseMetricLearner): + """Mahalanobis Metric for Clustering (MMC)""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, A0=None, diagonal=False, diagonal_c=1.0, verbose=False): - """Initialize PGDM. + """Initialize MMC. Parameters ---------- max_iter : int, optional @@ -59,7 +59,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, self.verbose = verbose def fit(self, X, constraints): - """Learn the PGDM model. + """Learn the MMC model. 
Parameters ---------- X : (n x d) data matrix @@ -85,9 +85,9 @@ def _process_inputs(self, X, constraints): no_ident = vector_norm(X[c] - X[d]) > 1e-9 c, d = c[no_ident], d[no_ident] if len(a) == 0: - raise RuntimeError('No similarity constraints given for PGDM.') + raise RuntimeError('No similarity constraints given for MMC.') if len(c) == 0: - raise RuntimeError('No dissimilarity constraints given for PGDM.') + raise RuntimeError('No dissimilarity constraints given for MMC.') # init metric if self.A0 is None: @@ -102,7 +102,7 @@ def _process_inputs(self, X, constraints): return a,b,c,d def _fit_full(self, X, constraints): - """Learn full metric using PGDM. + """Learn full metric using MMC. Parameters ---------- X : (n x d) data matrix @@ -208,22 +208,22 @@ def _fit_full(self, X, constraints): if delta < self.convergence_threshold: break if self.verbose: - print('pgdm iter: %d, conv = %f, projections = %d' % (cycle, delta, it+1)) + print('mmc iter: %d, conv = %f, projections = %d' % (cycle, delta, it+1)) if delta > self.convergence_threshold: self.converged_ = False if self.verbose: - print('pgdm did not converge, conv = %f' % (delta,)) + print('mmc did not converge, conv = %f' % (delta,)) else: self.converged_ = True if self.verbose: - print('pgdm converged at iter %d, conv = %f' % (cycle, delta)) + print('mmc converged at iter %d, conv = %f' % (cycle, delta)) self.A_[:] = A_old self.n_iter_ = cycle return self def _fit_diag(self, X, constraints): - """Learn diagonal metric using PGDM. + """Learn diagonal metric using MMC. Parameters ---------- X : (n x d) data matrix @@ -275,7 +275,7 @@ def _fit_diag(self, X, constraints): w[:] = w_previous error = np.abs((obj_previous - obj_initial) / obj_previous) if self.verbose: - print('pgdm iter: %d, conv = %f' % (it, error)) + print('mmc iter: %d, conv = %f' % (it, error)) it += 1 self.A_ = np.diag(w) @@ -361,7 +361,7 @@ def transformer(self): L = V.T * w^(-1/2), with A = V*w*V.T being the eigenvector decomposition of A with the eigenvalues in the diagonal matrix w and the columns of V being the eigenvectors. - The Cholesky decomposition cannot be applied here, since PGDM learns only a positive + The Cholesky decomposition cannot be applied here, since MMC learns only a positive *semi*-definite Mahalanobis matrix. Returns @@ -375,8 +375,8 @@ def transformer(self): return V.T * np.sqrt(np.maximum(0, w[:,None])) -class PGDM_Supervised(PGDM): - """Probabilistic Global Distance Metric Learning (PGDM)""" +class MMC_Supervised(MMC): + """Mahalanobis Metric for Clustering (MMC)""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, num_labeled=np.inf, num_constraints=None, A0=None, diagonal=False, diagonal_c=1.0, verbose=False): @@ -402,15 +402,15 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, verbose : bool, optional if True, prints information while learning """ - PGDM.__init__(self, max_iter=max_iter, max_proj=max_proj, - convergence_threshold=convergence_threshold, - A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, - verbose=verbose) + MMC.__init__(self, max_iter=max_iter, max_proj=max_proj, + convergence_threshold=convergence_threshold, + A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, + verbose=verbose) self.num_labeled = num_labeled self.num_constraints = num_constraints def fit(self, X, y, random_state=np.random): - """Create constraints from labels and learn the PGDM model. + """Create constraints from labels and learn the MMC model. 
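A usage sketch of the two entry points (assumes scikit-learn's iris data; the explicit index pairs are arbitrary illustrative picks, not taken from the patch):

    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import MMC, MMC_Supervised

    iris = load_iris()
    X, y = iris.data, iris.target

    # Supervised wrapper: similar/dissimilar pairs are sampled from the labels.
    mmc = MMC_Supervised(num_constraints=200)
    X_mmc = mmc.fit_transform(X, y)

    # Plain MMC: pass explicit (a, b, c, d) index arrays, where (a[k], b[k])
    # are similar pairs and (c[k], d[k]) are dissimilar pairs.
    a, b = np.array([0, 1]), np.array([1, 2])
    c, d = np.array([0, 1]), np.array([50, 51])
    mmc = MMC().fit(X, (a, b, c, d))
    M = mmc.metric()          # the learned Mahalanobis matrix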
Parameters ---------- X : (n x d) matrix @@ -430,4 +430,4 @@ def fit(self, X, y, random_state=np.random): random_state=random_state) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) - return PGDM.fit(self, X, pos_neg) + return MMC.fit(self, X, pos_neg) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 2aa37687..351b6298 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -6,8 +6,8 @@ from numpy.testing import assert_array_almost_equal from metric_learn import ( - LMNN, NCA, LFDA, Covariance, MLKR, PGDM, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, PGDM_Supervised) + LMNN, NCA, LFDA, Covariance, MLKR, MMC, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) # Import this specially for testing. from metric_learn.lmnn import python_LMNN @@ -149,7 +149,7 @@ def test_iris(self): self.assertLess(csep, 0.25) -class TestPGDM(MetricTestCase): +class TestMMC(MetricTestCase): def test_iris(self): # Generate full set of constraints for comparison with reference implementation @@ -159,30 +159,30 @@ def test_iris(self): c, d = np.nonzero(np.triu(~mask, k=1)) # Full metric - pgdm = PGDM(convergence_threshold=0.01) - pgdm.fit(self.iris_points, [a,b,c,d]) + mmc = MMC(convergence_threshold=0.01) + mmc.fit(self.iris_points, [a,b,c,d]) expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265], [+0.00083371, +0.00149466, -0.00200719, -0.00296284], [-0.00111959, -0.00200719, +0.00269546, +0.00397881], [-0.00165265, -0.00296284, +0.00397881, +0.00587320]] - assert_array_almost_equal(expected, pgdm.metric(), decimal=6) + assert_array_almost_equal(expected, mmc.metric(), decimal=6) # Diagonal metric - pgdm = PGDM(diagonal=True) - pgdm.fit(self.iris_points, [a,b,c,d]) + mmc = MMC(diagonal=True) + mmc.fit(self.iris_points, [a,b,c,d]) expected = [0, 0, 1.21045968, 1.22552608] - assert_array_almost_equal(np.diag(expected), pgdm.metric(), decimal=6) + assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6) # Supervised Full - pgdm = PGDM_Supervised() - pgdm.fit(self.iris_points, self.iris_labels) - csep = class_separation(pgdm.transform(), self.iris_labels) + mmc = MMC_Supervised() + mmc.fit(self.iris_points, self.iris_labels) + csep = class_separation(mmc.transform(), self.iris_labels) self.assertLess(csep, 0.15) # Supervised Diagonal - pgdm = PGDM_Supervised(diagonal=True) - pgdm.fit(self.iris_points, self.iris_labels) - csep = class_separation(pgdm.transform(), self.iris_labels) + mmc = MMC_Supervised(diagonal=True) + mmc.fit(self.iris_points, self.iris_labels) + csep = class_separation(mmc.transform(), self.iris_labels) self.assertLess(csep, 0.2) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 3292260a..31db4e6f 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -63,13 +63,13 @@ def test_mlkr(self): "MLKR(A0=None, alpha=0.0001, epsilon=0.01, " "max_iter=1000, num_dims=None)") - def test_pgdm(self): - self.assertEqual(str(metric_learn.PGDM()), """ -PGDM(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, - max_iter=100, max_proj=10000, verbose=False) + def test_mmc(self): + self.assertEqual(str(metric_learn.MMC()), """ +MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, + max_iter=100, max_proj=10000, verbose=False) """.strip('\n')) - self.assertEqual(str(metric_learn.PGDM_Supervised()), """ -PGDM_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, + 
self.assertEqual(str(metric_learn.MMC_Supervised()), """ +MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, num_labeled=inf, verbose=False) """.strip('\n')) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 9e687a63..707815ec 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -5,7 +5,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, PGDM_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) class TestFitTransform(unittest.TestCase): @@ -118,15 +118,15 @@ def test_mlkr(self): assert_array_almost_equal(res_1, res_2) - def test_pgdm_supervised(self): + def test_mmc_supervised(self): seed = np.random.RandomState(1234) - pgdm = PGDM_Supervised(num_constraints=200) - pgdm.fit(self.X, self.y, random_state=seed) - res_1 = pgdm.transform() + mmc = MMC_Supervised(num_constraints=200) + mmc.fit(self.X, self.y, random_state=seed) + res_1 = mmc.transform() seed = np.random.RandomState(1234) - pgdm = PGDM_Supervised(num_constraints=200) - res_2 = pgdm.fit_transform(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200) + res_2 = mmc.fit_transform(self.X, self.y, random_state=seed) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 156a6c63..f1e1a09d 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -4,7 +4,7 @@ from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, - LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, PGDM_Supervised) + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MMC_Supervised) # Wrap the _Supervised methods with a deterministic wrapper for testing. 
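The `deterministic_mixin` referenced here is not part of this diff; a plausible sketch of what such a wrapper does (an assumption about the existing test helper, not the repository's actual code) is to pin the random state so `check_estimator` sees reproducible fits:

    import numpy as np

    class deterministic_mixin(object):
        def fit(self, X, y):
            # assumed behaviour: forward a fixed RandomState to the wrapped fit
            rs = np.random.RandomState(1234)
            return super(deterministic_mixin, self).fit(X, y, random_state=rs)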
@@ -22,7 +22,7 @@ class dITML(deterministic_mixin, ITML_Supervised): pass -class dPGDM(deterministic_mixin, PGDM_Supervised): +class dMMC(deterministic_mixin, MMC_Supervised): pass @@ -56,8 +56,8 @@ def test_lsml(self): def test_itml(self): check_estimator(dITML) - def test_pgdm(self): - check_estimator(dPGDM) + def test_mmc(self): + check_estimator(dMMC) # This fails due to a FloatingPointError # def test_sdml(self): From 11d7d0a64c5b270a0f71defef133a6ec1d77a761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Barz?= Date: Thu, 25 May 2017 16:07:23 +0200 Subject: [PATCH 7/7] Addressed 2nd review by perimosocordiae --- metric_learn/mmc.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 36c16812..7760e1b1 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -85,9 +85,9 @@ def _process_inputs(self, X, constraints): no_ident = vector_norm(X[c] - X[d]) > 1e-9 c, d = c[no_ident], d[no_ident] if len(a) == 0: - raise RuntimeError('No similarity constraints given for MMC.') + raise ValueError('No non-trivial similarity constraints given for MMC.') if len(c) == 0: - raise RuntimeError('No dissimilarity constraints given for MMC.') + raise ValueError('No non-trivial dissimilarity constraints given for MMC.') # init metric if self.A0 is None: @@ -304,9 +304,12 @@ def _fD1(self, X, c, d, A): """ dim = X.shape[1] diff = X[c] - X[d] - M = np.einsum('ij,ik->ijk', diff, diff) # outer products of all rows in `diff` - dist = np.sqrt(np.einsum('ijk,jk', M, A)) # equivalent to: np.sqrt(np.sum(M * A[None,:,:], axis=(1,2))) - sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) # equivalent to: np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis=0) + # outer products of all rows in `diff` + M = np.einsum('ij,ik->ijk', diff, diff) + # faster version of: dist = np.sqrt(np.sum(M * A[None,:,:], axis=(1,2))) + dist = np.sqrt(np.einsum('ijk,jk', M, A)) + # faster version of: sum_deri = np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis=0) + sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) @@ -342,9 +345,9 @@ def _D_constraint(self, X, c, d, w): dist = np.sqrt(diff_sq.dot(w)) sum_deri1 = np.einsum('ij,i', diff_sq, 0.5 / np.maximum(dist, 1e-6)) sum_deri2 = np.einsum( - 'ijk,i', - np.einsum('ij,ik->ijk', diff_sq, diff_sq), - -0.25 / np.maximum(1e-6, dist**3) + 'ij,ik->jk', + diff_sq, + diff_sq / (-4 * np.maximum(1e-6, dist**3))[:,None] ) sum_dist = dist.sum() return (