|
 import re
 import pytest
 import numpy as np
-from scipy.optimize import check_grad
+from scipy.optimize import check_grad, approx_fprime
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from sklearn.datasets import load_iris, make_classification, make_regression
@@ -21,7 +21,7 @@
                           RCA_Supervised, MMC_Supervised, SDML, ITML)
 # Import this specially for testing.
 from metric_learn.constraints import wrap_pairs
-from metric_learn.lmnn import python_LMNN
+from metric_learn.lmnn import python_LMNN, _sum_outer_products


 def class_separation(X, labels):
@@ -157,6 +157,98 @@ def test_iris(self):
                             self.iris_labels)
     self.assertLess(csep, 0.25)

+  def test_loss_grad_lbfgs(self):
+    """Test gradient of loss function
+    Assert that the gradient is almost equal to its finite differences
+    approximation.
+    """
+    rng = np.random.RandomState(42)
+    X, y = make_classification(random_state=rng)
+    L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1])
+    lmnn = LMNN()
+
+    k = lmnn.k
+    reg = lmnn.regularization
+
+    X, y = lmnn._prepare_inputs(X, y, dtype=float,
+                                ensure_min_samples=2)
+    num_pts, num_dims = X.shape
+    unique_labels, label_inds = np.unique(y, return_inverse=True)
+    lmnn.labels_ = np.arange(len(unique_labels))
+    lmnn.transformer_ = np.eye(num_dims)
+
+    target_neighbors = lmnn._select_targets(X, label_inds)
+    impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+
+    # sum outer products
+    dfG = _sum_outer_products(X, target_neighbors.flatten(),
+                              np.repeat(np.arange(X.shape[0]), k))
+    df = np.zeros_like(dfG)
+
+    # storage
+    a1 = [None]*k
+    a2 = [None]*k
+    for nn_idx in xrange(k):
+      a1[nn_idx] = np.array([])
+      a2[nn_idx] = np.array([])
+
+    # initialize L
+    def loss_grad(flat_L):
+      return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, impostors,
+                             1, k, reg, target_neighbors, df.copy(),
+                             list(a1), list(a2))
+
+    def fun(x):
+      return loss_grad(x)[1]
+
+    def grad(x):
+      return loss_grad(x)[0].ravel()
+
+    # compute relative error
+    epsilon = np.sqrt(np.finfo(float).eps)
+    rel_diff = (check_grad(fun, grad, L.ravel()) /
+                np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon)))
+    np.testing.assert_almost_equal(rel_diff, 0., decimal=5)
+
+
+@pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]),
+                                         [1, 1, 0, 0], 3.0),
+                                        (np.array([[0], [1], [2], [3]]),
+                                         [1, 0, 0, 1], 26.)])
+def test_toy_ex_lmnn(X, y, loss):
+  """Test that the loss gives the right result on a toy example"""
+  L = np.array([[1]])
+  lmnn = LMNN(k=1, regularization=0.5)
+
+  k = lmnn.k
+  reg = lmnn.regularization
+
+  X, y = lmnn._prepare_inputs(X, y, dtype=float,
+                              ensure_min_samples=2)
+  num_pts, num_dims = X.shape
+  unique_labels, label_inds = np.unique(y, return_inverse=True)
+  lmnn.labels_ = np.arange(len(unique_labels))
+  lmnn.transformer_ = np.eye(num_dims)
+
+  target_neighbors = lmnn._select_targets(X, label_inds)
+  impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+
+  # sum outer products
+  dfG = _sum_outer_products(X, target_neighbors.flatten(),
+                            np.repeat(np.arange(X.shape[0]), k))
+  df = np.zeros_like(dfG)
+
+  # storage
+  a1 = [None]*k
+  a2 = [None]*k
+  for nn_idx in xrange(k):
+    a1[nn_idx] = np.array([])
+    a2[nn_idx] = np.array([])
+
+  # assert that the loss equals the one computed by hand
+  assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1, k,
+                         reg, target_neighbors, df, a1, a2)[1] == loss
+

 def test_convergence_simple_example(capsys):
   # LMNN should converge on this simple example, which it did not with
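A note on the expected values in test_toy_ex_lmnn (an editorial aside, not part of the patch): the test's comment says the losses were computed by hand, and the values 3.0 and 26. match the standard LMNN objective of Weinberger & Saul if `regularization` is read as the pull/push trade-off weight mu. A minimal sketch under that assumption, with the hypothetical helper lmnn_toy_loss standing in for the hand computation:

import numpy as np

def lmnn_toy_loss(X, y, mu=0.5):
  # Canonical LMNN loss with k=1 target neighbors and L = identity.
  X, y = np.asarray(X, dtype=float), np.asarray(y)
  n = len(X)
  d2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(axis=-1)  # squared distances
  pull = push = 0.
  for i in range(n):
    same = [j for j in range(n) if j != i and y[j] == y[i]]
    j = min(same, key=lambda m: d2[i, m])   # the single target neighbor of i
    pull += d2[i, j]                        # pull term: distance to the target
    for l in range(n):
      if y[l] != y[i]:                      # push term: hinge over impostors
        push += max(1 + d2[i, j] - d2[i, l], 0)
  return (1 - mu) * pull + mu * push

assert lmnn_toy_loss([[0], [1], [2], [3]], [1, 1, 0, 0]) == 3.0
assert lmnn_toy_loss([[0], [1], [2], [3]], [1, 0, 0, 1]) == 26.

With L = [[1]] the learned metric is plain squared Euclidean distance, so the hand computation only needs the pairwise squared distances between the four 1-D points.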
|
@@ -458,7 +550,9 @@ def grad(M):
       return nca._loss_grad_lbfgs(M, X, mask)[1].ravel()

     # compute relative error
-    rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
+    epsilon = np.sqrt(np.finfo(float).eps)
+    rel_diff = (check_grad(fun, grad, M.ravel()) /
+                np.linalg.norm(approx_fprime(M.ravel(), fun, epsilon)))
     np.testing.assert_almost_equal(rel_diff, 0., decimal=6)

   def test_simple_example(self):
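Both the new LMNN test and the updated NCA check rely on the same finite-difference pattern: scipy.optimize.check_grad returns the 2-norm of the difference between the analytic gradient and the forward-difference gradient, and dividing by the norm of approx_fprime's numerical gradient turns that into a scale-free relative error. A minimal standalone sketch of the pattern on a toy quadratic (illustrative only, not code from the patch):

import numpy as np
from scipy.optimize import check_grad, approx_fprime

# Toy objective f(x) = ||A x - b||^2 with analytic gradient 2 A^T (A x - b).
rng = np.random.RandomState(0)
A, b = rng.randn(5, 3), rng.randn(5)

def fun(x):
  return np.sum((A.dot(x) - b) ** 2)

def grad(x):
  return 2 * A.T.dot(A.dot(x) - b)

x0 = rng.randn(3)
epsilon = np.sqrt(np.finfo(float).eps)
# check_grad compares grad against approx_fprime internally; dividing by the
# numerical gradient's norm makes the tolerance independent of problem scale.
rel_diff = (check_grad(fun, grad, x0) /
            np.linalg.norm(approx_fprime(x0, fun, epsilon)))
np.testing.assert_almost_equal(rel_diff, 0., decimal=6)

The tests above apply this pattern to lmnn._loss_grad and nca._loss_grad_lbfgs with decimal=5 and decimal=6 respectively.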
|
|