[MRG] FIX LMNN gradient and cost function #201
@@ -105,7 +105,7 @@ def fit(self, X, y):
       # objective than the previous L, following the gradient:
       while True:
         # the next point next_L to try out is found by a gradient step
-        L_next = L - 2 * learn_rate * G
+        L_next = L - learn_rate * G
         # we compute the objective at next point
         # we copy variables that can be modified by _loss_grad, because if we
         # retry we don't want to modify them several times
@@ -191,10 +191,12 @@ def _loss_grad(self, X, L, dfG, impostors, it, k, reg, target_neighbors, df,
     # do the gradient update
     assert not np.isnan(df).any()
     G = dfG * reg + df * (1 - reg)
+    grad = 2 * L.dot(G)
     # compute the objective function
     objective = total_active * (1 - reg)

Review comment on `objective = total_active * (1 - reg)`: In fact this was right; it's the term coming from the +1 that is in the hinge loss (cf. the question from #46).

     objective += G.flatten().dot(L.T.dot(L).flatten())
-    return G, objective, total_active, df, a1, a2
+    return grad, objective, total_active, df, a1, a2

   def _select_targets(self, X, label_inds):
     target_neighbors = np.empty((X.shape[0], self.k), dtype=int)
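A note on the factor of 2 in the new grad line: the metric-dependent part of the objective is G.flatten().dot(L.T.dot(L).flatten()), i.e. trace(L G L^T), and its gradient with respect to L is L (G + G^T) = 2 L G when G is symmetric, which is why the gradient step in fit no longer needs its own factor of 2. The following standalone sketch (added for illustration, not part of the PR; it only assumes a random symmetric G playing the role of LMNN's sums of outer products) checks this identity numerically with scipy.optimize.check_grad, the same helper the tests below rely on:

    import numpy as np
    from scipy.optimize import check_grad

    rng = np.random.RandomState(42)
    d = 4
    M = rng.randn(d, d)
    G = M + M.T  # symmetric, like LMNN's sums of outer products

    def objective(flat_L):
      # the trace(L G L^T) term of the LMNN objective
      L = flat_L.reshape(d, d)
      return G.flatten().dot(L.T.dot(L).flatten())

    def gradient(flat_L):
      # analytic gradient of the term above: 2 * L.dot(G) for symmetric G
      L = flat_L.reshape(d, d)
      return (2 * L.dot(G)).ravel()

    # prints a small discrepancy (finite-difference error only)
    print(check_grad(objective, gradient, rng.randn(d * d)))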
The second file in the diff is the LMNN test module:
@@ -158,24 +158,75 @@ def test_loss_grad_lbfgs(self):
     # initialize L

     def fun(L):
-      return lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1,
-                             k, reg,
-                             target_neighbors, df, a1, a2)[1]
+      # we copy variables that can be modified by _loss_grad, because we
+      # want to have the same result when applying the function twice
+      return lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors,
+                             1, k, reg, target_neighbors, df.copy(),
+                             list(a1), list(a2))[1]

     def grad(L):
+      # we copy variables that can be modified by _loss_grad, because we
+      # want to have the same result when applying the function twice
       return lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors,
-                             1, k, reg,
-                             target_neighbors, df, a1, a2)[0].ravel()
+                             1, k, reg, target_neighbors, df.copy(),
+                             list(a1), list(a2))[0].ravel()

Review comment: This test seems particularly brittle, as it replicates so much LMNN functionality. Ideally we could structure the LMNN code to make this simpler, but at the least we should avoid copy-paste here:

    def loss_grad(flat_L):
      return lmnn._loss_grad(X, flat_L.reshape(...), ...)

    fun = lambda x: loss_grad(x)[1]
    grad = lambda x: loss_grad(x)[0].ravel()

Reply: Yes, you're right, let's go for this clearer expression. [A filled-in sketch of this helper follows below.]

     # compute relative error
     epsilon = np.sqrt(np.finfo(float).eps)
     rel_diff = (check_grad(fun, grad, L.ravel()) /
-                np.linalg.norm(approx_fprime(L.ravel(), fun,
-                                             epsilon)))
-    # np.linalg.norm(grad(L))
+                np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon)))
     np.testing.assert_almost_equal(rel_diff, 0., decimal=5)
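A filled-in version of the reviewer's loss_grad suggestion above could look like the sketch below. This is illustrative rather than the committed code; it reuses the variables set up earlier in the test (lmnn, X, dfG, impostors, k, reg, target_neighbors, df, a1, a2) and keeps the df.copy() / list(a1) / list(a2) copies so that repeated evaluations at the same L stay deterministic:

    def loss_grad(flat_L):
      # single call site for _loss_grad; copy the mutable arguments so that
      # evaluating the function twice with the same L gives the same result
      return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG,
                             impostors, 1, k, reg, target_neighbors,
                             df.copy(), list(a1), list(a2))

    fun = lambda flat_L: loss_grad(flat_L)[1]           # objective value
    grad = lambda flat_L: loss_grad(flat_L)[0].ravel()  # flattened gradient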
+@pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]),
+                                         [1, 1, 0, 0], 3.0),
+                                        (np.array([[0], [1], [2], [3]]),
+                                         [1, 0, 0, 1], 26.)])
+def test_toy_ex_lmnn(X, y, loss):
+  """Test that the loss gives the right result on a toy example"""

Review comment: Where did you get these values from?

Reply: I computed them by hand, according to the paper (note that it's only the loss function, and I set L to the identity, so the distances are easy to get from this 1D example). [A worked version of the first case follows below.]

+  L = np.array([[1]])
+  lmnn = LMNN(k=1, regularization=0.5)
+
+  k = lmnn.k
+  reg = lmnn.regularization
+
+  X, y = lmnn._prepare_inputs(X, y, dtype=float,
+                              ensure_min_samples=2)
+  num_pts, num_dims = X.shape
+  unique_labels, label_inds = np.unique(y, return_inverse=True)
+  lmnn.labels_ = np.arange(len(unique_labels))
+  lmnn.transformer_ = np.eye(num_dims)
+
+  target_neighbors = lmnn._select_targets(X, label_inds)
+  impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+
+  # sum outer products
+  dfG = _sum_outer_products(X, target_neighbors.flatten(),
+                            np.repeat(np.arange(X.shape[0]), k))
+  df = np.zeros_like(dfG)
+
+  # storage
+  a1 = [None]*k
+  a2 = [None]*k
+  for nn_idx in xrange(k):
+    a1[nn_idx] = np.array([])
+    a2[nn_idx] = np.array([])
+
+  # initialize L
+
+  def fun(L):
+    return lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1,
+                           k, reg,
+                           target_neighbors, df, a1, a2)[1]
+
+  def grad(L):
+    return lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1,
+                           k, reg, target_neighbors, df, a1, a2)[0].ravel()
+
+  # check the loss value on this toy example
+  assert fun(L) == loss
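To make the "computed them by hand" reply above concrete, here is the first parametrized case worked out (added for illustration, following the LMNN loss from the paper with L = I, k = 1 and both terms weighted by regularization = 0.5, so the ordering of the two terms does not matter):

- For X = [[0], [1], [2], [3]] and y = [1, 1, 0, 0], the target neighbors (nearest same-class point) are 0<->1 and 2<->3, each at squared distance 1, so the pull term is 1 + 1 + 1 + 1 = 4.
- The push term sums the hinges [1 + d^2(i, target) - d^2(i, impostor)]_+ over differently-labeled impostors; only (i = 1, target 0, impostor 2) and (i = 2, target 3, impostor 1) are active, each contributing 1 + 1 - 1 = 1, so the push term is 2.
- The loss is therefore 0.5 * 4 + 0.5 * 2 = 3.0, the first expected value; the same bookkeeping for y = [1, 0, 0, 1] gives a pull term of 20 and a push term of 32, hence 0.5 * 20 + 0.5 * 32 = 26.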
 def test_convergence_simple_example(capsys):
   # LMNN should converge on this simple example, which it did not with
   # this issue: https://github.com/metric-learn/metric-learn/issues/88
Review comment (on the multiplication by 2 in the added grad = 2 * L.dot(G) line): I put the multiplication by 2 here so that we can test the real gradient using check_grad.