import re
import pytest
import numpy as np
+ import scipy
from scipy.optimize import check_grad, approx_fprime
from six.moves import xrange
- from sklearn.metrics import pairwise_distances
+ from sklearn.metrics import pairwise_distances, euclidean_distances
from sklearn.datasets import (load_iris, make_classification, make_regression,
                              make_spd_matrix)
from numpy.testing import (assert_array_almost_equal, assert_array_equal,
@@ -304,25 +305,15 @@ def test_loss_grad_lbfgs(self):
    lmnn.components_ = np.eye(n_components)

    target_neighbors = lmnn._select_targets(X, label_inds)
-     impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)

    # sum outer products
    dfG = _sum_outer_products(X, target_neighbors.flatten(),
                              np.repeat(np.arange(X.shape[0]), k))
-     df = np.zeros_like(dfG)
-
-     # storage
-     a1 = [None] * k
-     a2 = [None] * k
-     for nn_idx in xrange(k):
-       a1[nn_idx] = np.array([])
-       a2[nn_idx] = np.array([])

    # initialize L
    def loss_grad(flat_L):
-       return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, impostors,
-                              1, k, reg, target_neighbors, df.copy(),
-                              list(a1), list(a2))
+       return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG,
+                              k, reg, target_neighbors, label_inds)

    def fun(x):
      return loss_grad(x)[1]
@@ -366,6 +357,141 @@ def test_deprecation_use_pca(self):
    assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y)


+ def test_loss_func(capsys):
+   """Test the loss function (and its gradient) on a simple example, by
+   comparing the results of the actual metric-learn implementation with those
+   of a very simple (but non-performant) reference implementation."""
+
+   # toy dataset to use
+   X, y = make_classification(n_samples=10, n_classes=2,
+                              n_features=6,
+                              n_redundant=0, shuffle=True,
+                              scale=[1, 1, 20, 20, 20, 20], random_state=42)
+
+   def hinge(a):
+     if a > 0:
+       return a, 1
+     else:
+       return 0, 0
+
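+   # loss_fn below is a slow reference implementation of the standard LMNN
+   # objective: a pull term (1 - reg) * sum_ij ||L(x_i - x_j)||^2 over target
+   # neighbors j of i, plus a push term reg * sum_ijl [1 + ||L(x_i - x_j)||^2
+   # - ||L(x_i - x_l)||^2]_+ over differently-labeled points l. It returns the
+   # gradient, the loss and the number of active (non-zero) hinge terms.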
+   def loss_fn(L, X, y, target_neighbors, reg):
+     L = L.reshape(-1, X.shape[1])
+     Lx = np.dot(X, L.T)
+     loss = 0
+     total_active = 0
+     grad = np.zeros_like(L)
+     for i in range(X.shape[0]):
+       for j in target_neighbors[i]:
+         loss += (1 - reg) * np.sum((Lx[i] - Lx[j]) ** 2)
+         grad += (1 - reg) * np.outer(Lx[i] - Lx[j], X[i] - X[j])
+         for l in range(X.shape[0]):
+           if y[i] != y[l]:
+             hin, active = hinge(1 + np.sum((Lx[i] - Lx[j])**2) -
+                                 np.sum((Lx[i] - Lx[l])**2))
+             total_active += active
+             if active:
+               loss += reg * hin
+               grad += (reg * (np.outer(Lx[i] - Lx[j], X[i] - X[j]) -
+                               np.outer(Lx[i] - Lx[l], X[i] - X[l])))
+     grad = 2 * grad
+     return grad, loss, total_active
+
+   # we check that the gradient computed in the non-performant implementation
+   # is indeed the true gradient on a toy example:
+
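+   # simple re-implementation of target neighbor selection: for each point,
+   # its k nearest neighbors (in squared Euclidean distance) among the points
+   # sharing its label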
+   def _select_targets(X, y, k):
+     target_neighbors = np.empty((X.shape[0], k), dtype=int)
+     for label in np.unique(y):
+       inds, = np.nonzero(y == label)
+       dd = euclidean_distances(X[inds], squared=True)
+       np.fill_diagonal(dd, np.inf)
+       nn = np.argsort(dd)[..., :k]
+       target_neighbors[inds] = inds[nn]
+     return target_neighbors
+
+   target_neighbors = _select_targets(X, y, 2)
+   regularization = 0.5
+   n_features = X.shape[1]
+   x0 = np.random.randn(1, n_features)
+
+   def loss(x0):
+     return loss_fn(x0.reshape(-1, X.shape[1]), X, y, target_neighbors,
+                    regularization)[1]
+
+   def grad(x0):
+     return loss_fn(x0.reshape(-1, X.shape[1]), X, y, target_neighbors,
+                    regularization)[0].ravel()
+
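+   # check_grad compares the analytical gradient returned by grad() with a
+   # finite-difference approximation of loss() around x0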
+   scipy.optimize.check_grad(loss, grad, x0.ravel())
+
+   class LMNN_with_callback(LMNN):
+     """We use a callback to store the gradient at each call of _loss_grad
+     (used in the comparison below)."""
+
+     def __init__(self, callback, *args, **kwargs):
+       self.callback = callback
+       super(LMNN_with_callback, self).__init__(*args, **kwargs)
+
+     def _loss_grad(self, *args, **kwargs):
+       grad, objective, total_active = (
+           super(LMNN_with_callback, self)._loss_grad(*args, **kwargs))
+       self.callback.append(grad)
+       return grad, objective, total_active
+
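+   # this subclass replaces the real _loss_grad with the slow reference
+   # loss_fn above, so fit() runs the same outer optimization but with the
+   # reference gradient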
+   class LMNN_nonperformant(LMNN_with_callback):
+
+     def fit(self, X, y):
+       self.y = y
+       return super(LMNN_nonperformant, self).fit(X, y)
+
+     def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
+       grad, loss, total_active = loss_fn(L.ravel(), X, self.y,
+                                          target_neighbors,
+                                          self.regularization)
+       self.callback.append(grad)
+       return grad, loss, total_active
+
+   mem1, mem2 = [], []
+   lmnn_perf = LMNN_with_callback(verbose=True, random_state=42,
+                                  init='identity', max_iter=30, callback=mem1)
+   lmnn_nonperf = LMNN_nonperformant(verbose=True, random_state=42,
+                                     init='identity', max_iter=30,
+                                     callback=mem2)
+   objectives, obj_diffs, learn_rate, total_active = (dict(), dict(), dict(),
+                                                      dict())
+   for algo, name in zip([lmnn_perf, lmnn_nonperf], ['perf', 'nonperf']):
+     algo.fit(X, y)
+     out, _ = capsys.readouterr()
+     lines = re.split("\n+", out)
+     # we parse every variable that the algorithm prints in verbose mode
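+     # the regex below assumes each verbose line lists: iteration number,
+     # objective, objective difference, number of active constraints,
+     # learning rate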
+     num = '(-?\d+.?\d*(e[+|-]\d+)?)'
+     strings = [re.search("\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})"
+                          .format(num, num, num), s) for s in lines]
+     objectives[name] = [float(match.group(1)) for match in strings
+                         if match is not None]
+     obj_diffs[name] = [float(match.group(3)) for match in strings
+                        if match is not None]
+     total_active[name] = [float(match.group(5)) for match in strings
+                           if match is not None]
+     learn_rate[name] = [float(match.group(6)) for match in strings
+                         if match is not None]
+     assert len(strings) >= 10  # we ensure that we actually did more than 10
+     # iterations
+     assert total_active[name][0] >= 2  # we ensure that we have some active
+     # constraints (that's the case we want to test)
+   # we remove the last element because it can be equal to the penultimate
+   # if the last gradient update is null
+   for i in range(len(mem1)):
+     np.testing.assert_allclose(lmnn_perf.callback[i],
+                                lmnn_nonperf.callback[i],
+                                err_msg='Gradient different at position '
+                                        '{}'.format(i))
+   np.testing.assert_allclose(objectives['perf'], objectives['nonperf'])
+   np.testing.assert_allclose(obj_diffs['perf'], obj_diffs['nonperf'])
+   np.testing.assert_allclose(total_active['perf'], total_active['nonperf'])
+   np.testing.assert_allclose(learn_rate['perf'], learn_rate['nonperf'])
+
+
@pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]),
                                         [1, 1, 0, 0], 3.0),
                                        (np.array([[0], [1], [2], [3]]),
@@ -386,7 +512,7 @@ def test_toy_ex_lmnn(X, y, loss):
  lmnn.components_ = np.eye(n_components)

  target_neighbors = lmnn._select_targets(X, label_inds)
-   impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+   impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds, L)

  # sum outer products
  dfG = _sum_outer_products(X, target_neighbors.flatten(),
@@ -401,9 +527,8 @@ def test_toy_ex_lmnn(X, y, loss):
    a2[nn_idx] = np.array([])

  # assert that the loss equals the one computed by hand
-   assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1, k,
-                          reg, target_neighbors, df, a1, a2)[1] == loss
-
+   assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, k,
+                          reg, target_neighbors, label_inds)[1] == loss

def test_convergence_simple_example(capsys):
  # LMNN should converge on this simple example, which it did not with