[MRG] Fix quadruplets scoring (#220)

wdevazelhes · perimosocordiae · commit 8c3cb3e348f3 · 2019-06-18T11:07:53.000-04:00
* FIX: fix lsml scoring * Address #220 (review)
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
@@ -641,4 +641,9 @@ def score(self, quadruplets):
     score : float
       The quadruplets score.
     """
-    return - np.mean(self.predict(quadruplets))
+    # Since the prediction is a vector of values in {-1, +1}, we need to
+    # rescale them to {0, 1} to compute the accuracy using the mean: after
+    # rescaling, a 1 means a correctly classified result (pairs are in the
+    # right order), and a 0 an incorrectly classified result (pairs are in
+    # the wrong order).
+    return self.predict(quadruplets).mean() / 2 + 0.5
diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py
@@ -4,6 +4,7 @@
 
 import pytest
 from numpy.testing import assert_array_equal
+from scipy.spatial.distance import euclidean
 
 from metric_learn.base_metric import _PairsClassifierMixin, MahalanobisMixin
 from sklearn.exceptions import NotFittedError
@@ -489,3 +490,31 @@ def breaking_fun(**args):  # a function that fails so that we will miss
   with pytest.raises(ValueError) as raised_error:
     estimator.fit(input_data, labels, calibration_params={'strategy': 'weird'})
   assert str(raised_error.value) == expected_msg
+
+
+@pytest.mark.parametrize('estimator, build_dataset', pairs_learners,
+                         ids=ids_pairs_learners)
+def test_accuracy_toy_example(estimator, build_dataset):
+  """Test that the accuracy works on some toy example (hence that the
+  prediction is OK)"""
+  input_data, labels, preprocessor, X = build_dataset(with_preprocessor=False)
+  estimator = clone(estimator)
+  estimator.set_params(preprocessor=preprocessor)
+  set_random_state(estimator)
+  estimator.fit(input_data, labels)
+  # we force the transformation to be identity so that we control what it does
+  estimator.transformer_ = np.eye(X.shape[1])
+  # the threshold for similar or dissimilar pairs is half of the distance
+  # between X[0] and X[1]
+  estimator.set_threshold(euclidean(X[0], X[1]) / 2)
+  # We take the first two points and build 4 regularly spaced points on the
+  # line they define (spacing: a quarter of their distance), so that it's
+  # easy to build pairs of different similarities.
+  X_test = X[0] + np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4
+  pairs_test = np.array(
+      [[X_test[0], X_test[1]],  # similar (1 spacing apart, below threshold)
+       [X_test[0], X_test[3]],  # dissimilar (3 spacings, above threshold)
+       [X_test[1], X_test[2]],  # similar (1 spacing apart)
+       [X_test[2], X_test[3]]])  # similar (1 spacing apart)
+  y = np.array([-1, 1, 1, -1])  # label matches prediction? [F, F, T, F]
+  assert accuracy_score(estimator.predict(pairs_test), y) == 0.25
diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py
@@ -40,3 +40,26 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset,
   with pytest.raises(NotFittedError):
     estimator.predict(input_data)
 
+
+@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners,
+                         ids=ids_quadruplets_learners)
+def test_accuracy_toy_example(estimator, build_dataset):
+  """Test that the default scoring for quadruplets (accuracy) works on some
+  toy example"""
+  input_data, labels, preprocessor, X = build_dataset(with_preprocessor=False)
+  estimator = clone(estimator)
+  estimator.set_params(preprocessor=preprocessor)
+  set_random_state(estimator)
+  estimator.fit(input_data)
+  # We take the first two points and build 4 regularly spaced points on the
+  # line they define, so that it's easy to build quadruplets of different
+  # similarities (row comments: pair distances in units of the spacing).
+  X_test = X[0] + np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4
+  quadruplets_test = np.array(
+      [[X_test[0], X_test[2], X_test[0], X_test[1]],  # 2 vs 1
+       [X_test[1], X_test[3], X_test[1], X_test[0]],  # 2 vs 1
+       [X_test[1], X_test[2], X_test[0], X_test[3]],  # 1 vs 3
+       [X_test[3], X_test[0], X_test[2], X_test[1]]])  # 3 vs 1
+  # we force the transformation to be identity so that we control what it does
+  estimator.transformer_ = np.eye(X.shape[1])
+  assert estimator.score(quadruplets_test) == 0.25  # 1 of 4 counted correct