
Commit b741a9e

Author: William de Vazelhes (committed)
FIX: corrections according to reviews #95 (review) and #95 (review)
- replace similarity by metric
- replace constrained dataset by pairs/quadruplets
- simplify score on quadruplets expression
- replace ``X_constrained`` in tests by pairs/quadruplets/tuples
1 parent a70d1a8 commit b741a9e

File tree: 2 files changed (+40 / -41 lines)


metric_learn/base_metric.py

Lines changed: 6 additions & 7 deletions
@@ -59,15 +59,15 @@ def transform(self, X=None):
 class _PairsClassifierMixin:
 
   def predict(self, pairs):
-    """Predicts the learned similarity between input pairs.
+    """Predicts the learned metric between input pairs.
 
     Returns the learned metric value between samples in every pair. It should
     ideally be low for similar samples and high for dissimilar samples.
 
     Parameters
     ----------
     pairs : array-like, shape=(n_constraints, 2, n_features)
-      A constrained dataset of paired samples.
+      Input pairs.
 
     Returns
     -------
@@ -110,7 +110,7 @@ def score(self, pairs, y):
 class _QuadrupletsClassifierMixin:
 
   def predict(self, quadruplets):
-    """Predicts differences between sample similarities in input quadruplets.
+    """Predicts differences between sample distances in input quadruplets.
 
     For each quadruplet of samples, computes the difference between the learned
     metric of the first pair minus the learned metric of the second pair.
@@ -122,7 +122,7 @@ def predict(self, quadruplets):
 
     Returns
     -------
-    prediction : np.ndarray of floats, shape=(n_constraints,)
+    prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
       Metric differences.
     """
     similar_diffs = quadruplets[:, 0, :] - quadruplets[:, 1, :]
@@ -136,7 +136,7 @@ def decision_function(self, quadruplets):
     return self.predict(quadruplets)
 
   def score(self, quadruplets, y=None):
-    """Computes score on an input constrained dataset
+    """Computes score on input quadruplets
 
     Returns the accuracy score of the following classification task: a record
     is correctly classified if the predicted similarity between the first two
@@ -154,5 +154,4 @@ def score(self, quadruplets, y=None):
     score : float
       The quadruplets score.
     """
-    predicted_sign = self.decision_function(quadruplets) < 0
-    return np.sum(predicted_sign) / predicted_sign.shape[0]
+    return - np.mean(np.sign(self.decision_function(quadruplets)))
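For reference, here is a minimal standalone sketch (not part of the diff) of what the old and new score expressions compute. Per the docstring above, decision_function returns, for each quadruplet, the learned metric of the first pair minus that of the second pair, so a negative value is a correct prediction; the toy decision values below are hypothetical:

import numpy as np

# Hypothetical decision values for four quadruplets; three are negative,
# i.e. the first pair is predicted closer than the second pair.
decision = np.array([-0.5, -1.2, 0.3, -0.1])

# New expression from this commit: negated mean of the signs.
score_new = -np.mean(np.sign(decision))                        # 0.5, lies in [-1, 1]

# Previous expression: fraction of strictly negative decisions.
predicted_sign = decision < 0
score_old = np.sum(predicted_sign) / predicted_sign.shape[0]   # 0.75, lies in [0, 1]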

test/test_weakly_supervised.py

Lines changed: 34 additions & 34 deletions
@@ -26,27 +26,27 @@ def build_data():
 
 
 def build_pairs():
-  # test that you can do cross validation on a ConstrainedDataset with
+  # test that you can do cross validation on tuples of points with
   # a WeaklySupervisedMetricLearner
   X, pairs = build_data()
-  X_constrained, y = wrap_pairs(X, pairs)
-  X_constrained, y = shuffle(X_constrained, y)
-  (X_constrained_train, X_constrained_test, y_train,
-   y_test) = train_test_split(X_constrained, y)
-  return (X_constrained, y, X_constrained_train, X_constrained_test,
+  pairs, y = wrap_pairs(X, pairs)
+  pairs, y = shuffle(pairs, y)
+  (pairs_train, pairs_test, y_train,
+   y_test) = train_test_split(pairs, y)
+  return (pairs, y, pairs_train, pairs_test,
           y_train, y_test)
 
 
 def build_quadruplets():
-  # test that you can do cross validation on a ConstrainedDataset with
+  # test that you can do cross validation on a tuples of points with
   # a WeaklySupervisedMetricLearner
   X, pairs = build_data()
   c = np.column_stack(pairs)
-  X_constrained = X[c]
-  X_constrained = shuffle(X_constrained)
+  quadruplets = X[c]
+  quadruplets = shuffle(quadruplets)
   y = y_train = y_test = None
-  X_constrained_train, X_constrained_test = train_test_split(X_constrained)
-  return (X_constrained, y, X_constrained_train, X_constrained_test,
+  quadruplets_train, quadruplets_test = train_test_split(quadruplets)
+  return (quadruplets, y, quadruplets_train, quadruplets_test,
           y_train, y_test)
 
 
@@ -66,35 +66,35 @@ def build_quadruplets():
 @pytest.mark.parametrize('estimator, build_dataset', list_estimators,
                          ids=ids_estimators)
 def test_cross_validation(estimator, build_dataset):
-  (X_constrained, y, X_constrained_train, X_constrained_test,
+  (tuples, y, tuples_train, tuples_test,
    y_train, y_test) = build_dataset()
   estimator = clone(estimator)
   set_random_state(estimator)
 
-  assert np.isfinite(cross_val_score(estimator, X_constrained, y)).all()
+  assert np.isfinite(cross_val_score(estimator, tuples, y)).all()
 
 
-def check_score(estimator, X_constrained, y):
-  score = estimator.score(X_constrained, y)
+def check_score(estimator, tuples, y):
+  score = estimator.score(tuples, y)
   assert np.isfinite(score)
 
 
-def check_predict(estimator, X_constrained):
-  y_predicted = estimator.predict(X_constrained)
-  assert len(y_predicted), len(X_constrained)
+def check_predict(estimator, tuples):
+  y_predicted = estimator.predict(tuples)
+  assert len(y_predicted), len(tuples)
 
 
 @pytest.mark.parametrize('estimator, build_dataset', list_estimators,
                          ids=ids_estimators)
 def test_simple_estimator(estimator, build_dataset):
-  (X_constrained, y, X_constrained_train, X_constrained_test,
+  (tuples, y, tuples_train, tuples_test,
    y_train, y_test) = build_dataset()
   estimator = clone(estimator)
   set_random_state(estimator)
 
-  estimator.fit(X_constrained_train, y_train)
-  check_score(estimator, X_constrained_test, y_test)
-  check_predict(estimator, X_constrained_test)
+  estimator.fit(tuples_train, y_train)
+  check_score(estimator, tuples_test, y_test)
+  check_predict(estimator, tuples_test)
 
 
 @pytest.mark.parametrize('estimator', [est[0] for est in list_estimators],
@@ -122,50 +122,50 @@ def test_no_fit_attributes_set_in_init(estimator):
 def test_estimators_fit_returns_self(estimator, build_dataset):
   """Check if self is returned when calling fit"""
   # From scikit-learn
-  (X_constrained, y, X_constrained_train, X_constrained_test,
+  (tuples, y, tuples_train, tuples_test,
    y_train, y_test) = build_dataset()
   estimator = clone(estimator)
-  assert estimator.fit(X_constrained, y) is estimator
+  assert estimator.fit(tuples, y) is estimator
 
 
 @pytest.mark.parametrize('estimator, build_dataset', list_estimators,
                          ids=ids_estimators)
 def test_pipeline_consistency(estimator, build_dataset):
   # From scikit learn
   # check that make_pipeline(est) gives same score as est
-  (X_constrained, y, X_constrained_train, X_constrained_test,
+  (tuples, y, tuples_train, tuples_test,
    y_train, y_test) = build_dataset()
   estimator = clone(estimator)
   pipeline = make_pipeline(estimator)
-  estimator.fit(X_constrained, y)
-  pipeline.fit(X_constrained, y)
+  estimator.fit(tuples, y)
+  pipeline.fit(tuples, y)
 
   funcs = ["score", "fit_transform"]
 
   for func_name in funcs:
     func = getattr(estimator, func_name, None)
     if func is not None:
       func_pipeline = getattr(pipeline, func_name)
-      result = func(X_constrained, y)
-      result_pipe = func_pipeline(X_constrained, y)
+      result = func(tuples, y)
+      result_pipe = func_pipeline(tuples, y)
       assert_allclose_dense_sparse(result, result_pipe)
 
 
 @pytest.mark.parametrize('estimator, build_dataset', list_estimators,
                          ids=ids_estimators)
 def test_dict_unchanged(estimator, build_dataset):
   # From scikit-learn
-  (X_constrained, y, X_constrained_train, X_constrained_test,
+  (tuples, y, tuples_train, tuples_test,
    y_train, y_test) = build_dataset()
   estimator = clone(estimator)
   if hasattr(estimator, "n_components"):
     estimator.n_components = 1
-  estimator.fit(X_constrained, y)
+  estimator.fit(tuples, y)
   for method in ["predict", "transform", "decision_function",
                  "predict_proba"]:
     if hasattr(estimator, method):
       dict_before = estimator.__dict__.copy()
-      getattr(estimator, method)(X_constrained)
+      getattr(estimator, method)(tuples)
       assert estimator.__dict__ == dict_before, \
         ("Estimator changes __dict__ during %s"
          % method)
@@ -176,14 +176,14 @@ def test_dict_unchanged(estimator, build_dataset):
 def test_dont_overwrite_parameters(estimator, build_dataset):
   # From scikit-learn
   # check that fit method only changes or sets private attributes
-  (X_constrained, y, X_constrained_train, X_constrained_test,
+  (tuples, y, tuples_train, tuples_test,
    y_train, y_test) = build_dataset()
   estimator = clone(estimator)
   if hasattr(estimator, "n_components"):
     estimator.n_components = 1
   dict_before_fit = estimator.__dict__.copy()
 
-  estimator.fit(X_constrained, y)
+  estimator.fit(tuples, y)
   dict_after_fit = estimator.__dict__
 
   public_keys_after_fit = [key for key in dict_after_fit.keys()
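As a rough illustration of the tuples format these tests now build (a sketch only, mirroring the X[c] indexing above; the shapes follow the predict docstrings, while the +1/-1 label convention for pairs is an assumption here):

import numpy as np

# Toy data: 10 points in 3 dimensions.
X = np.random.RandomState(0).randn(10, 3)

# Pairs materialized as points, shape (n_constraints, 2, n_features).
pair_indices = np.array([[0, 1], [2, 3], [4, 5]])
pairs = X[pair_indices]            # shape (3, 2, 3)
y_pairs = np.array([1, -1, 1])     # assumed convention: +1 similar, -1 dissimilar

# Quadruplets, shape (n_constraints, 4, n_features): the first two points of each
# quadruplet should end up closer than the last two under the learned metric.
quad_indices = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
quadruplets = X[quad_indices]      # shape (2, 4, 3)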
