From 676ab8641d8e7fabf851848bd2265061fcf6d194 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 4 Feb 2019 14:39:24 +0100 Subject: [PATCH 01/41] add some tests for testing that different scores work using the scoring function --- test/test_sklearn_compat.py | 66 ++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index d9dce685..5dbabf98 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -15,9 +15,11 @@ import numpy as np from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) +from sklearn.metrics.scorer import make_scorer from sklearn.utils.testing import _get_args from test.test_utils import (metric_learners, ids_metric_learners, - mock_preprocessor) + mock_preprocessor, tuples_learners, + ids_tuples_learners) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -88,22 +90,56 @@ def test_mmc(self): @pytest.mark.parametrize('with_preprocessor', [True, False]) -@pytest.mark.parametrize('estimator, build_dataset', metric_learners, - ids=ids_metric_learners) -def test_cross_validation_is_finite(estimator, build_dataset, - with_preprocessor): +@pytest.mark.parametrize('estimator, build_dataset', tuples_learners, + ids=ids_tuples_learners) +def test_various_scoring_on_tuples_learners(estimator, build_dataset, + with_preprocessor): + """Tests that metric-learn estimators' scoring returns something finite, + for other scoring than default scoring. (List of scikit-learn's scores can be + found in sklearn.metrics.scorer). For each type of output (predict, + predict_proba, decision_function), we test a bunch of scores. + """ + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + + # scores that need a predict function: every tuples learner should have a + # predict function (whether the pair is of positive samples or negative + # samples) + for scoring in ['accuracy', 'f1', 'precision', 'recall']: + check_score_is_finite(scoring, estimator, input_data, labels) + # scores that need a predict_proba: + if hasattr(estimator, "predict_proba"): + for scoring in ['neg_log_loss', 'brier_score']: + check_score_is_finite(scoring, estimator, input_data, labels) + # scores that need a decision_function: every tuples learner should have a + # decision function (the metric between points) + for scoring in ['roc_auc', 'average_precision', 'average_recall']: + check_score_is_finite(scoring, estimator, input_data, labels) + + +def check_score_is_finite(scoring, estimator, input_data, labels): + assert np.isfinite(cross_val_score(estimator, input_data, labels, + scoring=scoring)).all() + assert np.isfinite(cross_val_predict(estimator, + input_data, labels, + scoring=scoring)).all() + assert np.isfinite(make_scorer(scoring)(input_data, labels)) + + +@pytest.mark.parametrize('estimator, build_dataset', tuples_learners, + ids=ids_tuples_learners) +def test_cross_validation_is_finite(estimator, build_dataset): """Tests that validation on metric-learn estimators returns something finite """ - if any(hasattr(estimator, method) for method in ["predict", "score"]): - input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - set_random_state(estimator) - if hasattr(estimator, "score"): - assert 
np.isfinite(cross_val_score(estimator, input_data, labels)).all() - if hasattr(estimator, "predict"): - assert np.isfinite(cross_val_predict(estimator, - input_data, labels)).all() + input_data, labels, preprocessor, _ = build_dataset() + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + assert np.isfinite(cross_val_score(estimator, input_data, labels)).all() + assert np.isfinite(cross_val_predict(estimator, + input_data, labels)).all() @pytest.mark.parametrize('with_preprocessor', [True, False]) From cc1c3e63709819f154c147325122a50469347bc2 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Feb 2019 11:12:26 +0100 Subject: [PATCH 02/41] ENH: Add tests and basic threshold implementation --- metric_learn/base_metric.py | 12 +++++-- metric_learn/itml.py | 9 ++++- metric_learn/mmc.py | 9 ++++- metric_learn/sdml.py | 9 ++++- test/test_pairs_classifiers.py | 65 ++++++++++++++++++++++++++++++++++ test/test_sklearn_compat.py | 13 ++++--- 6 files changed, 105 insertions(+), 12 deletions(-) create mode 100644 test/test_pairs_classifiers.py diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 58b8cc5d..1cf1ec36 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,7 +1,7 @@ from numpy.linalg import cholesky from scipy.spatial.distance import euclidean from sklearn.base import BaseEstimator -from sklearn.utils.validation import _is_arraylike +from sklearn.utils.validation import _is_arraylike, check_is_fitted from sklearn.metrics import roc_auc_score import numpy as np from abc import ABCMeta, abstractmethod @@ -317,7 +317,8 @@ def predict(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted learned metric value between samples in every pair. """ - return self.decision_function(pairs) + check_is_fitted(self, 'threshold_') + return - 2 * (self.decision_function(pairs) > self.threshold_) + 1 def decision_function(self, pairs): """Returns the learned metric between input pairs. @@ -369,6 +370,13 @@ def score(self, pairs, y): """ return roc_auc_score(y, self.decision_function(pairs)) + def set_default_threshold(self, pairs, y): + """Returns a threshold that is the mean between the similar metrics + mean, and the dissimilar metrics mean""" + similar_threshold = np.mean(self.decision_function(pairs[y==1])) + dissimilar_threshold = np.mean(self.decision_function(pairs[y==1])) + self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) + class _QuadrupletsClassifierMixin(BaseMetricLearner): diff --git a/metric_learn/itml.py b/metric_learn/itml.py index a0ff05f9..2a9e987a 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -148,6 +148,11 @@ class ITML(_BaseITML, _PairsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ def fit(self, pairs, y, bounds=None): @@ -176,7 +181,9 @@ def fit(self, pairs, y, bounds=None): self : object Returns the instance. 
""" - return self._fit(pairs, y, bounds=bounds) + self._fit(pairs, y, bounds=bounds) + self.threshold_ = np.mean(self.bounds_) + return self class ITML_Supervised(_BaseITML, TransformerMixin): diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index f9d3690b..2ddcced2 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -359,6 +359,11 @@ class MMC(_BaseMMC, _PairsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ def fit(self, pairs, y): @@ -379,7 +384,9 @@ def fit(self, pairs, y): self : object Returns the instance. """ - return self._fit(pairs, y) + self._fit(pairs, y) + self.set_default_threshold(pairs, y) + return self class MMC_Supervised(_BaseMMC, TransformerMixin): diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 78fc4ebc..096dc0ed 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -81,6 +81,11 @@ class SDML(_BaseSDML, _PairsClassifierMixin): transformer_ : `numpy.ndarray`, shape=(num_dims, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ def fit(self, pairs, y): @@ -101,7 +106,9 @@ def fit(self, pairs, y): self : object Returns the instance. """ - return self._fit(pairs, y) + self._fit(pairs, y) + self.set_default_threshold(pairs, y) + return self class SDML_Supervised(_BaseSDML, TransformerMixin): diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py new file mode 100644 index 00000000..c497f64b --- /dev/null +++ b/test/test_pairs_classifiers.py @@ -0,0 +1,65 @@ +import pytest +from sklearn.exceptions import NotFittedError +from sklearn.model_selection import train_test_split + +from test.test_utils import pairs_learners, ids_pairs_learners +from sklearn.utils.testing import set_random_state +from sklearn import clone +import numpy as np + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_predict_monotonous(estimator, build_dataset, + with_preprocessor): + """Test that all predicted values are either +1 or -1""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, + labels) + estimator.fit(pairs_train, y_train) + predictions = estimator.predict(pairs_test, y_test) + assert np.isin(predictions, [-1, 1]).all() + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_predict_monotonous(estimator, build_dataset, + with_preprocessor): + """Test that there is a threshold distance separating points labeled as + similar and points labeled as dissimilar """ + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + 
estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, + labels) + estimator.fit(pairs_train, y_train) + distances = estimator.score_pairs(pairs_test) + predictions = estimator.predict(pairs_test) + min_dissimilar = np.min(distances[predictions == -1]) + max_similar = np.max(distances[predictions == 1]) + assert max_similar <= min_dissimilar + separator = np.mean([min_dissimilar, max_similar]) + assert (predictions[distances > separator] == -1).all() + assert (predictions[distances < separator] == 1).all() + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, + with_preprocessor): + """Test that a NotFittedError is raised if someone tries to predict and + the metric learner has not been fitted.""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.predict(input_data) + diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 5dbabf98..3e2b9113 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -15,7 +15,7 @@ import numpy as np from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) -from sklearn.metrics.scorer import make_scorer +from sklearn.metrics.scorer import get_scorer from sklearn.utils.testing import _get_args from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, @@ -107,7 +107,7 @@ def test_various_scoring_on_tuples_learners(estimator, build_dataset, # scores that need a predict function: every tuples learner should have a # predict function (whether the pair is of positive samples or negative # samples) - for scoring in ['accuracy', 'f1', 'precision', 'recall']: + for scoring in ['accuracy', 'f1']: check_score_is_finite(scoring, estimator, input_data, labels) # scores that need a predict_proba: if hasattr(estimator, "predict_proba"): @@ -115,17 +115,16 @@ def test_various_scoring_on_tuples_learners(estimator, build_dataset, check_score_is_finite(scoring, estimator, input_data, labels) # scores that need a decision_function: every tuples learner should have a # decision function (the metric between points) - for scoring in ['roc_auc', 'average_precision', 'average_recall']: + for scoring in ['roc_auc', 'average_precision', 'precision', 'recall']: check_score_is_finite(scoring, estimator, input_data, labels) def check_score_is_finite(scoring, estimator, input_data, labels): + estimator = clone(estimator) assert np.isfinite(cross_val_score(estimator, input_data, labels, scoring=scoring)).all() - assert np.isfinite(cross_val_predict(estimator, - input_data, labels, - scoring=scoring)).all() - assert np.isfinite(make_scorer(scoring)(input_data, labels)) + estimator.fit(input_data, labels) + assert np.isfinite(get_scorer(scoring)(estimator, input_data, labels)) @pytest.mark.parametrize('estimator, build_dataset', tuples_learners, From f95c456a8cc1de441ff0fc21131c2472edbaae27 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 6 Feb 2019 09:34:24 +0100 Subject: [PATCH 03/41] Add support for LSML and more generally quadruplets --- metric_learn/base_metric.py | 20 +++++++-- 
metric_learn/lsml.py | 24 +++++++--- test/test_pairs_classifiers.py | 8 ++-- test/test_quadruplets_classifiers.py | 65 ++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 15 deletions(-) create mode 100644 test/test_quadruplets_classifiers.py diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 1cf1ec36..6711efb8 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -2,7 +2,7 @@ from scipy.spatial.distance import euclidean from sklearn.base import BaseEstimator from sklearn.utils.validation import _is_arraylike, check_is_fitted -from sklearn.metrics import roc_auc_score +from sklearn.metrics import roc_auc_score, accuracy_score import numpy as np from abc import ABCMeta, abstractmethod import six @@ -317,7 +317,7 @@ def predict(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted learned metric value between samples in every pair. """ - check_is_fitted(self, 'threshold_') + check_is_fitted(self, ['threshold_', 'transformer_']) return - 2 * (self.decision_function(pairs) > self.threshold_) + 1 def decision_function(self, pairs): @@ -401,6 +401,7 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ + check_is_fitted(self, 'transformer_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -443,11 +444,22 @@ def score(self, quadruplets, y=None): points, or 2D array of indices of quadruplets if the metric learner uses a preprocessor. - y : Ignored, for scikit-learn compatibility. + y : array-like, shape=(n_constraints,) or `None` + Labels of constraints. y[i] should be 1 if + d(pairs[i, 0], X[i, 1]) is wanted to be larger than + d(X[i, 2], X[i, 3]), and -1 if it is wanted to be smaller. If None, + `y` will be set to `np.ones(quadruplets.shape[0])`, i.e. we want all + first two points to be closer than the last two points in each + quadruplet. Returns ------- score : float The quadruplets score. """ - return -np.mean(self.predict(quadruplets)) + quadruplets = check_input(quadruplets, y, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) + if y is None: + y = np.ones(quadruplets.shape[0]) + return accuracy_score(y, self.predict(quadruplets)) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 312990ab..b1f65c48 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -46,9 +46,15 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, super(_BaseLSML, self).__init__(preprocessor) def _fit(self, quadruplets, y=None, weights=None): - quadruplets = self._prepare_inputs(quadruplets, + quadruplets = self._prepare_inputs(quadruplets, y, type_of_inputs='tuples') - + if y is None: + y = np.ones(quadruplets.shape[0]) + # we swap the quadruplets where the label is -1 since they are not in + # the right order + quadruplets_to_swap = quadruplets[y == -1] + quadruplets[y == -1] = np.column_stack([quadruplets_to_swap[:, 2:], + quadruplets_to_swap[:, :2]]) # check to make sure that no two constrained vectors are identical vab = quadruplets[:, 0, :] - quadruplets[:, 1, :] vcd = quadruplets[:, 2, :] - quadruplets[:, 3, :] @@ -144,7 +150,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): metric (See function `transformer_from_metric`.) 
""" - def fit(self, quadruplets, weights=None): + def fit(self, quadruplets, y=None, weights=None): """Learn the LSML model. Parameters @@ -152,10 +158,14 @@ def fit(self, quadruplets, weights=None): quadruplets : array-like, shape=(n_constraints, 4, n_features) or (n_constraints, 4) 3D array-like of quadruplets of points or 2D array of quadruplets of - indicators. In order to supervise the algorithm in the right way, we - should have the four samples ordered in a way such that: - d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < - n_constraints. + indicators. + y : array-like, shape=(n_constraints,) or `None` + Labels of constraints. y[i] should be 1 if + d(pairs[i, 0], X[i, 1]) is wanted to be larger than + d(X[i, 2], X[i, 3]), and -1 if it is wanted to be smaller. If None, + `y` will be set to `np.ones(quadruplets.shape[0])`, i.e. we want to + put all first two points closer than the last two points in each + quadruplet. weights : (n_constraints,) array of floats, optional scale factor for each constraint diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index c497f64b..34d107ea 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -11,8 +11,8 @@ @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) -def test_predict_monotonous(estimator, build_dataset, - with_preprocessor): +def test_predict_only_one_or_minus_one(estimator, build_dataset, + with_preprocessor): """Test that all predicted values are either +1 or -1""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) @@ -21,7 +21,7 @@ def test_predict_monotonous(estimator, build_dataset, pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, labels) estimator.fit(pairs_train, y_train) - predictions = estimator.predict(pairs_test, y_test) + predictions = estimator.predict(pairs_test) assert np.isin(predictions, [-1, 1]).all() @@ -29,7 +29,7 @@ def test_predict_monotonous(estimator, build_dataset, @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) def test_predict_monotonous(estimator, build_dataset, - with_preprocessor): + with_preprocessor): """Test that there is a threshold distance separating points labeled as similar and points labeled as dissimilar """ input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py new file mode 100644 index 00000000..b272a52d --- /dev/null +++ b/test/test_quadruplets_classifiers.py @@ -0,0 +1,65 @@ +import pytest +from sklearn.exceptions import NotFittedError +from sklearn.model_selection import train_test_split + +from test.test_utils import quadruplets_learners, ids_quadruplets_learners +from sklearn.utils.testing import set_random_state +from sklearn import clone +import numpy as np + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, + ids=ids_quadruplets_learners) +def test_predict_only_one_or_minus_one(estimator, build_dataset, + with_preprocessor): + """Test that all predicted values are either +1 or -1""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + (quadruplets_train, + quadruplets_test, y_train, 
y_test) = train_test_split(input_data, labels) + estimator.fit(quadruplets_train, y_train) + predictions = estimator.predict(quadruplets_test) + assert np.isin(predictions, [-1, 1]).all() + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, + ids=ids_quadruplets_learners) +def test_predict_monotonous(estimator, build_dataset, + with_preprocessor): + """Test that there is a threshold distance separating points labeled as + similar and points labeled as dissimilar """ + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + (quadruplets_train, + quadruplets_test, y_train, y_test) = train_test_split(input_data, labels) + estimator.fit(quadruplets_train, y_train) + distances = estimator.score_quadruplets(quadruplets_test) + predictions = estimator.predict(quadruplets_test) + min_dissimilar = np.min(distances[predictions == -1]) + max_similar = np.max(distances[predictions == 1]) + assert max_similar <= min_dissimilar + separator = np.mean([min_dissimilar, max_similar]) + assert (predictions[distances > separator] == -1).all() + assert (predictions[distances < separator] == 1).all() + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, + ids=ids_quadruplets_learners) +def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, + with_preprocessor): + """Test that a NotFittedError is raised if someone tries to predict and + the metric learner has not been fitted.""" + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.predict(input_data) + From 9ffe8f74e6cd8f926770a5aaf16d05fbc37a059a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 6 Feb 2019 16:32:52 +0100 Subject: [PATCH 04/41] Make CalibratedClassifierCV work (for preprocessor case) thanks to classes_ --- metric_learn/base_metric.py | 1 + metric_learn/itml.py | 5 +++++ metric_learn/mmc.py | 5 +++++ metric_learn/sdml.py | 5 +++++ test/test_sklearn_compat.py | 29 ++++++++++++++++++++++++++++- 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 6711efb8..079968bb 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -296,6 +296,7 @@ def get_mahalanobis_matrix(self): class _PairsClassifierMixin(BaseMetricLearner): + classes_ = [-1, 1] _tuple_size = 2 # number of points in a tuple, 2 for pairs def predict(self, pairs): diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 2a9e987a..aa75463e 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -153,6 +153,11 @@ class ITML(_BaseITML, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. + + classes_ : `list` + The possible labels of the pairs `LSML` can fit on. `classes_ = [-1, 1]`, + where -1 means points in a pair are dissimilar (negative label), and 1 + means they are similar (positive label). 
""" def fit(self, pairs, y, bounds=None): diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 2ddcced2..138b1d71 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -364,6 +364,11 @@ class MMC(_BaseMMC, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. + + classes_ : `list` + The possible labels of the pairs `MMC` can fit on. `classes_ = [-1, 1]`, + where -1 means points in a pair are dissimilar (negative label), and 1 + means they are similar (positive label). """ def fit(self, pairs, y): diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 096dc0ed..536bd28a 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -86,6 +86,11 @@ class SDML(_BaseSDML, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. + + classes_ : `list` + The possible labels of the pairs `SDML` can fit on. `classes_ = [-1, 1]`, + where -1 means points in a pair are dissimilar (negative label), and 1 + means they are similar (positive label). """ def fit(self, pairs, y): diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 3e2b9113..096fbf37 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -1,5 +1,6 @@ import pytest import unittest +from sklearn.calibration import CalibratedClassifierCV from sklearn.utils.estimator_checks import check_estimator from sklearn.base import TransformerMixin from sklearn.pipeline import make_pipeline @@ -19,7 +20,8 @@ from sklearn.utils.testing import _get_args from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, - ids_tuples_learners) + ids_tuples_learners, pairs_learners, + ids_pairs_learners) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -89,6 +91,31 @@ def test_mmc(self): # ---------------------- Test scikit-learn compatibility ---------------------- +@pytest.mark.parametrize('with_preprocessor', + [True, + # TODO: uncomment the below line as soon as + # https://github.com/scikit-learn/scikit-learn/ + # issues/13077 is solved: + # False, + ]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_calibrated_classifier_CV(estimator, build_dataset, + with_preprocessor): + """Tests that metric-learn tuples estimators' work with scikit-learn's + CalibratedClassifierCV. 
+ """ + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + calibrated_clf = CalibratedClassifierCV(estimator) + + # test fit and predict_proba + calibrated_clf.fit(input_data, labels) + calibrated_clf.predict_proba(input_data) + + @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', tuples_learners, ids=ids_tuples_learners) From 3354fb1206f8e9a85051057ad6197048582f0c47 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 7 Feb 2019 13:23:57 +0100 Subject: [PATCH 05/41] Fix some tests and PEP8 errors --- metric_learn/base_metric.py | 17 ++++++++++++----- test/test_quadruplets_classifiers.py | 26 +------------------------- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 079968bb..61582977 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -374,8 +374,10 @@ def score(self, pairs, y): def set_default_threshold(self, pairs, y): """Returns a threshold that is the mean between the similar metrics mean, and the dissimilar metrics mean""" - similar_threshold = np.mean(self.decision_function(pairs[y==1])) - dissimilar_threshold = np.mean(self.decision_function(pairs[y==1])) + similar_threshold = np.mean(self.decision_function( + pairs[(y == 1).ravel()])) + dissimilar_threshold = np.mean(self.decision_function( + pairs[(y == -1).ravel()])) self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) @@ -458,9 +460,14 @@ def score(self, quadruplets, y=None): score : float The quadruplets score. """ - quadruplets = check_input(quadruplets, y, type_of_inputs='tuples', - preprocessor=self.preprocessor_, - estimator=self, tuple_size=self._tuple_size) + checked_input = check_input(quadruplets, y, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) + # checked_input will be of the form `(checked_quadruplets, checked_y)` if + # `y` is not None, or just `checked_quadruplets` if `y` is None + quadruplets = checked_input if y is None else checked_input[0] if y is None: y = np.ones(quadruplets.shape[0]) + else: + y = checked_input[1] return accuracy_score(y, self.predict(quadruplets)) diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index b272a52d..56680476 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -25,35 +25,11 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, assert np.isin(predictions, [-1, 1]).all() -@pytest.mark.parametrize('with_preprocessor', [True, False]) -@pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, - ids=ids_quadruplets_learners) -def test_predict_monotonous(estimator, build_dataset, - with_preprocessor): - """Test that there is a threshold distance separating points labeled as - similar and points labeled as dissimilar """ - input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - set_random_state(estimator) - (quadruplets_train, - quadruplets_test, y_train, y_test) = train_test_split(input_data, labels) - estimator.fit(quadruplets_train, y_train) - distances = estimator.score_quadruplets(quadruplets_test) - predictions = estimator.predict(quadruplets_test) - min_dissimilar = 
np.min(distances[predictions == -1]) - max_similar = np.max(distances[predictions == 1]) - assert max_similar <= min_dissimilar - separator = np.mean([min_dissimilar, max_similar]) - assert (predictions[distances > separator] == -1).all() - assert (predictions[distances < separator] == 1).all() - - @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', quadruplets_learners, ids=ids_quadruplets_learners) def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, - with_preprocessor): + with_preprocessor): """Test that a NotFittedError is raised if someone tries to predict and the metric learner has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) From 12cb5f1c2e618aa1884c6f0ae46b60ea62164820 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 19 Feb 2019 12:20:07 +0100 Subject: [PATCH 06/41] change the sign in decision function --- metric_learn/base_metric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 61582977..1e087b53 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -342,7 +342,7 @@ def decision_function(self, pairs): pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return self.score_pairs(pairs) + return - self.score_pairs(pairs) def score(self, pairs, y): """Computes score of pairs similarity prediction. @@ -429,8 +429,8 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ - return (self.score_pairs(quadruplets[:, :2]) - - self.score_pairs(quadruplets[:, 2:])) + return (self.score_pairs(quadruplets[:, 2:]) - + self.score_pairs(quadruplets[:, :2])) def score(self, quadruplets, y=None): """Computes score on input quadruplets From dd8113e39e72cfa706bba43e30f305a3e8283121 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 19 Feb 2019 14:07:02 +0100 Subject: [PATCH 07/41] Add docstring for threshold_ and classes_ in the base _PairsClassifier class --- metric_learn/base_metric.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 1e087b53..da5b4598 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -295,6 +295,19 @@ def get_mahalanobis_matrix(self): class _PairsClassifierMixin(BaseMetricLearner): + """ + Attributes + ---------- + threshold_ : `float` + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. + + classes_ : `list` + The possible labels of the pairs `MMC` can fit on. `classes_ = [-1, 1]`, + where -1 means points in a pair are dissimilar (negative label), and 1 + means they are similar (positive label). 
+ """ classes_ = [-1, 1] _tuple_size = 2 # number of points in a tuple, 2 for pairs From 1c8cd290c71134409ab2641bf170b05a080febf1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 19 Feb 2019 17:30:18 +0100 Subject: [PATCH 08/41] remove quadruplets from the test with scikit learn custom scorings --- test/test_sklearn_compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 096fbf37..e9f4b546 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -117,8 +117,8 @@ def test_calibrated_classifier_CV(estimator, build_dataset, @pytest.mark.parametrize('with_preprocessor', [True, False]) -@pytest.mark.parametrize('estimator, build_dataset', tuples_learners, - ids=ids_tuples_learners) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) def test_various_scoring_on_tuples_learners(estimator, build_dataset, with_preprocessor): """Tests that metric-learn estimators' scoring returns something finite, From d12729ab53372e03a30ffc0e4ef826d431f2422f Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 20 Feb 2019 11:14:55 +0100 Subject: [PATCH 09/41] Remove argument y in quadruplets learners and lsml --- metric_learn/base_metric.py | 22 ++-------------------- metric_learn/lsml.py | 26 ++++++++------------------ test/test_quadruplets_classifiers.py | 2 +- 3 files changed, 11 insertions(+), 39 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index da5b4598..40a460d2 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -445,7 +445,7 @@ def decision_function(self, quadruplets): return (self.score_pairs(quadruplets[:, 2:]) - self.score_pairs(quadruplets[:, :2])) - def score(self, quadruplets, y=None): + def score(self, quadruplets): """Computes score on input quadruplets Returns the accuracy score of the following classification task: a record @@ -460,27 +460,9 @@ def score(self, quadruplets, y=None): points, or 2D array of indices of quadruplets if the metric learner uses a preprocessor. - y : array-like, shape=(n_constraints,) or `None` - Labels of constraints. y[i] should be 1 if - d(pairs[i, 0], X[i, 1]) is wanted to be larger than - d(X[i, 2], X[i, 3]), and -1 if it is wanted to be smaller. If None, - `y` will be set to `np.ones(quadruplets.shape[0])`, i.e. we want all - first two points to be closer than the last two points in each - quadruplet. - Returns ------- score : float The quadruplets score. 
""" - checked_input = check_input(quadruplets, y, type_of_inputs='tuples', - preprocessor=self.preprocessor_, - estimator=self, tuple_size=self._tuple_size) - # checked_input will be of the form `(checked_quadruplets, checked_y)` if - # `y` is not None, or just `checked_quadruplets` if `y` is None - quadruplets = checked_input if y is None else checked_input[0] - if y is None: - y = np.ones(quadruplets.shape[0]) - else: - y = checked_input[1] - return accuracy_score(y, self.predict(quadruplets)) + return - np.mean(self.predict(quadruplets)) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index b1f65c48..536719ba 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -45,16 +45,10 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, self.verbose = verbose super(_BaseLSML, self).__init__(preprocessor) - def _fit(self, quadruplets, y=None, weights=None): - quadruplets = self._prepare_inputs(quadruplets, y, + def _fit(self, quadruplets, weights=None): + quadruplets = self._prepare_inputs(quadruplets, type_of_inputs='tuples') - if y is None: - y = np.ones(quadruplets.shape[0]) - # we swap the quadruplets where the label is -1 since they are not in - # the right order - quadruplets_to_swap = quadruplets[y == -1] - quadruplets[y == -1] = np.column_stack([quadruplets_to_swap[:, 2:], - quadruplets_to_swap[:, :2]]) + # check to make sure that no two constrained vectors are identical vab = quadruplets[:, 0, :] - quadruplets[:, 1, :] vcd = quadruplets[:, 2, :] - quadruplets[:, 3, :] @@ -150,7 +144,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): metric (See function `transformer_from_metric`.) """ - def fit(self, quadruplets, y=None, weights=None): + def fit(self, quadruplets, weights=None): """Learn the LSML model. Parameters @@ -158,14 +152,10 @@ def fit(self, quadruplets, y=None, weights=None): quadruplets : array-like, shape=(n_constraints, 4, n_features) or (n_constraints, 4) 3D array-like of quadruplets of points or 2D array of quadruplets of - indicators. - y : array-like, shape=(n_constraints,) or `None` - Labels of constraints. y[i] should be 1 if - d(pairs[i, 0], X[i, 1]) is wanted to be larger than - d(X[i, 2], X[i, 3]), and -1 if it is wanted to be smaller. If None, - `y` will be set to `np.ones(quadruplets.shape[0])`, i.e. we want to - put all first two points closer than the last two points in each - quadruplet. + indicators. In order to supervise the algorithm in the right way, we + should have the four samples ordered in a way such that: + d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < + n_constraints. 
weights : (n_constraints,) array of floats, optional scale factor for each constraint diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index 56680476..ee6ed7eb 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -20,7 +20,7 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, set_random_state(estimator) (quadruplets_train, quadruplets_test, y_train, y_test) = train_test_split(input_data, labels) - estimator.fit(quadruplets_train, y_train) + estimator.fit(quadruplets_train) predictions = estimator.predict(quadruplets_test) assert np.isin(predictions, [-1, 1]).all() From dc9e21d32d0c1ed3560052337b8a41e013e3a6f2 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 20 Feb 2019 11:32:45 +0100 Subject: [PATCH 10/41] FIX fix docstrings of decision functions --- metric_learn/base_metric.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 40a460d2..9e9d0e7e 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -335,10 +335,12 @@ def predict(self, pairs): return - 2 * (self.decision_function(pairs) > self.threshold_) + 1 def decision_function(self, pairs): - """Returns the learned metric between input pairs. + """Returns the decision function used to classify the pairs. - Returns the learned metric value between samples in every pair. It should - ideally be low for similar samples and high for dissimilar samples. + Returns the opposite of the learned metric value between samples in every + pair. Hence it should ideally be low for dissimilar samples and high for + similar samples. This is the decision function that is used to classify + pairs as similar (+1), or dissimilar (-1). Parameters ---------- @@ -350,7 +352,7 @@ def decision_function(self, pairs): Returns ------- y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) - The predicted learned metric value between samples in every pair. + The predicted decision function value for each pair. """ pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, @@ -426,8 +428,12 @@ def predict(self, quadruplets): def decision_function(self, quadruplets): """Predicts differences between sample distances in input quadruplets. - For each quadruplet of samples, computes the difference between the learned - metric of the first pair minus the learned metric of the second pair. + For each quadruplet in the samples, computes the difference between the + learned metric of the second pair minus the learned metric of the first + pair. The higher it is, the more probable it is that the pairs in the + quadruplet are presented in the right order, i.e. that the label of the + quadruplet is 1. The lower it is, the more probable it is that the label of + the quadruplet is -1. 
Parameters ---------- From 402729fbc3cd24eadb002d3880ad7ff3af71bb1b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 20 Feb 2019 15:18:11 +0100 Subject: [PATCH 11/41] FIX the threshold by taking the opposite (to be adapted to the decision function) --- metric_learn/base_metric.py | 12 ++++++------ metric_learn/itml.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 9e9d0e7e..b7927f38 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -332,7 +332,7 @@ def predict(self, pairs): The predicted learned metric value between samples in every pair. """ check_is_fitted(self, ['threshold_', 'transformer_']) - return - 2 * (self.decision_function(pairs) > self.threshold_) + 1 + return 2 * (self.decision_function(pairs) > self.threshold_) - 1 def decision_function(self, pairs): """Returns the decision function used to classify the pairs. @@ -387,13 +387,13 @@ def score(self, pairs, y): return roc_auc_score(y, self.decision_function(pairs)) def set_default_threshold(self, pairs, y): - """Returns a threshold that is the mean between the similar metrics - mean, and the dissimilar metrics mean""" - similar_threshold = np.mean(self.decision_function( + """Returns a threshold that is the opposite of the mean between the similar + metrics mean and the dissimilar metrics mean""" + similar_threshold = np.mean(self.score_pairs( pairs[(y == 1).ravel()])) - dissimilar_threshold = np.mean(self.decision_function( + dissimilar_threshold = np.mean(self.score_pairs( pairs[(y == -1).ravel()])) - self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) + self.threshold_ = - np.mean([similar_threshold, dissimilar_threshold]) class _QuadrupletsClassifierMixin(BaseMetricLearner): diff --git a/metric_learn/itml.py b/metric_learn/itml.py index aa75463e..a40476c7 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -187,7 +187,7 @@ def fit(self, pairs, y, bounds=None): Returns the instance. 
""" self._fit(pairs, y, bounds=bounds) - self.threshold_ = np.mean(self.bounds_) + self.threshold_ = - np.mean(self.bounds_) return self From aaac3deb37e6dd0d5b3d4e67443ac6e318dcc874 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Feb 2019 09:38:49 +0100 Subject: [PATCH 12/41] Fix tests to have no y for quadruplets' estimator fit --- test/test_mahalanobis_mixin.py | 38 +++++----- test/test_pairs_classifiers.py | 3 +- test/test_sklearn_compat.py | 131 +++++++++++++++++++-------------- test/test_utils.py | 62 ++++++++++------ 4 files changed, 140 insertions(+), 94 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 1e555e73..a85d9e8f 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -11,7 +11,8 @@ from metric_learn._util import make_context -from test.test_utils import ids_metric_learners, metric_learners +from test.test_utils import (ids_metric_learners, metric_learners, + make_args_inc_quadruplets) RNG = check_random_state(0) @@ -25,7 +26,7 @@ def test_score_pairs_pairwise(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) pairwise = model.score_pairs(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) @@ -49,7 +50,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.transformer_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - @@ -65,7 +66,7 @@ def test_score_pairs_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) assert np.isfinite(model.score_pairs(pairs)).all() @@ -79,7 +80,7 @@ def test_score_pairs_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) assert model.score_pairs(tuples).shape == (tuples.shape[0],) context = make_context(estimator) @@ -110,7 +111,7 @@ def test_embed_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) embedded_points = X.dot(model.transformer_.T) assert_array_almost_equal(model.transform(X), embedded_points) @@ -122,7 +123,7 @@ def test_embed_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == X.shape # assert that ValueError is thrown if input shape is 1D @@ -135,8 +136,11 @@ def test_embed_dim(estimator, build_dataset): assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}: + # TODO: + # avoid this enumeration and rather test if hasattr n_components + # 
as soon as we have made the arguments names as such (issue #167) model.set_params(num_dims=2) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) # assert that ValueError is thrown if input shape is 1D with pytest.raises(ValueError) as raised_error: @@ -151,7 +155,7 @@ def test_embed_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) assert np.isfinite(model.transform(X)).all() @@ -162,7 +166,7 @@ def test_embed_is_linear(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) assert_array_almost_equal(model.transform(X[:10] + X[10:20]), model.transform(X[:10]) + model.transform(X[10:20])) @@ -181,7 +185,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] a, b = (rng.randn(n_features), rng.randn(n_features)) @@ -200,7 +204,7 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -226,7 +230,7 @@ def test_metric_raises_deprecation_warning(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) with pytest.warns(DeprecationWarning) as raised_warning: model.metric() @@ -243,7 +247,7 @@ def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) clustering = DBSCAN(metric=model.get_metric()) clustering.fit(X) @@ -256,7 +260,7 @@ def test_get_squared_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -276,10 +280,10 @@ def test_transformer_is_2D(estimator, build_dataset): model = clone(estimator) set_random_state(model) # test that it works for X.shape[1] features - model.fit(input_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) assert model.transformer_.shape == (X.shape[1], X.shape[1]) # test that it works for 1 feature trunc_data = input_data[..., :1] - model.fit(trunc_data, labels) + model.fit(*make_args_inc_quadruplets(estimator, trunc_data, labels)) assert model.transformer_.shape == (1, 1) # the transformer must be 2D diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 34d107ea..b67e7268 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py 
@@ -22,7 +22,8 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, labels) estimator.fit(pairs_train, y_train) predictions = estimator.predict(pairs_test) - assert np.isin(predictions, [-1, 1]).all() + not_valid = [e for e in predictions if e not in [-1, 1]] + assert len(not_valid) == 0 @pytest.mark.parametrize('with_preprocessor', [True, False]) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index e9f4b546..e14e2cf9 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -21,7 +21,8 @@ from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, ids_tuples_learners, pairs_learners, - ids_pairs_learners) + ids_pairs_learners, make_args_inc_quadruplets, + quadruplets_learners) # Wrap the _Supervised methods with a deterministic wrapper for testing. @@ -121,10 +122,11 @@ def test_calibrated_classifier_CV(estimator, build_dataset, ids=ids_pairs_learners) def test_various_scoring_on_tuples_learners(estimator, build_dataset, with_preprocessor): - """Tests that metric-learn estimators' scoring returns something finite, + """Tests that scikit-learn's scoring returns something finite, for other scoring than default scoring. (List of scikit-learn's scores can be found in sklearn.metrics.scorer). For each type of output (predict, predict_proba, decision_function), we test a bunch of scores. + We only test on pairs learners because quadruplets don't have a y argument. """ input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) @@ -147,11 +149,11 @@ def test_various_scoring_on_tuples_learners(estimator, build_dataset, def check_score_is_finite(scoring, estimator, input_data, labels): - estimator = clone(estimator) - assert np.isfinite(cross_val_score(estimator, input_data, labels, - scoring=scoring)).all() - estimator.fit(input_data, labels) - assert np.isfinite(get_scorer(scoring)(estimator, input_data, labels)) + estimator = clone(estimator) + assert np.isfinite(cross_val_score(estimator, input_data, labels, + scoring=scoring)).all() + estimator.fit(input_data, labels) + assert np.isfinite(get_scorer(scoring)(estimator, input_data, labels)) @pytest.mark.parametrize('estimator, build_dataset', tuples_learners, @@ -163,9 +165,15 @@ def test_cross_validation_is_finite(estimator, build_dataset): estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - assert np.isfinite(cross_val_score(estimator, input_data, labels)).all() + assert np.isfinite(cross_val_score(estimator, + *make_args_inc_quadruplets(estimator, + input_data, + labels))).all() assert np.isfinite(cross_val_predict(estimator, - input_data, labels)).all() + *make_args_inc_quadruplets(estimator, + input_data, + labels) + )).all() @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -196,23 +204,25 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, train_mask = np.ones(input_data.shape[0], bool) train_mask[test_slice] = False y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(input_data[train_mask], y_train) + estimator.fit(*make_args_inc_quadruplets(estimator, + input_data[train_mask], + y_train)) if hasattr(estimator, "score"): - scores.append(estimator.score(input_data[test_slice], y_test)) + scores.append(estimator.score(*make_args_inc_quadruplets(estimator, + input_data[test_slice], y_test))) if hasattr(estimator, "predict"): predictions[test_slice] = estimator.predict(input_data[test_slice]) 
if hasattr(estimator, "score"): - assert all(scores == cross_val_score(estimator, input_data, labels, - cv=kfold)) + assert all(scores == cross_val_score(estimator, + *make_args_inc_quadruplets(estimator, input_data, labels), cv=kfold)) if hasattr(estimator, "predict"): - assert all(predictions == cross_val_predict(estimator, input_data, - labels, - cv=kfold)) + assert all(predictions == cross_val_predict(estimator, + *make_args_inc_quadruplets(estimator, input_data, labels), cv=kfold)) def check_score(estimator, tuples, y): if hasattr(estimator, "score"): - score = estimator.score(tuples, y) + score = estimator.score(*make_args_inc_quadruplets(estimator, tuples, y)) assert np.isfinite(score) @@ -236,7 +246,7 @@ def test_simple_estimator(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(tuples_train, y_train) + estimator.fit(*make_args_inc_quadruplets(estimator, tuples_train, y_train)) check_score(estimator, tuples_test, y_test) check_predict(estimator, tuples_test) @@ -283,7 +293,9 @@ def test_estimators_fit_returns_self(estimator, build_dataset, input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - assert estimator.fit(input_data, labels) is estimator + assert estimator.fit(*make_args_inc_quadruplets(estimator, + input_data, + labels)) is estimator @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -293,42 +305,53 @@ def test_pipeline_consistency(estimator, build_dataset, with_preprocessor): # Adapted from scikit learn # check that make_pipeline(est) gives same score as est - input_data, y, preprocessor, _ = build_dataset(with_preprocessor) - - def make_random_state(estimator, in_pipeline): - rs = {} - name_estimator = estimator.__class__.__name__ - if name_estimator[-11:] == '_Supervised': - name_param = 'random_state' - if in_pipeline: - name_param = name_estimator.lower() + '__' + name_param - rs[name_param] = check_random_state(0) - return rs + # we do this test on all except quadruplets (since they don't have a y + # in fit): + if estimator.__class__.__name__ not in [e.__class__.__name__ + for (e, _) in + quadruplets_learners]: + input_data, y, preprocessor, _ = build_dataset(with_preprocessor) + + def make_random_state(estimator, in_pipeline): + rs = {} + name_estimator = estimator.__class__.__name__ + if name_estimator[-11:] == '_Supervised': + name_param = 'random_state' + if in_pipeline: + name_param = name_estimator.lower() + '__' + name_param + rs[name_param] = check_random_state(0) + return rs - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - pipeline = make_pipeline(estimator) - estimator.fit(input_data, y, **make_random_state(estimator, False)) - pipeline.fit(input_data, y, **make_random_state(estimator, True)) - - if hasattr(estimator, 'score'): - result = estimator.score(input_data, y) - result_pipe = pipeline.score(input_data, y) - assert_allclose_dense_sparse(result, result_pipe) - - if hasattr(estimator, 'predict'): - result = estimator.predict(input_data) - result_pipe = pipeline.predict(input_data) - assert_allclose_dense_sparse(result, result_pipe) - - if issubclass(estimator.__class__, TransformerMixin): - if hasattr(estimator, 'transform'): - result = estimator.transform(input_data) - result_pipe = pipeline.transform(input_data) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + pipeline = 
make_pipeline(estimator) + estimator.fit(*make_args_inc_quadruplets(estimator, input_data, y), + **make_random_state(estimator, False)) + pipeline.fit(*make_args_inc_quadruplets(estimator, input_data, y), + **make_random_state(estimator, True)) + + if hasattr(estimator, 'score'): + result = estimator.score(*make_args_inc_quadruplets(estimator, + input_data, + y)) + result_pipe = pipeline.score(*make_args_inc_quadruplets(estimator, + input_data, + y)) assert_allclose_dense_sparse(result, result_pipe) + if hasattr(estimator, 'predict'): + result = estimator.predict(input_data) + result_pipe = pipeline.predict(input_data) + assert_allclose_dense_sparse(result, result_pipe) + + if issubclass(estimator.__class__, TransformerMixin): + if hasattr(estimator, 'transform'): + result = estimator.transform(input_data) + result_pipe = pipeline.transform(input_data) + assert_allclose_dense_sparse(result, result_pipe) -@pytest.mark.parametrize('with_preprocessor',[True, False]) + +@pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_dict_unchanged(estimator, build_dataset, with_preprocessor): @@ -339,7 +362,7 @@ def test_dict_unchanged(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) if hasattr(estimator, "num_dims"): estimator.num_dims = 1 - estimator.fit(input_data, labels) + estimator.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) def check_dict(): assert estimator.__dict__ == dict_before, ( @@ -356,7 +379,7 @@ def check_dict(): check_dict() -@pytest.mark.parametrize('with_preprocessor',[True, False]) +@pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_dont_overwrite_parameters(estimator, build_dataset, @@ -370,7 +393,7 @@ def test_dont_overwrite_parameters(estimator, build_dataset, estimator.num_dims = 1 dict_before_fit = estimator.__dict__.copy() - estimator.fit(input_data, labels) + estimator.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) dict_after_fit = estimator.__dict__ public_keys_after_fit = [key for key in dict_after_fit.keys() diff --git a/test/test_utils.py b/test/test_utils.py index 5e640dbc..38226fef 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -107,23 +107,23 @@ def build_quadruplets(with_preprocessor=False): (SDML(), build_pairs), ] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, - [learner for (learner, _) in - pairs_learners])) - -classifiers = [(Covariance(), build_classification), - (LFDA(), build_classification), - (LMNN(), build_classification), - (NCA(), build_classification), - (RCA(), build_classification), - (ITML_Supervised(max_iter=5), build_classification), - (LSML_Supervised(), build_classification), - (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=10), build_classification), - (SDML_Supervised(), build_classification) - ] + [learner for (learner, _) in + pairs_learners])) + +classifiers = [(Covariance(), build_classification), + (LFDA(), build_classification), + (LMNN(), build_classification), + (NCA(), build_classification), + (RCA(), build_classification), + (ITML_Supervised(max_iter=5), build_classification), + (LSML_Supervised(), build_classification), + (MMC_Supervised(max_iter=5), build_classification), + (RCA_Supervised(num_chunks=10), build_classification), + (SDML_Supervised(), build_classification) + ] 
ids_classifiers = list(map(lambda x: x.__class__.__name__, - [learner for (learner, _) in - classifiers])) + [learner for (learner, _) in + classifiers])) regressors = [(MLKR(), build_regression)] ids_regressors = list(map(lambda x: x.__class__.__name__, @@ -142,6 +142,18 @@ def build_quadruplets(with_preprocessor=False): ids_metric_learners = ids_tuples_learners + ids_supervised_learners +def make_args_inc_quadruplets(estimator, X, y): + """Quadruplets learners have no y in fit, but to write test for all + estimators, it is convenient to have this function, that will return X and y + if the estimator needs a y to fit on, and just X otherwise.""" + if estimator.__class__.__name__ in [e.__class__.__name__ + for (e, _) in + quadruplets_learners]: + return (X,) + else: + return (X, y) + + def mock_preprocessor(indices): """A preprocessor for testing purposes that returns an all ones 3D array """ @@ -839,8 +851,8 @@ class MockMetricLearner(MahalanobisMixin): "or a callable.".format(type(preprocessor))) -@pytest.mark.parametrize('estimator', [ITML(), LSML(), MMC(), SDML()], - ids=['ITML', 'LSML', 'MMC', 'SDML']) +@pytest.mark.parametrize('estimator', [e for (e, _) in tuples_learners], + ids=ids_tuples_learners) def test_error_message_tuple_size(estimator): """Tests that if a tuples learner is not given the good number of points per tuple, it throws an error message""" @@ -850,7 +862,7 @@ def test_error_message_tuple_size(estimator): [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) y = [1, 1] with pytest.raises(ValueError) as raised_err: - estimator.fit(invalid_pairs, y) + estimator.fit(*make_args_inc_quadruplets(estimator, invalid_pairs, y)) expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(estimator._tuple_size, make_context(estimator), @@ -935,19 +947,25 @@ def make_random_state(estimator): estimator_with_preprocessor = clone(estimator) set_random_state(estimator_with_preprocessor) estimator_with_preprocessor.set_params(preprocessor=X) - estimator_with_preprocessor.fit(indices_train, y_train, + estimator_with_preprocessor.fit(*make_args_inc_quadruplets(estimator, + indices_train, + y_train), **make_random_state(estimator)) estimator_without_preprocessor = clone(estimator) set_random_state(estimator_without_preprocessor) estimator_without_preprocessor.set_params(preprocessor=None) - estimator_without_preprocessor.fit(formed_train, y_train, + estimator_without_preprocessor.fit(*make_args_inc_quadruplets(estimator, + formed_train, + y_train), **make_random_state(estimator)) estimator_with_prep_formed = clone(estimator) set_random_state(estimator_with_prep_formed) estimator_with_prep_formed.set_params(preprocessor=X) - estimator_with_prep_formed.fit(indices_train, y_train, + estimator_with_prep_formed.fit(*make_args_inc_quadruplets(estimator, + indices_train, + y_train), **make_random_state(estimator)) # test prediction methods From e5b1e47b3a35d5718a11fb2da4670dd01f3a1a10 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Feb 2019 10:06:25 +0100 Subject: [PATCH 13/41] Remove isin to be compatible with old numpy versions --- test/test_quadruplets_classifiers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index ee6ed7eb..2bf36b3f 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -22,7 +22,8 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset, 
quadruplets_test, y_train, y_test) = train_test_split(input_data, labels) estimator.fit(quadruplets_train) predictions = estimator.predict(quadruplets_test) - assert np.isin(predictions, [-1, 1]).all() + not_valid = [e for e in predictions if e not in [-1, 1]] + assert len(not_valid) == 0 @pytest.mark.parametrize('with_preprocessor', [True, False]) From a0cb3cae896a07b5e73d566a181ed01abe89ed7e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Feb 2019 10:49:37 +0100 Subject: [PATCH 14/41] Fix threshold so that it has a positive value and add small test --- metric_learn/base_metric.py | 7 ++++--- metric_learn/mmc.py | 2 +- metric_learn/sdml.py | 2 +- test/test_pairs_classifiers.py | 23 ++++++++++++++++++++++- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index b7927f38..80fc7a7f 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -138,6 +138,7 @@ def get_metric(self): use the metric learner's preprocessor, and works on concatenated arrays. """ + class MetricTransformer(six.with_metaclass(ABCMeta)): @abstractmethod @@ -332,7 +333,7 @@ def predict(self, pairs): The predicted learned metric value between samples in every pair. """ check_is_fitted(self, ['threshold_', 'transformer_']) - return 2 * (self.decision_function(pairs) > self.threshold_) - 1 + return 2 * (self.decision_function(pairs) > - self.threshold_) - 1 def decision_function(self, pairs): """Returns the decision function used to classify the pairs. @@ -386,14 +387,14 @@ def score(self, pairs, y): """ return roc_auc_score(y, self.decision_function(pairs)) - def set_default_threshold(self, pairs, y): + def _set_default_threshold(self, pairs, y): """Returns a threshold that is the opposite of the mean between the similar metrics mean and the dissimilar metrics mean""" similar_threshold = np.mean(self.score_pairs( pairs[(y == 1).ravel()])) dissimilar_threshold = np.mean(self.score_pairs( pairs[(y == -1).ravel()])) - self.threshold_ = - np.mean([similar_threshold, dissimilar_threshold]) + self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) class _QuadrupletsClassifierMixin(BaseMetricLearner): diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 138b1d71..3892a969 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -390,7 +390,7 @@ def fit(self, pairs, y): Returns the instance. """ self._fit(pairs, y) - self.set_default_threshold(pairs, y) + self._set_default_threshold(pairs, y) return self diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 536bd28a..359e4fe1 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -112,7 +112,7 @@ def fit(self, pairs, y): Returns the instance. 
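# Reading the sign handling introduced above: decision_function(pairs)
# returns the opposite of the learned distance d(a, b), and threshold_ is now
# stored as a positive distance, so the prediction rule
#
#     2 * (decision_function(pairs) > - threshold_) - 1
#
# returns +1 exactly when d(a, b) < threshold_ (the pair is predicted
# similar), and -1 otherwise.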
""" self._fit(pairs, y) - self.set_default_threshold(pairs, y) + self._set_default_threshold(pairs, y) return self diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index b67e7268..3ff47c18 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -1,4 +1,5 @@ import pytest +from metric_learn.base_metric import _PairsClassifierMixin, MahalanobisMixin from sklearn.exceptions import NotFittedError from sklearn.model_selection import train_test_split @@ -54,7 +55,7 @@ def test_predict_monotonous(estimator, build_dataset, @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, - with_preprocessor): + with_preprocessor): """Test that a NotFittedError is raised if someone tries to predict and the metric learner has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) @@ -64,3 +65,23 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with pytest.raises(NotFittedError): estimator.predict(input_data) + +class IdentityPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): + """A simple pairs classifier for testing purposes, that will just have + identity as transformer_. + """ + def fit(self, pairs, y): + pairs, y = self._prepare_inputs(pairs, y, + type_of_inputs='tuples') + self.transformer_ = np.atleast_2d(np.identity(pairs.shape[2])) + return self + + +def test_set_default_threshold_toy_example(): + # test that the default threshold has the right value on a toy example + identity_pairs_classifier = IdentityPairsClassifier() + pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) + y = np.array([1, 1, -1, -1]) + identity_pairs_classifier.fit(pairs, y) + identity_pairs_classifier._set_default_threshold(pairs, y) + assert identity_pairs_classifier.threshold_ == 2.5 From 8d5fc501fc40daa53fb2ac83b55b65b27455d5a6 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Feb 2019 11:01:18 +0100 Subject: [PATCH 15/41] Fix threshold for itml --- metric_learn/itml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index a40476c7..aa75463e 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -187,7 +187,7 @@ def fit(self, pairs, y, bounds=None): Returns the instance. 
""" self._fit(pairs, y, bounds=bounds) - self.threshold_ = - np.mean(self.bounds_) + self.threshold_ = np.mean(self.bounds_) return self From 0f14b251cd18d622ebff98ac10a85a49c0124528 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 4 Mar 2019 16:23:10 +0100 Subject: [PATCH 16/41] FEAT: Add calibrate_threshold and tests --- metric_learn/base_metric.py | 151 ++++++++++++++++++++- test/test_pairs_classifiers.py | 235 ++++++++++++++++++++++++++++++++- 2 files changed, 381 insertions(+), 5 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 80fc7a7f..60c6859f 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,8 +1,7 @@ -from numpy.linalg import cholesky -from scipy.spatial.distance import euclidean from sklearn.base import BaseEstimator +from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted -from sklearn.metrics import roc_auc_score, accuracy_score +from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve import numpy as np from abc import ABCMeta, abstractmethod import six @@ -333,7 +332,7 @@ def predict(self, pairs): The predicted learned metric value between samples in every pair. """ check_is_fitted(self, ['threshold_', 'transformer_']) - return 2 * (self.decision_function(pairs) > - self.threshold_) - 1 + return 2 * (self.decision_function(pairs) >= - self.threshold_) - 1 def decision_function(self, pairs): """Returns the decision function used to classify the pairs. @@ -396,6 +395,150 @@ def _set_default_threshold(self, pairs, y): pairs[(y == -1).ravel()])) self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) + def set_threshold(self, threshold): + """Sets the threshold of the metric learner to the given value `threshold + + Parameters + ---------- + threshold : float + The threshold value we want to set. It's a distance metric with + respect to which the predicted distance metric for test pairs will be + compared to. If they are superior to the threshold they will be + classified as similar (+1), and dissimilar (-1) if not. + + Returns + ------- + self : `_PairsClassifier` + The pairs classifier with the new threshold set. + """ + self.threshold_ = threshold + return self + + def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', + threshold=None, beta=None): + """Decision threshold calibration for binary classification + + Method that calibrates the decision threshold (cutoff point) of the metric + learner. This threshold will then be used when calling the method + `predict`. The methods for picking cutoff points make use of traditional + binary classification evaluation statistics such as the true positive and + true negative rates and F-scores. The threshold will be found to maximize + the chosen score on the validation set `(pairs_valid, y_valid)`. 
+ + Parameters + ---------- + strategy : str, optional (default='roc') + The strategy to use for choosing the cutoff point + + 'accuracy' + selects a decision threshold that maximizes the accuracy + 'f_beta' + selects a decision threshold that maximizes the f_beta score + 'max_tpr' + selects the point that yields the highest true positive rate with + true negative rate at least equal to the value of the parameter + threshold + 'max_tnr' + selects the point that yields the highest true negative rate with + true positive rate at least equal to the value of the parameter + threshold + + beta : float in [0, 1], optional (default=None) + beta value to be used in case strategy == 'f_beta' + + threshold : float in [0, 1] or None, (default=None) + In case strategy is 'max_tpr' or 'max_tnr' this parameter must be set + to specify the threshold for the true negative rate or true positive + rate respectively that needs to be achieved + + pairs_valid : array-like, shape=(n_pairs_valid, 2, n_features) + The validation set of pairs to use to set the threshold. + + y_valid : array-like, shape=(n_pairs_valid,) + The labels of the pairs of the validation set to use to set the + threshold. + + References + ---------- + .. [1] Receiver-operating characteristic (ROC) plots: a fundamental + evaluation tool in clinical medicine, MH Zweig, G Campbell - + Clinical chemistry, 1993 + + .. [2] most of the code of this function is from scikit-learn's PR #10117 + + See Also + -------- + sklearn.calibration : scikit-learn's module for calibrating classifiers + """ + + if strategy not in ('accuracy', 'f_beta', 'max_tpr', + 'max_tnr'): + raise ValueError('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". Got "{}" instead.' + .format(strategy)) + + if strategy == 'max_tpr' or strategy == 'max_tnr': + if (threshold is None or not isinstance(threshold, (int, float)) or + not threshold >= 0 or not threshold <= 1): + raise ValueError('Parameter threshold must be a number in' + '[0, 1]. ' + 'Got {} instead.'.format(threshold)) + + if strategy == 'f_beta': + if beta is None or not isinstance(beta, (int, float)): + raise ValueError('Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + + pairs_valid, y_valid = self._prepare_inputs(pairs_valid, y_valid, + type_of_inputs='tuples') + + n_samples = pairs_valid.shape[0] + if strategy == 'accuracy': + scores = self.decision_function(pairs_valid) + scores_sorted_idces = np.argsort(scores)[::-1] + scores_sorted = scores[scores_sorted_idces] + # true labels ordered by decision_function value: (higher first) + y_ordered = y_valid[scores_sorted_idces] + # finds the threshold that maximizes the accuracy: + cum_tp = stable_cumsum(y_ordered == 1) # cumulative number of true + # positives + cum_tn_inverted = stable_cumsum(y_ordered[::-1] == -1) + cum_tn = np.concatenate([[0], cum_tn_inverted[:-1]])[::-1] + cum_accuracy = (cum_tp + cum_tn) / n_samples + max_i = np.argmax(cum_accuracy) + # note: we want a positive threshold (distance), so we take - threshold + self.threshold_ = - scores_sorted[max_i] + return self + + if strategy == 'f_beta': + precision, recall, thresholds = precision_recall_curve( + y_valid, self.decision_function(pairs_valid), pos_label=1) + with np.errstate(divide='ignore', invalid='ignore'): + f_beta = ((1 + beta**2) * (precision * recall) / + (beta**2 * precision + recall)) + f_beta[np.isnan(f_beta)] = 0. 
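+      # the division can give 0/0 = NaN (e.g. at thresholds where precision
+      # and recall are both zero); setting those entries to 0 makes sure the
+      # argmax below never selects them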
+ imax = np.argmax(f_beta) + self.threshold_ = - thresholds[imax] + return self + + fpr, tpr, thresholds = roc_curve(y_valid, + self.decision_function(pairs_valid), + pos_label=1) + fpr, tpr, thresholds = fpr, tpr, thresholds + + if strategy == 'max_tpr': + indices = np.where(1 - fpr >= threshold)[0] + max_tpr_index = np.argmax(tpr[indices]) + # note: we want a positive threshold (distance), so we take - threshold + self.threshold_ = - thresholds[indices[max_tpr_index]] + + if strategy == 'max_tnr': + indices = np.where(tpr >= threshold)[0] + max_tnr_index = np.argmax(1 - fpr[indices]) + # note: we want a positive threshold (distance), so we take - threshold + self.threshold_ = - thresholds[indices[max_tnr_index]] + return self + class _QuadrupletsClassifierMixin(BaseMetricLearner): diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 3ff47c18..adeeb3cb 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -1,12 +1,17 @@ +from functools import partial + import pytest from metric_learn.base_metric import _PairsClassifierMixin, MahalanobisMixin from sklearn.exceptions import NotFittedError +from sklearn.metrics import (f1_score, accuracy_score, fbeta_score, + precision_score) from sklearn.model_selection import train_test_split from test.test_utils import pairs_learners, ids_pairs_learners from sklearn.utils.testing import set_random_state from sklearn import clone import numpy as np +from itertools import product @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -66,17 +71,54 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, estimator.predict(input_data) +@pytest.mark.parametrize('kwargs', + [{'strategy': 'accuracy'}, + *[{'strategy': strategy, 'threshold': threshold} + for (strategy, threshold) in product( + ['max_tpr', 'max_tnr'], + [0., 0.2, 0.8, 1.])], + *[{'strategy': 'f_beta', 'beta': beta} + for beta in [0., 0.1, 0.2, 1., 5.]] + ]) +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_threshold_different_scores_is_finite(estimator, build_dataset, + with_preprocessor, kwargs): + # test that the score returned is finite for every metric learner + input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + estimator.fit(input_data, labels) + with pytest.warns(None) as record: + estimator.calibrate_threshold(input_data, labels, **kwargs) + assert len(record) == 0 + + class IdentityPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): """A simple pairs classifier for testing purposes, that will just have - identity as transformer_. + identity as transformer_, and a string threshold so that it returns an + error if not explicitely set. """ def fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') self.transformer_ = np.atleast_2d(np.identity(pairs.shape[2])) + self.threshold_ = 'I am not set.' 
return self +def test_set_threshold(): + # test that set_threshold indeed sets the threshold + identity_pairs_classifier = IdentityPairsClassifier() + pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) + y = np.array([1, 1, -1, -1]) + identity_pairs_classifier.fit(pairs, y) + identity_pairs_classifier.set_threshold(0.5) + assert identity_pairs_classifier.threshold_ == 0.5 + + def test_set_default_threshold_toy_example(): # test that the default threshold has the right value on a toy example identity_pairs_classifier = IdentityPairsClassifier() @@ -85,3 +127,194 @@ def test_set_default_threshold_toy_example(): identity_pairs_classifier.fit(pairs, y) identity_pairs_classifier._set_default_threshold(pairs, y) assert identity_pairs_classifier.threshold_ == 2.5 + + +def test_f_beta_1_is_f_1(): + # test that putting beta to 1 indeed finds the best threshold to optimize + # the f1_score + rng = np.random.RandomState(42) + n_samples = 100 + pairs, y = rng.randn(n_samples, 2, 5), rng.choice([-1, 1], size=n_samples) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + pairs_learner.calibrate_threshold(pairs, y, strategy='f_beta', beta=1) + best_f1_score = f1_score(y, pairs_learner.predict(pairs)) + for threshold in - pairs_learner.decision_function(pairs): + pairs_learner.set_threshold(threshold) + assert f1_score(y, pairs_learner.predict(pairs)) <= best_f1_score + + +def true_pos_true_neg_rates(y_true, y_pred): + """A function that returns the true positive rates and the true negatives + rate. For testing purposes (optimized for readability not performance).""" + assert y_pred.shape[0] == y_true.shape[0] + tp = np.sum((y_pred == 1) * (y_true == 1)) + tn = np.sum((y_pred == -1) * (y_true == -1)) + fn = np.sum((y_pred == -1) * (y_true == 1)) + fp = np.sum((y_pred == 1) * (y_true == -1)) + tpr = tp / (tp + fn) + tnr = tn / (tn + fp) + tpr = tpr if not np.isnan(tpr) else 0. + tnr = tnr if not np.isnan(tnr) else 0. + return tpr, tnr + + +def tpr_threshold(y_true, y_pred, tnr_threshold=0.): + """A function that returns the true positive rate if the true negative + rate is higher or equal than `threshold`, and -1 otherwise. For testing + purposes""" + tpr, tnr = true_pos_true_neg_rates(y_true, y_pred) + if tnr < tnr_threshold: + return -1 + else: + return tpr + + +def tnr_threshold(y_true, y_pred, tpr_threshold=0.): + """A function that returns the true negative rate if the true positive + rate is higher or equal than `threshold`, and -1 otherwise. 
For testing + purposes""" + tpr, tnr = true_pos_true_neg_rates(y_true, y_pred) + if tpr < tpr_threshold: + return -1 + else: + return tnr + + +@pytest.mark.parametrize('kwargs, scoring', + [({'strategy': 'accuracy'}, accuracy_score), + *[({'strategy': 'f_beta', 'beta': b}, + partial(fbeta_score, beta=b)) + for b in [0.1, 0.5, 1.]], + ({'strategy': 'f_beta', 'beta': 0}, precision_score), + *[({'strategy': 'max_tpr', 'threshold': t}, + partial(tpr_threshold, tnr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]], + *[({'strategy': 'max_tnr', 'threshold': t}, + partial(tnr_threshold, tpr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]], + ]) +def test_found_score_is_best_score(kwargs, scoring): + # test that when we use calibrate threshold, it will indeed be the + # threshold that have the best score + rng = np.random.RandomState(42) + n_samples = 50 + pairs, y = rng.randn(n_samples, 2, 5), rng.choice([-1, 1], size=n_samples) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + pairs_learner.calibrate_threshold(pairs, y, **kwargs) + best_score = scoring(y, pairs_learner.predict(pairs)) + scores = [] + i = 0 + predicted_scores = pairs_learner.decision_function(pairs) + predicted_scores = np.hstack([[np.min(predicted_scores) - 1], + predicted_scores, + [np.max(predicted_scores) + 1]]) + for threshold in - predicted_scores: + pairs_learner.set_threshold(threshold) + score = scoring(y, pairs_learner.predict(pairs)) + i += 1 + assert score <= best_score + scores.append(score) + assert len(set(scores)) > 1 # assert that we didn't always have the same + # value for the score (which could be a hint for some bug, but would still + # silently pass the test)) + + +@pytest.mark.parametrize('kwargs, scoring', + [({'strategy': 'accuracy'}, accuracy_score), + *[({'strategy': 'f_beta', 'beta': b}, + partial(fbeta_score, beta=b)) + for b in [0.1, 0.5, 1.]], + ({'strategy': 'f_beta', 'beta': 0}, precision_score), + *[({'strategy': 'max_tpr', 'threshold': t}, + partial(tpr_threshold, tnr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]], + *[({'strategy': 'max_tnr', 'threshold': t}, + partial(tnr_threshold, tpr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]], + ]) +def test_found_score_is_best_score_duplicates(kwargs, scoring): + # test that when we use calibrate threshold, it will indeed be the + # threshold that have the best score. It's the same as the previous test + # except this time we test that the scores are coherent even if there are + # duplicates (i.e. points that have the same score returned by + # `decision_function`). 
+ rng = np.random.RandomState(42) + n_samples = 50 + pairs, y = rng.randn(n_samples, 2, 5), rng.choice([-1, 1], size=n_samples) + # we create some duplicates points, which will also have the same score + # predicted + pairs[6:10] = pairs[10:14] + y[6:10] = y[10:14] + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + pairs_learner.calibrate_threshold(pairs, y, **kwargs) + best_score = scoring(y, pairs_learner.predict(pairs)) + scores = [] + i = 0 + predicted_scores = pairs_learner.decision_function(pairs) + predicted_scores = np.hstack([[np.min(predicted_scores) - 1], + predicted_scores, + [np.max(predicted_scores) + 1]]) + for threshold in - predicted_scores: + pairs_learner.set_threshold(threshold) + score = scoring(y, pairs_learner.predict(pairs)) + i += 1 + assert score <= best_score + scores.append(score) + assert len(set(scores)) > 1 # assert that we didn't always have the same + # value for the score (which could be a hint for some bug, but would still + # silently pass the test)) + + +@pytest.mark.parametrize('invalid_args, expected_msg', + [({'strategy': 'weird'}, + ('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". Got "weird" instead.')), + *[({'strategy': strategy, 'threshold': threshold}, + 'Parameter threshold must be a number in' + '[0, 1]. Got {} instead.'.format(threshold)) + for (strategy, threshold) in product( + ['max_tpr', 'max_tnr'], + [None, 'weird', -0.2, 1.2, 3 + 2j])], + *[({'strategy': 'f_beta', 'beta': beta}, + 'Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + for beta in [None, 'weird', 3 + 2j]] + ]) +def test_calibrate_threshold_invalid_parameters_right_error(invalid_args, + expected_msg): + # test that the right error message is returned if invalid arguments are + # given to calibrate_threshold + rng = np.random.RandomState(42) + pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + with pytest.raises(ValueError) as raised_error: + pairs_learner.calibrate_threshold(pairs, y, **invalid_args) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('valid_args', + [{'strategy': 'accuracy'}, + *[{'strategy': strategy, 'threshold': threshold} + for (strategy, threshold) in product( + ['max_tpr', 'max_tnr'], + [0., 0.2, 0.8, 1.])], + *[{'strategy': 'f_beta', 'beta': beta} + for beta in [-5., -1., 0., 0.1, 0.2, 1., 5.]] + # Note that we authorize beta < 0 (even if + # in fact it will be squared, so it would be useless + # to do that) + ]) +def test_calibrate_threshold_valid_parameters(valid_args): + # test that no warning message is returned if valid arguments are given to + # calibrate threshold + rng = np.random.RandomState(42) + pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20) + pairs_learner = IdentityPairsClassifier() + pairs_learner.fit(pairs, y) + with pytest.warns(None) as record: + pairs_learner.calibrate_threshold(pairs, y, **valid_args) + assert len(record) == 0 From a6458a228089dca7f8c7c6c2a435e63ae2984edb Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 09:02:14 +0100 Subject: [PATCH 17/41] MAINT: remove starred syntax for compatibility with older versions of python --- test/test_pairs_classifiers.py | 113 +++++++++++++++++---------------- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index adeeb3cb..a03759fd 100644 --- a/test/test_pairs_classifiers.py 
+++ b/test/test_pairs_classifiers.py @@ -72,14 +72,13 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, @pytest.mark.parametrize('kwargs', - [{'strategy': 'accuracy'}, - *[{'strategy': strategy, 'threshold': threshold} - for (strategy, threshold) in product( - ['max_tpr', 'max_tnr'], - [0., 0.2, 0.8, 1.])], - *[{'strategy': 'f_beta', 'beta': beta} - for beta in [0., 0.1, 0.2, 1., 5.]] - ]) + [{'strategy': 'accuracy'}] + + [{'strategy': strategy, 'threshold': threshold} + for (strategy, threshold) in product( + ['max_tpr', 'max_tnr'], [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [0., 0.1, 0.2, 1., 5.]] + ) @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) @@ -182,18 +181,19 @@ def tnr_threshold(y_true, y_pred, tpr_threshold=0.): @pytest.mark.parametrize('kwargs, scoring', - [({'strategy': 'accuracy'}, accuracy_score), - *[({'strategy': 'f_beta', 'beta': b}, - partial(fbeta_score, beta=b)) - for b in [0.1, 0.5, 1.]], - ({'strategy': 'f_beta', 'beta': 0}, precision_score), - *[({'strategy': 'max_tpr', 'threshold': t}, - partial(tpr_threshold, tnr_threshold=t)) - for t in [0., 0.1, 0.5, 0.8, 1.]], - *[({'strategy': 'max_tnr', 'threshold': t}, - partial(tnr_threshold, tpr_threshold=t)) - for t in [0., 0.1, 0.5, 0.8, 1.]], - ]) + [({'strategy': 'accuracy'}, accuracy_score)] + + [({'strategy': 'f_beta', 'beta': b}, + partial(fbeta_score, beta=b)) + for b in [0.1, 0.5, 1.]] + + [({'strategy': 'f_beta', 'beta': 0}, + precision_score)] + + [({'strategy': 'max_tpr', 'threshold': t}, + partial(tpr_threshold, tnr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]] + + [({'strategy': 'max_tnr', 'threshold': t}, + partial(tnr_threshold, tpr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]], + ) def test_found_score_is_best_score(kwargs, scoring): # test that when we use calibrate threshold, it will indeed be the # threshold that have the best score @@ -222,18 +222,19 @@ def test_found_score_is_best_score(kwargs, scoring): @pytest.mark.parametrize('kwargs, scoring', - [({'strategy': 'accuracy'}, accuracy_score), - *[({'strategy': 'f_beta', 'beta': b}, - partial(fbeta_score, beta=b)) - for b in [0.1, 0.5, 1.]], - ({'strategy': 'f_beta', 'beta': 0}, precision_score), - *[({'strategy': 'max_tpr', 'threshold': t}, - partial(tpr_threshold, tnr_threshold=t)) - for t in [0., 0.1, 0.5, 0.8, 1.]], - *[({'strategy': 'max_tnr', 'threshold': t}, - partial(tnr_threshold, tpr_threshold=t)) - for t in [0., 0.1, 0.5, 0.8, 1.]], - ]) + [({'strategy': 'accuracy'}, accuracy_score)] + + [({'strategy': 'f_beta', 'beta': b}, + partial(fbeta_score, beta=b)) + for b in [0.1, 0.5, 1.]] + + [({'strategy': 'f_beta', 'beta': 0}, + precision_score)] + + [({'strategy': 'max_tpr', 'threshold': t}, + partial(tpr_threshold, tnr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]] + + [({'strategy': 'max_tnr', 'threshold': t}, + partial(tnr_threshold, tpr_threshold=t)) + for t in [0., 0.1, 0.5, 0.8, 1.]] + ) def test_found_score_is_best_score_duplicates(kwargs, scoring): # test that when we use calibrate threshold, it will indeed be the # threshold that have the best score. It's the same as the previous test @@ -270,19 +271,19 @@ def test_found_score_is_best_score_duplicates(kwargs, scoring): @pytest.mark.parametrize('invalid_args, expected_msg', [({'strategy': 'weird'}, - ('Strategy can either be "accuracy", "f_beta" or ' - '"max_tpr" or "max_tnr". 
Got "weird" instead.')), - *[({'strategy': strategy, 'threshold': threshold}, - 'Parameter threshold must be a number in' - '[0, 1]. Got {} instead.'.format(threshold)) - for (strategy, threshold) in product( - ['max_tpr', 'max_tnr'], - [None, 'weird', -0.2, 1.2, 3 + 2j])], - *[({'strategy': 'f_beta', 'beta': beta}, - 'Parameter beta must be a real number. ' - 'Got {} instead.'.format(type(beta))) - for beta in [None, 'weird', 3 + 2j]] - ]) + ('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". Got "weird" instead.'))] + + [({'strategy': strategy, 'threshold': threshold}, + 'Parameter threshold must be a number in' + '[0, 1]. Got {} instead.'.format(threshold)) + for (strategy, threshold) in product( + ['max_tpr', 'max_tnr'], + [None, 'weird', -0.2, 1.2, 3 + 2j])] + + [({'strategy': 'f_beta', 'beta': beta}, + 'Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + for beta in [None, 'weird', 3 + 2j]] + ) def test_calibrate_threshold_invalid_parameters_right_error(invalid_args, expected_msg): # test that the right error message is returned if invalid arguments are @@ -297,17 +298,17 @@ def test_calibrate_threshold_invalid_parameters_right_error(invalid_args, @pytest.mark.parametrize('valid_args', - [{'strategy': 'accuracy'}, - *[{'strategy': strategy, 'threshold': threshold} - for (strategy, threshold) in product( - ['max_tpr', 'max_tnr'], - [0., 0.2, 0.8, 1.])], - *[{'strategy': 'f_beta', 'beta': beta} - for beta in [-5., -1., 0., 0.1, 0.2, 1., 5.]] - # Note that we authorize beta < 0 (even if - # in fact it will be squared, so it would be useless - # to do that) - ]) + [{'strategy': 'accuracy'}] + + [{'strategy': strategy, 'threshold': threshold} + for (strategy, threshold) in product( + ['max_tpr', 'max_tnr'], + [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [-5., -1., 0., 0.1, 0.2, 1., 5.]] + # Note that we authorize beta < 0 (even if + # in fact it will be squared, so it would be useless + # to do that) + ) def test_calibrate_threshold_valid_parameters(valid_args): # test that no warning message is returned if valid arguments are given to # calibrate threshold From fada5cc42c0c62c76481d0344d23b86d9182dbcd Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 13:29:14 +0100 Subject: [PATCH 18/41] Remove debugging prints and make tests for ITML pass, while waiting for #175 to be solved --- test/test_pairs_classifiers.py | 4 ---- test/test_utils.py | 7 +++++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index a03759fd..240b81cb 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -205,7 +205,6 @@ def test_found_score_is_best_score(kwargs, scoring): pairs_learner.calibrate_threshold(pairs, y, **kwargs) best_score = scoring(y, pairs_learner.predict(pairs)) scores = [] - i = 0 predicted_scores = pairs_learner.decision_function(pairs) predicted_scores = np.hstack([[np.min(predicted_scores) - 1], predicted_scores, @@ -213,7 +212,6 @@ def test_found_score_is_best_score(kwargs, scoring): for threshold in - predicted_scores: pairs_learner.set_threshold(threshold) score = scoring(y, pairs_learner.predict(pairs)) - i += 1 assert score <= best_score scores.append(score) assert len(set(scores)) > 1 # assert that we didn't always have the same @@ -253,7 +251,6 @@ def test_found_score_is_best_score_duplicates(kwargs, scoring): pairs_learner.calibrate_threshold(pairs, y, **kwargs) best_score = 
scoring(y, pairs_learner.predict(pairs)) scores = [] - i = 0 predicted_scores = pairs_learner.decision_function(pairs) predicted_scores = np.hstack([[np.min(predicted_scores) - 1], predicted_scores, @@ -261,7 +258,6 @@ def test_found_score_is_best_score_duplicates(kwargs, scoring): for threshold in - predicted_scores: pairs_learner.set_threshold(threshold) score = scoring(y, pairs_learner.predict(pairs)) - i += 1 assert score <= best_score scores.append(score) assert len(set(scores)) > 1 # assert that we didn't always have the same diff --git a/test/test_utils.py b/test/test_utils.py index 38226fef..afd54288 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -102,8 +102,11 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in quadruplets_learners])) -pairs_learners = [(ITML(), build_pairs), - (MMC(max_iter=2), build_pairs), # max_iter=2 for faster +pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be + # faster, also make tests pass while waiting for #175 to + # be solved + # TODO: remove this comment when #175 is solved + (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(), build_pairs), ] ids_pairs_learners = list(map(lambda x: x.__class__.__name__, From 32a48897d7e4bfaaea9cfbdceb4778c7e5e3115b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 5 Mar 2019 13:52:49 +0100 Subject: [PATCH 19/41] FIX: from __future__ import division to pass tests for python 2.7 --- test/test_pairs_classifiers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 240b81cb..298e0c5f 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -1,3 +1,5 @@ +from __future__ import division + from functools import partial import pytest From 5cf71b909a9a3f85daa0922252896e9d38a76634 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 11 Mar 2019 15:02:33 +0100 Subject: [PATCH 20/41] Add some documentation for calibration --- doc/conf.py | 13 +++- doc/weakly_supervised.rst | 112 +++++++++++++++++++++++---------- metric_learn/base_metric.py | 15 ++++- metric_learn/itml.py | 2 + metric_learn/mmc.py | 2 +- metric_learn/sdml.py | 2 +- test/test_pairs_classifiers.py | 2 +- 7 files changed, 108 insertions(+), 40 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index f0faa2f8..8f5fdcaa 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import sys extensions = [ 'sphinx.ext.autodoc', @@ -8,7 +9,8 @@ 'sphinx.ext.mathjax', 'numpydoc', 'sphinx_gallery.gen_gallery', - 'sphinx.ext.doctest' + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx' ] templates_path = ['_templates'] @@ -39,3 +41,12 @@ # Option to hide doctests comments in the documentation (like # doctest: # +NORMALIZE_WHITESPACE for instance) trim_doctest_flags = True + +# intersphinx configuration +intersphinx_mapping = { + 'python': ('https://docs.python.org/{.major}'.format( + sys.version_info), None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), + 'scikit-learn': ('https://scikit-learn.org/stable/', None) +} \ No newline at end of file diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index deae9b40..8111238b 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -148,6 +148,42 @@ tuples you're working with (pairs, triplets...). See the docstring of the `score` method of the estimator you use. 
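For instance, the default ``score`` of a pairs metric learner (the ROC AUC of
its decision function) can be used directly with scikit-learn's
cross-validation. A minimal sketch, with placeholder random pairs and an MMC
learner::

    import numpy as np
    from sklearn.model_selection import cross_val_score
    from metric_learn import MMC

    rng = np.random.RandomState(42)
    pairs = rng.randn(40, 2, 5)
    y = rng.choice([-1, 1], size=40)

    # with no explicit scoring argument, cross_val_score falls back on the
    # estimator's own score method
    scores = cross_val_score(MMC(max_iter=2), pairs, y, cv=3)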
+Learning on pairs +================= + +Some metric learning algorithms learn on pairs of samples. In this case, one +should provide the algorithm with ``n_samples`` pairs of points, with a +corresponding target containing ``n_samples`` values being either +1 or -1. +These values indicate whether the given pairs are similar points or +dissimilar points. + + +.. _calibration: + +Thresholding +------------ +In order to predict whether a new pair represents similar or dissimilar +samples, we need to set a distance threshold, so that points closer (in the +learned space) than this threshold are predicted as similar, and points further +away are predicted as dissimilar. Several methods are possible for this +thresholding. + +- **default**: Unless explicitely stated in the `fit` method documentation + of the estimator, the threshold is set with the method + `set_default_threshold` on the trainset. + +- **manual**: calling `set_threshold`, the user can + manually set the threshold to a particular value. + +- **calibrated**: calling `calibrate_threshold`, the user can + calibrate the threshold to achieve a particular score on a validation set, + the score being among the classical scores for classification (accuracy, f1 + score...). + + +See also: `sklearn.calibration`. + + Algorithms ================== @@ -192,39 +228,6 @@ programming. .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/ itml/ - -LSML ----- - -`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared -Residual - -.. topic:: Example Code: - -:: - - from metric_learn import LSML - - quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], - [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], - [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], - [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] - - # we want to make closer points where the first feature is close, and - # further if the second feature is close - - lsml = LSML() - lsml.fit(quadruplets) - -.. topic:: References: - - .. [1] Liu et al. - "Metric Learning from Relative Comparisons by Minimizing Squared - Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf - - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 - - SDML ---- @@ -343,3 +346,46 @@ method. However, it is one of the earliest and a still often cited technique. -with-side-information.pdf>`_ Xing, Jordan, Russell, Ng. .. [2] Adapted from Matlab code `here `_. + +Learning on quadruplets +======================= + +A type of information even weaker than pairs is information about relative +comparisons between pairs. The user should provide the algorithm with a +quadruplet of points, where the two first points are closer than the two +last points. No target vector (``y``) is needed, since the supervision is +already in the order that points are given in the quadruplet. + +Algorithms +================== + +LSML +---- + +`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared +Residual + +.. topic:: Example Code: + +:: + + from metric_learn import LSML + + quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], + [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], + [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] + + # we want to make closer points where the first feature is close, and + # further if the second feature is close + + lsml = LSML() + lsml.fit(quadruplets) + +.. topic:: References: + + .. [1] Liu et al. 
+ "Metric Learning from Relative Comparisons by Minimizing Squared + Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf + + .. [2] Adapted from https://gist.github.com/kcarnold/5439917 diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 60c6859f..c16c95d1 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -386,9 +386,14 @@ def score(self, pairs, y): """ return roc_auc_score(y, self.decision_function(pairs)) - def _set_default_threshold(self, pairs, y): - """Returns a threshold that is the opposite of the mean between the similar - metrics mean and the dissimilar metrics mean""" + def set_default_threshold(self, pairs, y): + """Sets the default threshold on the given dataset. + + Returns a threshold that is the mean between the similar + metrics mean and the dissimilar metrics mean. + + See more in the :ref:`User Guide `. + """ similar_threshold = np.mean(self.score_pairs( pairs[(y == 1).ravel()])) dissimilar_threshold = np.mean(self.score_pairs( @@ -398,6 +403,8 @@ def _set_default_threshold(self, pairs, y): def set_threshold(self, threshold): """Sets the threshold of the metric learner to the given value `threshold + See more in the :ref:`User Guide `. + Parameters ---------- threshold : float @@ -425,6 +432,8 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', true negative rates and F-scores. The threshold will be found to maximize the chosen score on the validation set `(pairs_valid, y_valid)`. + See more in the :ref:`User Guide `. + Parameters ---------- strategy : str, optional (default='roc') diff --git a/metric_learn/itml.py b/metric_learn/itml.py index aa75463e..7eeec13e 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -163,6 +163,8 @@ class ITML(_BaseITML, _PairsClassifierMixin): def fit(self, pairs, y, bounds=None): """Learn the ITML model. + The default threshold will be set to the mean of the bounds. + Parameters ---------- pairs: array-like, shape=(n_constraints, 2, n_features) or diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 3892a969..138b1d71 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -390,7 +390,7 @@ def fit(self, pairs, y): Returns the instance. """ self._fit(pairs, y) - self._set_default_threshold(pairs, y) + self.set_default_threshold(pairs, y) return self diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 359e4fe1..536bd28a 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -112,7 +112,7 @@ def fit(self, pairs, y): Returns the instance. 
""" self._fit(pairs, y) - self._set_default_threshold(pairs, y) + self.set_default_threshold(pairs, y) return self diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 298e0c5f..8ee20d3a 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -126,7 +126,7 @@ def test_set_default_threshold_toy_example(): pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) y = np.array([1, 1, -1, -1]) identity_pairs_classifier.fit(pairs, y) - identity_pairs_classifier._set_default_threshold(pairs, y) + identity_pairs_classifier.set_default_threshold(pairs, y) assert identity_pairs_classifier.threshold_ == 2.5 From c2bc693b568e2c294362703940f1f4c54f4dfd11 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 11 Mar 2019 15:27:27 +0100 Subject: [PATCH 21/41] DOC: fix style --- doc/weakly_supervised.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 8111238b..87a0ac6b 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -185,7 +185,7 @@ See also: `sklearn.calibration`. Algorithms -================== +========== ITML ---- @@ -357,7 +357,7 @@ last points. No target vector (``y``) is needed, since the supervision is already in the order that points are given in the quadruplet. Algorithms -================== +========== LSML ---- From 3ed3430b1cbae48a566a12cfa736ac38943e2388 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 10:32:53 +0100 Subject: [PATCH 22/41] Address most comments from aurelien's reviews --- doc/weakly_supervised.rst | 11 +++-- metric_learn/base_metric.py | 77 ++++++++++++++++++---------------- metric_learn/itml.py | 2 +- test/test_pairs_classifiers.py | 26 ++++++------ 4 files changed, 59 insertions(+), 57 deletions(-) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 87a0ac6b..2868f9e8 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -172,13 +172,12 @@ thresholding. of the estimator, the threshold is set with the method `set_default_threshold` on the trainset. -- **manual**: calling `set_threshold`, the user can - manually set the threshold to a particular value. +- **manual**: calling `set_threshold` will set the threshold to a + particular value. -- **calibrated**: calling `calibrate_threshold`, the user can - calibrate the threshold to achieve a particular score on a validation set, - the score being among the classical scores for classification (accuracy, f1 - score...). +- **calibrated**: calling `calibrate_threshold` will calibrate the threshold to + achieve a particular score on a validation set, the score being among the + classical scores for classification (accuracy, f1 score...). See also: `sklearn.calibration`. diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index c16c95d1..be2f5589 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -304,9 +304,9 @@ class _PairsClassifierMixin(BaseMetricLearner): classified as dissimilar. classes_ : `list` - The possible labels of the pairs `MMC` can fit on. `classes_ = [-1, 1]`, - where -1 means points in a pair are dissimilar (negative label), and 1 - means they are similar (positive label). + The possible labels of the pairs the metric learner can fit on. + `classes_ = [-1, 1]`, where -1 means points in a pair are dissimilar + (negative label), and 1 means they are similar (positive label). 
""" classes_ = [-1, 1] @@ -338,9 +338,10 @@ def decision_function(self, pairs): """Returns the decision function used to classify the pairs. Returns the opposite of the learned metric value between samples in every - pair. Hence it should ideally be low for dissimilar samples and high for - similar samples. This is the decision function that is used to classify - pairs as similar (+1), or dissimilar (-1). + pair, to be consistent with scikit-learn conventions. Hence it should + ideally be low for dissimilar samples and high for similar samples. + This is the decision function that is used to classify pairs as similar + (+1), or dissimilar (-1). Parameters ---------- @@ -401,17 +402,17 @@ def set_default_threshold(self, pairs, y): self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) def set_threshold(self, threshold): - """Sets the threshold of the metric learner to the given value `threshold + """Sets the threshold of the metric learner to the given value `threshold`. See more in the :ref:`User Guide `. Parameters ---------- threshold : float - The threshold value we want to set. It's a distance metric with - respect to which the predicted distance metric for test pairs will be - compared to. If they are superior to the threshold they will be - classified as similar (+1), and dissimilar (-1) if not. + The threshold value we want to set. It is the value to which the + predicted distance for test pairs will be compared. If they are superior + to the threshold they will be classified as similar (+1), + and dissimilar (-1) if not. Returns ------- @@ -422,50 +423,51 @@ def set_threshold(self, threshold): return self def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', - threshold=None, beta=None): - """Decision threshold calibration for binary classification + min_rate=None, beta=1.): + """Decision threshold calibration for pairwise binary classification Method that calibrates the decision threshold (cutoff point) of the metric learner. This threshold will then be used when calling the method `predict`. The methods for picking cutoff points make use of traditional binary classification evaluation statistics such as the true positive and true negative rates and F-scores. The threshold will be found to maximize - the chosen score on the validation set `(pairs_valid, y_valid)`. + the chosen score on the validation set ``(pairs_valid, y_valid)``. See more in the :ref:`User Guide `. Parameters ---------- - strategy : str, optional (default='roc') - The strategy to use for choosing the cutoff point + strategy : str, optional (default='accuracy') + The strategy to use for choosing the cutoff threshold. 'accuracy' - selects a decision threshold that maximizes the accuracy + Selects a decision threshold that maximizes the accuracy. 'f_beta' - selects a decision threshold that maximizes the f_beta score + Selects a decision threshold that maximizes the f_beta score, + with beta given by the parameter `beta`. 'max_tpr' - selects the point that yields the highest true positive rate with - true negative rate at least equal to the value of the parameter - threshold + Selects a decision threshold that yields the highest true positive + rate with true negative rate at least equal to the value of the + parameter `min_rate`. 
'max_tnr' - selects the point that yields the highest true negative rate with - true positive rate at least equal to the value of the parameter - threshold + Selects a decision threshold that yields the highest true negative + rate with true positive rate at least equal to the value of the + parameter `min_rate`. beta : float in [0, 1], optional (default=None) - beta value to be used in case strategy == 'f_beta' + Beta value to be used in case strategy == 'f_beta'. - threshold : float in [0, 1] or None, (default=None) + min_rate : float in [0, 1] or None, (default=None) In case strategy is 'max_tpr' or 'max_tnr' this parameter must be set - to specify the threshold for the true negative rate or true positive - rate respectively that needs to be achieved + to specify the minimal value for the true negative rate or true positive + rate respectively that needs to be achieved. pairs_valid : array-like, shape=(n_pairs_valid, 2, n_features) The validation set of pairs to use to set the threshold. y_valid : array-like, shape=(n_pairs_valid,) The labels of the pairs of the validation set to use to set the - threshold. + threshold. They must be +1 for positive pairs and -1 for negative pairs. References ---------- @@ -487,11 +489,11 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', .format(strategy)) if strategy == 'max_tpr' or strategy == 'max_tnr': - if (threshold is None or not isinstance(threshold, (int, float)) or - not threshold >= 0 or not threshold <= 1): - raise ValueError('Parameter threshold must be a number in' + if (min_rate is None or not isinstance(min_rate, (int, float)) or + not min_rate >= 0 or not min_rate <= 1): + raise ValueError('Parameter min_rate must be a number in' '[0, 1]. ' - 'Got {} instead.'.format(threshold)) + 'Got {} instead.'.format(min_rate)) if strategy == 'f_beta': if beta is None or not isinstance(beta, (int, float)): @@ -514,9 +516,9 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', cum_tn_inverted = stable_cumsum(y_ordered[::-1] == -1) cum_tn = np.concatenate([[0], cum_tn_inverted[:-1]])[::-1] cum_accuracy = (cum_tp + cum_tn) / n_samples - max_i = np.argmax(cum_accuracy) + imax = np.argmax(cum_accuracy) # note: we want a positive threshold (distance), so we take - threshold - self.threshold_ = - scores_sorted[max_i] + self.threshold_ = - scores_sorted[imax] return self if strategy == 'f_beta': @@ -527,6 +529,7 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', (beta**2 * precision + recall)) f_beta[np.isnan(f_beta)] = 0. 
imax = np.argmax(f_beta) + # note: we want a positive threshold (distance), so we take - threshold self.threshold_ = - thresholds[imax] return self @@ -536,13 +539,13 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', fpr, tpr, thresholds = fpr, tpr, thresholds if strategy == 'max_tpr': - indices = np.where(1 - fpr >= threshold)[0] + indices = np.where(1 - fpr >= min_rate)[0] max_tpr_index = np.argmax(tpr[indices]) # note: we want a positive threshold (distance), so we take - threshold self.threshold_ = - thresholds[indices[max_tpr_index]] if strategy == 'max_tnr': - indices = np.where(tpr >= threshold)[0] + indices = np.where(tpr >= min_rate)[0] max_tnr_index = np.argmax(1 - fpr[indices]) # note: we want a positive threshold (distance), so we take - threshold self.threshold_ = - thresholds[indices[max_tnr_index]] diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7eeec13e..e31368b0 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -155,7 +155,7 @@ class ITML(_BaseITML, _PairsClassifierMixin): classified as dissimilar. classes_ : `list` - The possible labels of the pairs `LSML` can fit on. `classes_ = [-1, 1]`, + The possible labels of the pairs `ITML` can fit on. `classes_ = [-1, 1]`, where -1 means points in a pair are dissimilar (negative label), and 1 means they are similar (positive label). """ diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 8ee20d3a..d467e965 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -75,8 +75,8 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, @pytest.mark.parametrize('kwargs', [{'strategy': 'accuracy'}] + - [{'strategy': strategy, 'threshold': threshold} - for (strategy, threshold) in product( + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( ['max_tpr', 'max_tnr'], [0., 0.2, 0.8, 1.])] + [{'strategy': 'f_beta', 'beta': beta} for beta in [0., 0.1, 0.2, 1., 5.]] @@ -84,8 +84,8 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) -def test_threshold_different_scores_is_finite(estimator, build_dataset, - with_preprocessor, kwargs): +def test_min_rate_different_scores_is_finite(estimator, build_dataset, + with_preprocessor, kwargs): # test that the score returned is finite for every metric learner input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) @@ -189,10 +189,10 @@ def tnr_threshold(y_true, y_pred, tpr_threshold=0.): for b in [0.1, 0.5, 1.]] + [({'strategy': 'f_beta', 'beta': 0}, precision_score)] + - [({'strategy': 'max_tpr', 'threshold': t}, + [({'strategy': 'max_tpr', 'min_rate': t}, partial(tpr_threshold, tnr_threshold=t)) for t in [0., 0.1, 0.5, 0.8, 1.]] + - [({'strategy': 'max_tnr', 'threshold': t}, + [({'strategy': 'max_tnr', 'min_rate': t}, partial(tnr_threshold, tpr_threshold=t)) for t in [0., 0.1, 0.5, 0.8, 1.]], ) @@ -228,7 +228,7 @@ def test_found_score_is_best_score(kwargs, scoring): for b in [0.1, 0.5, 1.]] + [({'strategy': 'f_beta', 'beta': 0}, precision_score)] + - [({'strategy': 'max_tpr', 'threshold': t}, + [({'strategy': 'max_tpr', 'min_rate': t}, partial(tpr_threshold, tnr_threshold=t)) for t in [0., 0.1, 0.5, 0.8, 1.]] + [({'strategy': 'max_tnr', 'threshold': t}, @@ -271,10 +271,10 @@ def 
test_found_score_is_best_score_duplicates(kwargs, scoring): [({'strategy': 'weird'}, ('Strategy can either be "accuracy", "f_beta" or ' '"max_tpr" or "max_tnr". Got "weird" instead.'))] + - [({'strategy': strategy, 'threshold': threshold}, - 'Parameter threshold must be a number in' - '[0, 1]. Got {} instead.'.format(threshold)) - for (strategy, threshold) in product( + [({'strategy': strategy, 'min_rate': min_rate}, + 'Parameter min_rate must be a number in' + '[0, 1]. Got {} instead.'.format(min_rate)) + for (strategy, min_rate) in product( ['max_tpr', 'max_tnr'], [None, 'weird', -0.2, 1.2, 3 + 2j])] + [({'strategy': 'f_beta', 'beta': beta}, @@ -297,8 +297,8 @@ def test_calibrate_threshold_invalid_parameters_right_error(invalid_args, @pytest.mark.parametrize('valid_args', [{'strategy': 'accuracy'}] + - [{'strategy': strategy, 'threshold': threshold} - for (strategy, threshold) in product( + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( ['max_tpr', 'max_tnr'], [0., 0.2, 0.8, 1.])] + [{'strategy': 'f_beta', 'beta': beta} From 69c694528c0b8d7cda71dddbcbe7cbe42996bf13 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 11:21:19 +0100 Subject: [PATCH 23/41] Remove classes_ attribute and test for CalibratedClassifierCV --- metric_learn/base_metric.py | 6 ------ metric_learn/itml.py | 5 ----- metric_learn/mmc.py | 5 ----- metric_learn/sdml.py | 5 ----- test/test_sklearn_compat.py | 26 -------------------------- 5 files changed, 47 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index be2f5589..77672bba 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -302,14 +302,8 @@ class _PairsClassifierMixin(BaseMetricLearner): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. - - classes_ : `list` - The possible labels of the pairs the metric learner can fit on. - `classes_ = [-1, 1]`, where -1 means points in a pair are dissimilar - (negative label), and 1 means they are similar (positive label). """ - classes_ = [-1, 1] _tuple_size = 2 # number of points in a tuple, 2 for pairs def predict(self, pairs): diff --git a/metric_learn/itml.py b/metric_learn/itml.py index e31368b0..57df1471 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -153,11 +153,6 @@ class ITML(_BaseITML, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. - - classes_ : `list` - The possible labels of the pairs `ITML` can fit on. `classes_ = [-1, 1]`, - where -1 means points in a pair are dissimilar (negative label), and 1 - means they are similar (positive label). """ def fit(self, pairs, y, bounds=None): diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 138b1d71..2ddcced2 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -364,11 +364,6 @@ class MMC(_BaseMMC, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. - - classes_ : `list` - The possible labels of the pairs `MMC` can fit on. `classes_ = [-1, 1]`, - where -1 means points in a pair are dissimilar (negative label), and 1 - means they are similar (positive label). 
""" def fit(self, pairs, y): diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 536bd28a..096dc0ed 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -86,11 +86,6 @@ class SDML(_BaseSDML, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. - - classes_ : `list` - The possible labels of the pairs `SDML` can fit on. `classes_ = [-1, 1]`, - where -1 means points in a pair are dissimilar (negative label), and 1 - means they are similar (positive label). """ def fit(self, pairs, y): diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index e14e2cf9..5fc11aeb 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -1,6 +1,5 @@ import pytest import unittest -from sklearn.calibration import CalibratedClassifierCV from sklearn.utils.estimator_checks import check_estimator from sklearn.base import TransformerMixin from sklearn.pipeline import make_pipeline @@ -92,31 +91,6 @@ def test_mmc(self): # ---------------------- Test scikit-learn compatibility ---------------------- -@pytest.mark.parametrize('with_preprocessor', - [True, - # TODO: uncomment the below line as soon as - # https://github.com/scikit-learn/scikit-learn/ - # issues/13077 is solved: - # False, - ]) -@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, - ids=ids_pairs_learners) -def test_calibrated_classifier_CV(estimator, build_dataset, - with_preprocessor): - """Tests that metric-learn tuples estimators' work with scikit-learn's - CalibratedClassifierCV. - """ - input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - set_random_state(estimator) - calibrated_clf = CalibratedClassifierCV(estimator) - - # test fit and predict_proba - calibrated_clf.fit(input_data, labels) - calibrated_clf.predict_proba(input_data) - - @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) From bc393927937564ca75f7d781dc58194e2a000820 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 13:38:03 +0100 Subject: [PATCH 24/41] Rename make_args_inc_quadruplets into remove_y_quadruplets --- test/test_mahalanobis_mixin.py | 34 +++++++++--------- test/test_sklearn_compat.py | 65 ++++++++++++++++++---------------- test/test_utils.py | 22 ++++++------ 3 files changed, 62 insertions(+), 59 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index a85d9e8f..c25c52b1 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -12,7 +12,7 @@ from metric_learn._util import make_context from test.test_utils import (ids_metric_learners, metric_learners, - make_args_inc_quadruplets) + remove_y_quadruplets) RNG = check_random_state(0) @@ -26,7 +26,7 @@ def test_score_pairs_pairwise(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairwise = model.score_pairs(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) @@ -50,7 +50,7 @@ def test_score_pairs_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - 
model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.transformer_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - @@ -66,7 +66,7 @@ def test_score_pairs_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) assert np.isfinite(model.score_pairs(pairs)).all() @@ -80,7 +80,7 @@ def test_score_pairs_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) assert model.score_pairs(tuples).shape == (tuples.shape[0],) context = make_context(estimator) @@ -111,7 +111,7 @@ def test_embed_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) embedded_points = X.dot(model.transformer_.T) assert_array_almost_equal(model.transform(X), embedded_points) @@ -123,7 +123,7 @@ def test_embed_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == X.shape # assert that ValueError is thrown if input shape is 1D @@ -140,7 +140,7 @@ def test_embed_dim(estimator, build_dataset): # avoid this enumeration and rather test if hasattr n_components # as soon as we have made the arguments names as such (issue #167) model.set_params(num_dims=2) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) # assert that ValueError is thrown if input shape is 1D with pytest.raises(ValueError) as raised_error: @@ -155,7 +155,7 @@ def test_embed_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert np.isfinite(model.transform(X)).all() @@ -166,7 +166,7 @@ def test_embed_is_linear(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert_array_almost_equal(model.transform(X[:10] + X[10:20]), model.transform(X[:10]) + model.transform(X[10:20])) @@ -185,7 +185,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] a, b = 
(rng.randn(n_features), rng.randn(n_features)) @@ -204,7 +204,7 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -230,7 +230,7 @@ def test_metric_raises_deprecation_warning(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) with pytest.warns(DeprecationWarning) as raised_warning: model.metric() @@ -247,7 +247,7 @@ def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) clustering = DBSCAN(metric=model.get_metric()) clustering.fit(X) @@ -260,7 +260,7 @@ def test_get_squared_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -280,10 +280,10 @@ def test_transformer_is_2D(estimator, build_dataset): model = clone(estimator) set_random_state(model) # test that it works for X.shape[1] features - model.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y_quadruplets(estimator, input_data, labels)) assert model.transformer_.shape == (X.shape[1], X.shape[1]) # test that it works for 1 feature trunc_data = input_data[..., :1] - model.fit(*make_args_inc_quadruplets(estimator, trunc_data, labels)) + model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) assert model.transformer_.shape == (1, 1) # the transformer must be 2D diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 5fc11aeb..7a51ee68 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -20,7 +20,7 @@ from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, ids_tuples_learners, pairs_learners, - ids_pairs_learners, make_args_inc_quadruplets, + ids_pairs_learners, remove_y_quadruplets, quadruplets_learners) @@ -140,13 +140,13 @@ def test_cross_validation_is_finite(estimator, build_dataset): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) assert np.isfinite(cross_val_score(estimator, - *make_args_inc_quadruplets(estimator, - input_data, - labels))).all() + *remove_y_quadruplets(estimator, + input_data, + labels))).all() assert np.isfinite(cross_val_predict(estimator, - *make_args_inc_quadruplets(estimator, - input_data, - labels) + *remove_y_quadruplets(estimator, + input_data, + labels) )).all() @@ -178,25 +178,28 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, train_mask = np.ones(input_data.shape[0], bool) train_mask[test_slice] = False y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(*make_args_inc_quadruplets(estimator, - input_data[train_mask], - y_train)) + estimator.fit(*remove_y_quadruplets(estimator, + 
input_data[train_mask], + y_train)) if hasattr(estimator, "score"): - scores.append(estimator.score(*make_args_inc_quadruplets(estimator, - input_data[test_slice], y_test))) + scores.append(estimator.score(*remove_y_quadruplets( + estimator, input_data[test_slice], y_test))) if hasattr(estimator, "predict"): predictions[test_slice] = estimator.predict(input_data[test_slice]) if hasattr(estimator, "score"): - assert all(scores == cross_val_score(estimator, - *make_args_inc_quadruplets(estimator, input_data, labels), cv=kfold)) + assert all(scores == cross_val_score( + estimator, *remove_y_quadruplets(estimator, input_data, labels), + cv=kfold)) if hasattr(estimator, "predict"): - assert all(predictions == cross_val_predict(estimator, - *make_args_inc_quadruplets(estimator, input_data, labels), cv=kfold)) + assert all(predictions == cross_val_predict( + estimator, + *remove_y_quadruplets(estimator, input_data, labels), + cv=kfold)) def check_score(estimator, tuples, y): if hasattr(estimator, "score"): - score = estimator.score(*make_args_inc_quadruplets(estimator, tuples, y)) + score = estimator.score(*remove_y_quadruplets(estimator, tuples, y)) assert np.isfinite(score) @@ -220,7 +223,7 @@ def test_simple_estimator(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(*make_args_inc_quadruplets(estimator, tuples_train, y_train)) + estimator.fit(*remove_y_quadruplets(estimator, tuples_train, y_train)) check_score(estimator, tuples_test, y_test) check_predict(estimator, tuples_test) @@ -267,9 +270,9 @@ def test_estimators_fit_returns_self(estimator, build_dataset, input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - assert estimator.fit(*make_args_inc_quadruplets(estimator, - input_data, - labels)) is estimator + assert estimator.fit(*remove_y_quadruplets(estimator, + input_data, + labels)) is estimator @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -299,18 +302,18 @@ def make_random_state(estimator, in_pipeline): estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) pipeline = make_pipeline(estimator) - estimator.fit(*make_args_inc_quadruplets(estimator, input_data, y), + estimator.fit(*remove_y_quadruplets(estimator, input_data, y), **make_random_state(estimator, False)) - pipeline.fit(*make_args_inc_quadruplets(estimator, input_data, y), + pipeline.fit(*remove_y_quadruplets(estimator, input_data, y), **make_random_state(estimator, True)) if hasattr(estimator, 'score'): - result = estimator.score(*make_args_inc_quadruplets(estimator, - input_data, - y)) - result_pipe = pipeline.score(*make_args_inc_quadruplets(estimator, - input_data, - y)) + result = estimator.score(*remove_y_quadruplets(estimator, + input_data, + y)) + result_pipe = pipeline.score(*remove_y_quadruplets(estimator, + input_data, + y)) assert_allclose_dense_sparse(result, result_pipe) if hasattr(estimator, 'predict'): @@ -336,7 +339,7 @@ def test_dict_unchanged(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) if hasattr(estimator, "num_dims"): estimator.num_dims = 1 - estimator.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) def check_dict(): assert estimator.__dict__ == dict_before, ( @@ -367,7 +370,7 @@ def test_dont_overwrite_parameters(estimator, build_dataset, 
estimator.num_dims = 1 dict_before_fit = estimator.__dict__.copy() - estimator.fit(*make_args_inc_quadruplets(estimator, input_data, labels)) + estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) dict_after_fit = estimator.__dict__ public_keys_after_fit = [key for key in dict_after_fit.keys() diff --git a/test/test_utils.py b/test/test_utils.py index bc5ea59b..821091a4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -143,7 +143,7 @@ def build_quadruplets(with_preprocessor=False): ids_metric_learners = ids_tuples_learners + ids_supervised_learners -def make_args_inc_quadruplets(estimator, X, y): +def remove_y_quadruplets(estimator, X, y): """Quadruplets learners have no y in fit, but to write test for all estimators, it is convenient to have this function, that will return X and y if the estimator needs a y to fit on, and just X otherwise.""" @@ -856,7 +856,7 @@ def test_error_message_tuple_size(estimator): [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) y = [1, 1] with pytest.raises(ValueError) as raised_err: - estimator.fit(*make_args_inc_quadruplets(estimator, invalid_pairs, y)) + estimator.fit(*remove_y_quadruplets(estimator, invalid_pairs, y)) expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(estimator._tuple_size, make_context(estimator), @@ -941,25 +941,25 @@ def make_random_state(estimator): estimator_with_preprocessor = clone(estimator) set_random_state(estimator_with_preprocessor) estimator_with_preprocessor.set_params(preprocessor=X) - estimator_with_preprocessor.fit(*make_args_inc_quadruplets(estimator, - indices_train, - y_train), + estimator_with_preprocessor.fit(*remove_y_quadruplets(estimator, + indices_train, + y_train), **make_random_state(estimator)) estimator_without_preprocessor = clone(estimator) set_random_state(estimator_without_preprocessor) estimator_without_preprocessor.set_params(preprocessor=None) - estimator_without_preprocessor.fit(*make_args_inc_quadruplets(estimator, - formed_train, - y_train), + estimator_without_preprocessor.fit(*remove_y_quadruplets(estimator, + formed_train, + y_train), **make_random_state(estimator)) estimator_with_prep_formed = clone(estimator) set_random_state(estimator_with_prep_formed) estimator_with_prep_formed.set_params(preprocessor=X) - estimator_with_prep_formed.fit(*make_args_inc_quadruplets(estimator, - indices_train, - y_train), + estimator_with_prep_formed.fit(*remove_y_quadruplets(estimator, + indices_train, + y_train), **make_random_state(estimator)) # test prediction methods From facc546c2f2485e5efa2f98c86b62b5cdc98dea7 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 13:40:22 +0100 Subject: [PATCH 25/41] TST: Fix remaining threshold into min_rate --- test/test_pairs_classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index d467e965..f85e6125 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -231,7 +231,7 @@ def test_found_score_is_best_score(kwargs, scoring): [({'strategy': 'max_tpr', 'min_rate': t}, partial(tpr_threshold, tnr_threshold=t)) for t in [0., 0.1, 0.5, 0.8, 1.]] + - [({'strategy': 'max_tnr', 'threshold': t}, + [({'strategy': 'max_tnr', 'min_rate': t}, partial(tnr_threshold, tpr_threshold=t)) for t in [0., 0.1, 0.5, 0.8, 1.]] ) From f0ca65ec6ee340ba0309fe91a386d9186a19b5b5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 13:44:53 +0100 
Subject: [PATCH 26/41] Remove default_threshold and put calibrate_threshold instead --- metric_learn/base_metric.py | 14 -------------- metric_learn/mmc.py | 2 +- metric_learn/sdml.py | 2 +- test/test_pairs_classifiers.py | 10 ---------- 4 files changed, 2 insertions(+), 26 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 77672bba..78450941 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -381,20 +381,6 @@ def score(self, pairs, y): """ return roc_auc_score(y, self.decision_function(pairs)) - def set_default_threshold(self, pairs, y): - """Sets the default threshold on the given dataset. - - Returns a threshold that is the mean between the similar - metrics mean and the dissimilar metrics mean. - - See more in the :ref:`User Guide `. - """ - similar_threshold = np.mean(self.score_pairs( - pairs[(y == 1).ravel()])) - dissimilar_threshold = np.mean(self.score_pairs( - pairs[(y == -1).ravel()])) - self.threshold_ = np.mean([similar_threshold, dissimilar_threshold]) - def set_threshold(self, threshold): """Sets the threshold of the metric learner to the given value `threshold`. diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 2ddcced2..0574f572 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -385,7 +385,7 @@ def fit(self, pairs, y): Returns the instance. """ self._fit(pairs, y) - self.set_default_threshold(pairs, y) + self.calibrate_threshold(pairs, y) return self diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 096dc0ed..d11bf3cc 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -107,7 +107,7 @@ def fit(self, pairs, y): Returns the instance. """ self._fit(pairs, y) - self.set_default_threshold(pairs, y) + self.calibrate_threshold(pairs, y) return self diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index f85e6125..eb3e6b4b 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -120,16 +120,6 @@ def test_set_threshold(): assert identity_pairs_classifier.threshold_ == 0.5 -def test_set_default_threshold_toy_example(): - # test that the default threshold has the right value on a toy example - identity_pairs_classifier = IdentityPairsClassifier() - pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) - y = np.array([1, 1, -1, -1]) - identity_pairs_classifier.fit(pairs, y) - identity_pairs_classifier.set_default_threshold(pairs, y) - assert identity_pairs_classifier.threshold_ == 2.5 - - def test_f_beta_1_is_f_1(): # test that putting beta to 1 indeed finds the best threshold to optimize # the f1_score From a6ec2831b22e9de3efe51f83f0ef5cc3af1659db Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 13:54:31 +0100 Subject: [PATCH 27/41] Use calibrate_threshold for ITML, and remove description --- metric_learn/itml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 57df1471..9ce736f9 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -158,8 +158,6 @@ class ITML(_BaseITML, _PairsClassifierMixin): def fit(self, pairs, y, bounds=None): """Learn the ITML model. - The default threshold will be set to the mean of the bounds. - Parameters ---------- pairs: array-like, shape=(n_constraints, 2, n_features) or @@ -184,7 +182,7 @@ def fit(self, pairs, y, bounds=None): Returns the instance. 
""" self._fit(pairs, y, bounds=bounds) - self.threshold_ = np.mean(self.bounds_) + self.threshold_ = self.calibrate_threshold(pairs, y) return self From 49fbbd783e7bbc29574584944c8cba5813a6c855 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 14:14:13 +0100 Subject: [PATCH 28/41] ENH: use calibrate_threshold by default and display its parameters from the fit method --- metric_learn/itml.py | 13 +++++++++++-- metric_learn/mmc.py | 18 +++++++++++++----- metric_learn/sdml.py | 17 +++++++++++++---- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 9ce736f9..09ee0bbe 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -155,9 +155,12 @@ class ITML(_BaseITML, _PairsClassifierMixin): classified as dissimilar. """ - def fit(self, pairs, y, bounds=None): + def fit(self, pairs, y, bounds=None, threshold_params=None): """Learn the ITML model. + The threshold will be calibrated on the trainset using the parameters + `threshold_params`. + Parameters ---------- pairs: array-like, shape=(n_constraints, 2, n_features) or @@ -175,6 +178,10 @@ def fit(self, pairs, y, bounds=None): If not provided at initialization, bounds_[0] and bounds_[1] will be set to the 5th and 95th percentile of the pairwise distances among all points present in the input `pairs`. + threshold_params : `dict` or `None` + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- @@ -182,7 +189,9 @@ def fit(self, pairs, y, bounds=None): Returns the instance. """ self._fit(pairs, y, bounds=bounds) - self.threshold_ = self.calibrate_threshold(pairs, y) + self.calibrate_threshold(pairs, y, **(threshold_params if + threshold_params is not None else + dict())) return self diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 0574f572..30e935d2 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -366,26 +366,34 @@ class MMC(_BaseMMC, _PairsClassifierMixin): classified as dissimilar. """ - def fit(self, pairs, y): + def fit(self, pairs, y, threshold_params=None): """Learn the MMC model. + The threshold will be calibrated on the trainset using the parameters + `threshold_params`. + Parameters ---------- - pairs: array-like, shape=(n_constraints, 2, n_features) or + pairs : array-like, shape=(n_constraints, 2, n_features) or (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a preprocessor. - y: array-like, of shape (n_constraints,) + y : array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. - + threshold_params : `dict` or `None` + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object Returns the instance. """ self._fit(pairs, y) - self.calibrate_threshold(pairs, y) + self.calibrate_threshold(pairs, y, **(threshold_params if + threshold_params is not None else + dict())) return self diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index d11bf3cc..ee268ad5 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -88,18 +88,25 @@ class SDML(_BaseSDML, _PairsClassifierMixin): classified as dissimilar. 
""" - def fit(self, pairs, y): + def fit(self, pairs, y, threshold_params=None): """Learn the SDML model. + The threshold will be calibrated on the trainset using the parameters + `threshold_params`. + Parameters ---------- - pairs: array-like, shape=(n_constraints, 2, n_features) or + pairs : array-like, shape=(n_constraints, 2, n_features) or (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a preprocessor. - y: array-like, of shape (n_constraints,) + y : array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + threshold_params : `dict` or `None` + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- @@ -107,7 +114,9 @@ def fit(self, pairs, y): Returns the instance. """ self._fit(pairs, y) - self.calibrate_threshold(pairs, y) + self.calibrate_threshold(pairs, y, **(threshold_params if + threshold_params is not None else + dict())) return self From 960b1748fbe8c103a6ffebd50dfa78f9d5518afa Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 14:30:13 +0100 Subject: [PATCH 29/41] Add a small test to test automatic calibration --- test/test_pairs_classifiers.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index eb3e6b4b..ae37a1f0 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -73,6 +73,29 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, estimator.predict(input_data) +@pytest.mark.parametrize('threshold_param', + [None, {}, dict(), {'strategy': 'accuracy'}] + + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [0., 0.1, 0.2, 1., 5.]] + ) +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', pairs_learners, + ids=ids_pairs_learners) +def test_fit_with_valid_threshold_params(estimator, build_dataset, + with_preprocessor, threshold_param): + """Tests that fitting `threshold_params` with appropriate parameters works + as expected""" + pairs, y, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + estimator.fit(pairs, y, threshold_params=threshold_param) + estimator.predict(pairs) + + @pytest.mark.parametrize('kwargs', [{'strategy': 'accuracy'}] + [{'strategy': strategy, 'min_rate': min_rate} @@ -84,9 +107,9 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, @pytest.mark.parametrize('with_preprocessor', [True, False]) @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) -def test_min_rate_different_scores_is_finite(estimator, build_dataset, - with_preprocessor, kwargs): - # test that the score returned is finite for every metric learner +def test_threshold_different_scores_is_finite(estimator, build_dataset, + with_preprocessor, kwargs): + # test that calibrating the threshold works for every metric learner input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) 
estimator.set_params(preprocessor=preprocessor) From c91acf787bd5841f034c1de00991ef2a35c4936a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 14:44:10 +0100 Subject: [PATCH 30/41] Update documentation of the default threshold --- doc/weakly_supervised.rst | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 2868f9e8..6bf6f993 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -168,16 +168,20 @@ learned space) than this threshold are predicted as similar, and points further away are predicted as dissimilar. Several methods are possible for this thresholding. -- **default**: Unless explicitely stated in the `fit` method documentation - of the estimator, the threshold is set with the method - `set_default_threshold` on the trainset. - -- **manual**: calling `set_threshold` will set the threshold to a +- **At fit time**: The threshold is set with `calibrate_threshold` (see + below) on the trainset. You can specify the calibration parameters directly + in the `fit` method with the `threshold_params` parameter (see the + documentation of the `fit` method of any metric learner that learns on pairs + of points for more information). This method can cause a little bit of + overfitting. If you want to avoid that, calibrate the threshold after + fitting, on a validation set. + +- **Manual**: calling `set_threshold` will set the threshold to a particular value. -- **calibrated**: calling `calibrate_threshold` will calibrate the threshold to - achieve a particular score on a validation set, the score being among the - classical scores for classification (accuracy, f1 score...). +- **Calibration**: calling `calibrate_threshold` will calibrate the + threshold to achieve a particular score on a validation set, the score + being among the classical scores for classification (accuracy, f1 score...). See also: `sklearn.calibration`. From a742186bf564805436c6536cc85199f68d45d5b2 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 15:51:58 +0100 Subject: [PATCH 31/41] Inverse sense for threshold comparison to be more intuitive --- metric_learn/base_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 78450941..f65c1f96 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -326,7 +326,7 @@ def predict(self, pairs): The predicted learned metric value between samples in every pair. """ check_is_fitted(self, ['threshold_', 'transformer_']) - return 2 * (self.decision_function(pairs) >= - self.threshold_) - 1 + return 2 * (- self.decision_function(pairs) <= self.threshold_) - 1 def decision_function(self, pairs): """Returns the decision function used to classify the pairs. 
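As a usage sketch of the three thresholding options described in the documentation hunk above (MMC is just one example of a pairs learner; `pairs_train`, `y_train`, `pairs_valid` and `y_valid` are placeholder arrays of pairs and +1/-1 labels, not defined in this patch series):

from metric_learn import MMC

mmc = MMC()

# 1) at fit time: calibrate on the training pairs (may slightly overfit the
#    threshold), with the parameters forwarded to calibrate_threshold
mmc.fit(pairs_train, y_train,
        threshold_params={'strategy': 'f_beta', 'beta': 2.})

# 2) manually: set the distance threshold to a chosen value
mmc.set_threshold(0.8)

# 3) on held-out pairs: recalibrate to optimize a classification score
mmc.calibrate_threshold(pairs_valid, y_valid, strategy='accuracy')

# with the comparison introduced above, pairs at a learned distance below
# threshold_ are predicted +1 (similar), the others -1 (dissimilar)
predictions = mmc.predict(pairs_valid)
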
From 9ec1ead7b0fa04ff08c563f42fdcf92082ecfc98 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 21 Mar 2019 15:58:48 +0100 Subject: [PATCH 32/41] Address remaining review comments --- metric_learn/base_metric.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index f65c1f96..ac850c4f 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -504,9 +504,15 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', if strategy == 'f_beta': precision, recall, thresholds = precision_recall_curve( y_valid, self.decision_function(pairs_valid), pos_label=1) + # We ignore the warnings here, in the same taste as + # https://github.com/scikit-learn/scikit-learn/blob/62d205980446a1abc1065 + # f4332fd74eee57fcf73/sklearn/metrics/classification.py#L1284 with np.errstate(divide='ignore', invalid='ignore'): f_beta = ((1 + beta**2) * (precision * recall) / (beta**2 * precision + recall)) + # We need to set nans to zero otherwise they will be considered higher + # than the others (also discussed in https://github.com/scikit-learn/ + # scikit-learn/pull/10117/files#r262115773) f_beta[np.isnan(f_beta)] = 0. imax = np.argmax(f_beta) # note: we want a positive threshold (distance), so we take - threshold From 986fed31148c28aa29587164bde7c955876ee3d1 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 26 Mar 2019 10:06:19 +0100 Subject: [PATCH 33/41] MAINT: Rename threshold_params into calibration_params --- metric_learn/itml.py | 10 +++++----- metric_learn/mmc.py | 10 +++++----- metric_learn/sdml.py | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 09ee0bbe..76b790f3 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -155,11 +155,11 @@ class ITML(_BaseITML, _PairsClassifierMixin): classified as dissimilar. """ - def fit(self, pairs, y, bounds=None, threshold_params=None): + def fit(self, pairs, y, bounds=None, calibration_params=None): """Learn the ITML model. The threshold will be calibrated on the trainset using the parameters - `threshold_params`. + `calibration_params`. Parameters ---------- @@ -178,7 +178,7 @@ def fit(self, pairs, y, bounds=None, threshold_params=None): If not provided at initialization, bounds_[0] and bounds_[1] will be set to the 5th and 95th percentile of the pairwise distances among all points present in the input `pairs`. - threshold_params : `dict` or `None` + calibration_params : `dict` or `None` Dictionary of parameters to give to `calibrate_threshold` for the threshold calibration step done at the end of `fit`. If `None` is given, `calibrate_threshold` will use the default parameters. @@ -189,8 +189,8 @@ def fit(self, pairs, y, bounds=None, threshold_params=None): Returns the instance. """ self._fit(pairs, y, bounds=bounds) - self.calibrate_threshold(pairs, y, **(threshold_params if - threshold_params is not None else + self.calibrate_threshold(pairs, y, **(calibration_params if + calibration_params is not None else dict())) return self diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 30e935d2..1d4e8fa6 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -366,11 +366,11 @@ class MMC(_BaseMMC, _PairsClassifierMixin): classified as dissimilar. """ - def fit(self, pairs, y, threshold_params=None): + def fit(self, pairs, y, calibration_params=None): """Learn the MMC model. 
The threshold will be calibrated on the trainset using the parameters - `threshold_params`. + `calibration_params`. Parameters ---------- @@ -381,7 +381,7 @@ def fit(self, pairs, y, threshold_params=None): preprocessor. y : array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. - threshold_params : `dict` or `None` + calibration_params : `dict` or `None` Dictionary of parameters to give to `calibrate_threshold` for the threshold calibration step done at the end of `fit`. If `None` is given, `calibrate_threshold` will use the default parameters. @@ -391,8 +391,8 @@ def fit(self, pairs, y, threshold_params=None): Returns the instance. """ self._fit(pairs, y) - self.calibrate_threshold(pairs, y, **(threshold_params if - threshold_params is not None else + self.calibrate_threshold(pairs, y, **(calibration_params if + calibration_params is not None else dict())) return self diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index ee268ad5..0d24d096 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -88,11 +88,11 @@ class SDML(_BaseSDML, _PairsClassifierMixin): classified as dissimilar. """ - def fit(self, pairs, y, threshold_params=None): + def fit(self, pairs, y, calibration_params=None): """Learn the SDML model. The threshold will be calibrated on the trainset using the parameters - `threshold_params`. + `calibration_params`. Parameters ---------- @@ -103,7 +103,7 @@ def fit(self, pairs, y, threshold_params=None): preprocessor. y : array-like, of shape (n_constraints,) Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. - threshold_params : `dict` or `None` + calibration_params : `dict` or `None` Dictionary of parameters to give to `calibrate_threshold` for the threshold calibration step done at the end of `fit`. If `None` is given, `calibrate_threshold` will use the default parameters. @@ -114,8 +114,8 @@ def fit(self, pairs, y, threshold_params=None): Returns the instance. """ self._fit(pairs, y) - self.calibrate_threshold(pairs, y, **(threshold_params if - threshold_params is not None else + self.calibrate_threshold(pairs, y, **(calibration_params if + calibration_params is not None else dict())) return self From 3f5d6d1447c1fc01ed51f2fc24daf7b52adcb1ba Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 26 Mar 2019 12:06:25 +0100 Subject: [PATCH 34/41] TST: Add test for extreme cases --- test/test_pairs_classifiers.py | 78 ++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index ae37a1f0..d02a2078 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -330,3 +330,81 @@ def test_calibrate_threshold_valid_parameters(valid_args): with pytest.warns(None) as record: pairs_learner.calibrate_threshold(pairs, y, **valid_args) assert len(record) == 0 + + +def test_calibrate_threshold_extreme(): + """Test that in the (rare) case where we should accept all points or + reject all points, this is effectively what + is done""" + + class MockBadPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): + """A pairs classifier that returns bad scores (i.e. 
in the inverse order + of what we would expect from a good pairs classifier + """ + + def fit(self, pairs, y, calibration_params=None): + self.transformer_ = 'not used' + self.calibrate_threshold(pairs, y, **(calibration_params if + calibration_params is not None else + dict())) + return self + + def decision_function(self, pairs): + return np.arange(7) + rng = np.random.RandomState(42) + pairs = rng.randn(7, 2, 5) # the info in X is not used, it's just for the + # API + + y = [1, 1, 1, -1, -1, -1, -1] + mock_clf = MockBadPairsClassifier() + # case of bad scoring with more negative than positives. In + # this case, when: + # optimizing for accuracy we should reject all points + mock_clf.fit(pairs, y, calibration_params={'strategy': 'accuracy'}) + assert (mock_clf.predict(pairs) == - np.ones(7)).all() + + # optimizing for max_tpr we should accept all points if min_rate == 0. ( + # because by convention then tnr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tpr', + 'min_rate': 0.}) + assert (mock_clf.predict(pairs) == np.ones(7)).all() + # optimizing for max_tnr we should reject all points if min_rate = 0. ( + # because by convention then tpr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tnr', + 'min_rate': 0.}) + assert (mock_clf.predict(pairs) == - np.ones(7)).all() + + y = [1, 1, 1, 1, -1, -1, -1] + # case of bad scoring with more positives than negatives. In + # this case, when: + # optimizing for accuracy we should accept all points + mock_clf.fit(pairs, y, calibration_params={'strategy': 'accuracy'}) + assert (mock_clf.predict(pairs) == np.ones(7)).all() + # optimizing for max_tpr we should accept all points if min_rate == 0. ( + # because by convention then tnr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tpr', + 'min_rate': 0.}) + assert (mock_clf.predict(pairs) == np.ones(7)).all() + # optimizing for max_tnr we should reject all points if min_rate = 0. 
( + # because by convention then tpr=0/0=0) + mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tnr', + 'min_rate': 0.}) + assert (mock_clf.predict(pairs) == - np.ones(7)).all() + + # Note: we'll never find a case where we would reject all points for + # maximizing tpr (we can always accept more points), and accept all + # points for maximizing tnr (we can always reject more points) + + # case of alternated scores: for optimizing the f_1 score we should accept + # all points (because this way we have max recall (1) and max precision ( + # here: 0.5)) + y = [1, -1, 1, -1, 1, -1] + mock_clf.fit(pairs, y, calibration_params={'strategy': 'f_beta', + 'beta': 1.}) + assert (mock_clf.predict(pairs) == - np.ones(7)).all() + + # Note: for optimizing f_1 score, we will never find an optimal case where we + # reject all points because in this case we would have 0 precision (by + # convention, because it's 0/0), and 0 recall (and we could always decrease + # the threshold to increase the recall, and we couldn't do worse for + # precision so it would be better) From 7b5e4ddd0cbf2489cd7a001e66121226cf32ed60 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 26 Mar 2019 17:03:58 +0100 Subject: [PATCH 35/41] MAINT: rename threshold_params into calibration_params --- test/test_pairs_classifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index d02a2078..be780dc6 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -92,7 +92,7 @@ def test_fit_with_valid_threshold_params(estimator, build_dataset, estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(pairs, y, threshold_params=threshold_param) + estimator.fit(pairs, y, calibration_params=threshold_param) estimator.predict(pairs) From a3ec02c364145691bdaff8c7beb723d4a2138870 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 26 Mar 2019 17:07:15 +0100 Subject: [PATCH 36/41] MAINT: rename threshold_params into calibration_params --- test/test_pairs_classifiers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index be780dc6..70fb6c11 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -73,7 +73,7 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, estimator.predict(input_data) -@pytest.mark.parametrize('threshold_param', +@pytest.mark.parametrize('calibration_params', [None, {}, dict(), {'strategy': 'accuracy'}] + [{'strategy': strategy, 'min_rate': min_rate} for (strategy, min_rate) in product( @@ -85,14 +85,15 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, @pytest.mark.parametrize('estimator, build_dataset', pairs_learners, ids=ids_pairs_learners) def test_fit_with_valid_threshold_params(estimator, build_dataset, - with_preprocessor, threshold_param): - """Tests that fitting `threshold_params` with appropriate parameters works + with_preprocessor, + calibration_params): + """Tests that fitting `calibration_params` with appropriate parameters works as expected""" pairs, y, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(pairs, y, calibration_params=threshold_param) + estimator.fit(pairs, y, calibration_params=calibration_params) 
estimator.predict(pairs) From ccc66eba9df2c639a86dcda1d725875b7840944b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 27 Mar 2019 11:30:35 +0100 Subject: [PATCH 37/41] FIX: Make tests work, and add the right threshold (mean between lowest accepted value and highest rejected value), and max + 1 or min - 1 for extreme points --- metric_learn/base_metric.py | 71 ++++++++++++++++++++++++++-------- test/test_pairs_classifiers.py | 29 +++++++------- 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index ac850c4f..27c294df 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,7 +1,8 @@ from sklearn.base import BaseEstimator +from sklearn.metrics.ranking import _binary_clf_curve from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted -from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve +from sklearn.metrics import roc_auc_score, roc_curve import numpy as np from abc import ABCMeta, abstractmethod import six @@ -490,20 +491,39 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', scores_sorted = scores[scores_sorted_idces] # true labels ordered by decision_function value: (higher first) y_ordered = y_valid[scores_sorted_idces] + # we need to add a threshold that will reject all points + scores_sorted = np.concatenate([[scores_sorted[0] + 1], scores_sorted]) + # finds the threshold that maximizes the accuracy: cum_tp = stable_cumsum(y_ordered == 1) # cumulative number of true # positives + # we need to add the point where all samples are rejected: + cum_tp = np.concatenate([[0.], cum_tp]) cum_tn_inverted = stable_cumsum(y_ordered[::-1] == -1) - cum_tn = np.concatenate([[0], cum_tn_inverted[:-1]])[::-1] + cum_tn = np.concatenate([[0.], cum_tn_inverted])[::-1] cum_accuracy = (cum_tp + cum_tn) / n_samples imax = np.argmax(cum_accuracy) # note: we want a positive threshold (distance), so we take - threshold - self.threshold_ = - scores_sorted[imax] + if imax == len(scores_sorted): # if the best is to accept all points + # we set the threshold to (minus) [the lowest score - 1] + self.threshold_ = - (scores_sorted[imax] - 1) + else: + # otherwise, we set the threshold to the mean between the lowest + # accepted score and the highest accepted score + self.threshold_ = - np.mean(scores_sorted[imax: imax + 2]) + # note: if the best is to reject all points it's already one of the + # thresholds (scores_sorted[0] + 1) return self if strategy == 'f_beta': - precision, recall, thresholds = precision_recall_curve( + fps, tps, thresholds = _binary_clf_curve( y_valid, self.decision_function(pairs_valid), pos_label=1) + + precision = tps / (tps + fps) + precision[np.isnan(precision)] = 0 + recall = tps / tps[-1] + + # here the thresholds are decreasing # We ignore the warnings here, in the same taste as # https://github.com/scikit-learn/scikit-learn/blob/62d205980446a1abc1065 # f4332fd74eee57fcf73/sklearn/metrics/classification.py#L1284 @@ -516,26 +536,45 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', f_beta[np.isnan(f_beta)] = 0. 
imax = np.argmax(f_beta) # note: we want a positive threshold (distance), so we take - threshold - self.threshold_ = - thresholds[imax] + if imax == len(thresholds): # the best is to accept all points + # we set the threshold to (minus) [the lowest score - 1] + self.threshold_ = - (thresholds[imax] - 1) + else: + # otherwise, we set the threshold to the mean between the lowest + # accepted score and the highest rejected score + self.threshold_ = - np.mean(thresholds[imax: imax + 2]) + # Note: we don't need to deal with rejecting all points (i.e. threshold = + # max_scores + 1), since this can never happen to be optimal + # (see a more detailed discussion in test_calibrate_threshold_extreme) return self fpr, tpr, thresholds = roc_curve(y_valid, self.decision_function(pairs_valid), - pos_label=1) + pos_label=1, drop_intermediate=False) + # here the thresholds are decreasing fpr, tpr, thresholds = fpr, tpr, thresholds - if strategy == 'max_tpr': - indices = np.where(1 - fpr >= min_rate)[0] - max_tpr_index = np.argmax(tpr[indices]) - # note: we want a positive threshold (distance), so we take - threshold - self.threshold_ = - thresholds[indices[max_tpr_index]] + if strategy in ['max_tpr', 'max_tnr']: + if strategy == 'max_tpr': + indices = np.where(1 - fpr >= min_rate)[0] + imax = np.argmax(tpr[indices]) - if strategy == 'max_tnr': - indices = np.where(tpr >= min_rate)[0] - max_tnr_index = np.argmax(1 - fpr[indices]) + if strategy == 'max_tnr': + indices = np.where(tpr >= min_rate)[0] + imax = np.argmax(1 - fpr[indices]) + + imax_valid = indices[imax] # note: we want a positive threshold (distance), so we take - threshold - self.threshold_ = - thresholds[indices[max_tnr_index]] - return self + if indices[imax] == len(thresholds): # we want to accept everything + self.threshold_ = - (thresholds[imax_valid] - 1) + elif indices[imax] == 0: # we want to reject everything + # thanks to roc_curve, the first point should be always max_threshold + # + 1 (we should always go through the "if" statement in roc_curve), + # see: https://github.com/scikit-learn/scikit-learn/pull/13523 + self.threshold_ = - (thresholds[imax_valid]) + else: + self.threshold_ = - np.mean(thresholds[imax_valid: imax_valid + 2]) + return self class _QuadrupletsClassifierMixin(BaseMetricLearner): diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 70fb6c11..6c1d584e 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -3,6 +3,8 @@ from functools import partial import pytest +from numpy.testing import assert_array_equal + from metric_learn.base_metric import _PairsClassifierMixin, MahalanobisMixin from sklearn.exceptions import NotFittedError from sklearn.metrics import (f1_score, accuracy_score, fbeta_score, @@ -351,46 +353,47 @@ def fit(self, pairs, y, calibration_params=None): return self def decision_function(self, pairs): - return np.arange(7) + return np.arange(pairs.shape[0], dtype=float) + rng = np.random.RandomState(42) pairs = rng.randn(7, 2, 5) # the info in X is not used, it's just for the # API - y = [1, 1, 1, -1, -1, -1, -1] + y = [1., 1., 1., -1., -1., -1., -1.] mock_clf = MockBadPairsClassifier() # case of bad scoring with more negative than positives. 
In # this case, when: # optimizing for accuracy we should reject all points mock_clf.fit(pairs, y, calibration_params={'strategy': 'accuracy'}) - assert (mock_clf.predict(pairs) == - np.ones(7)).all() + assert_array_equal(mock_clf.predict(pairs), - np.ones(7)) # optimizing for max_tpr we should accept all points if min_rate == 0. ( # because by convention then tnr=0/0=0) mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tpr', 'min_rate': 0.}) - assert (mock_clf.predict(pairs) == np.ones(7)).all() + assert_array_equal(mock_clf.predict(pairs), np.ones(7)) # optimizing for max_tnr we should reject all points if min_rate = 0. ( # because by convention then tpr=0/0=0) mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tnr', 'min_rate': 0.}) - assert (mock_clf.predict(pairs) == - np.ones(7)).all() + assert_array_equal(mock_clf.predict(pairs), - np.ones(7)) - y = [1, 1, 1, 1, -1, -1, -1] + y = [1., 1., 1., 1., -1., -1., -1.] # case of bad scoring with more positives than negatives. In # this case, when: # optimizing for accuracy we should accept all points mock_clf.fit(pairs, y, calibration_params={'strategy': 'accuracy'}) - assert (mock_clf.predict(pairs) == np.ones(7)).all() + assert_array_equal(mock_clf.predict(pairs), np.ones(7)) # optimizing for max_tpr we should accept all points if min_rate == 0. ( # because by convention then tnr=0/0=0) mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tpr', 'min_rate': 0.}) - assert (mock_clf.predict(pairs) == np.ones(7)).all() + assert_array_equal(mock_clf.predict(pairs), np.ones(7)) # optimizing for max_tnr we should reject all points if min_rate = 0. ( # because by convention then tpr=0/0=0) mock_clf.fit(pairs, y, calibration_params={'strategy': 'max_tnr', 'min_rate': 0.}) - assert (mock_clf.predict(pairs) == - np.ones(7)).all() + assert_array_equal(mock_clf.predict(pairs), - np.ones(7)) # Note: we'll never find a case where we would reject all points for # maximizing tpr (we can always accept more points), and accept all @@ -399,10 +402,10 @@ def decision_function(self, pairs): # case of alternated scores: for optimizing the f_1 score we should accept # all points (because this way we have max recall (1) and max precision ( # here: 0.5)) - y = [1, -1, 1, -1, 1, -1] - mock_clf.fit(pairs, y, calibration_params={'strategy': 'f_beta', - 'beta': 1.}) - assert (mock_clf.predict(pairs) == - np.ones(7)).all() + y = [1., -1., 1., -1., 1., -1.] 
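  # (checking the arithmetic: accepting all six pairs gives precision 0.5 and
  # recall 1, hence f_1 = 2 * 0.5 * 1 / (0.5 + 1) = 2/3 ~ 0.67, whereas e.g.
  # accepting only the four highest-scoring pairs gives precision 0.5 and
  # recall 2/3, hence f_1 = 4/7 ~ 0.57)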
+ mock_clf.fit(pairs[:6], y, calibration_params={'strategy': 'f_beta', + 'beta': 1.}) + assert_array_equal(mock_clf.predict(pairs[:6]), np.ones(6)) # Note: for optimizing f_1 score, we will never find an optimal case where we # reject all points because in this case we would have 0 precision (by From 719d0183e477fbf5cb841fe85b3c07155600e273 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 2 Apr 2019 13:54:37 +0200 Subject: [PATCH 38/41] Go back to previous version of finding the threshold --- metric_learn/base_metric.py | 40 ++++++++++--------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 27c294df..83cefe85 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -1,8 +1,7 @@ from sklearn.base import BaseEstimator -from sklearn.metrics.ranking import _binary_clf_curve from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted -from sklearn.metrics import roc_auc_score, roc_curve +from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve import numpy as np from abc import ABCMeta, abstractmethod import six @@ -503,26 +502,17 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', cum_tn = np.concatenate([[0.], cum_tn_inverted])[::-1] cum_accuracy = (cum_tp + cum_tn) / n_samples imax = np.argmax(cum_accuracy) + # we set the threshold to the lowest accepted score # note: we want a positive threshold (distance), so we take - threshold - if imax == len(scores_sorted): # if the best is to accept all points - # we set the threshold to (minus) [the lowest score - 1] - self.threshold_ = - (scores_sorted[imax] - 1) - else: - # otherwise, we set the threshold to the mean between the lowest - # accepted score and the highest accepted score - self.threshold_ = - np.mean(scores_sorted[imax: imax + 2]) + self.threshold_ = - scores_sorted[imax] # note: if the best is to reject all points it's already one of the - # thresholds (scores_sorted[0] + 1) + # thresholds (scores_sorted[0]) return self if strategy == 'f_beta': - fps, tps, thresholds = _binary_clf_curve( + precision, recall, thresholds = precision_recall_curve( y_valid, self.decision_function(pairs_valid), pos_label=1) - precision = tps / (tps + fps) - precision[np.isnan(precision)] = 0 - recall = tps / tps[-1] - # here the thresholds are decreasing # We ignore the warnings here, in the same taste as # https://github.com/scikit-learn/scikit-learn/blob/62d205980446a1abc1065 @@ -535,14 +525,9 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', # scikit-learn/pull/10117/files#r262115773) f_beta[np.isnan(f_beta)] = 0. imax = np.argmax(f_beta) + # we set the threshold to the lowest accepted score # note: we want a positive threshold (distance), so we take - threshold - if imax == len(thresholds): # the best is to accept all points - # we set the threshold to (minus) [the lowest score - 1] - self.threshold_ = - (thresholds[imax] - 1) - else: - # otherwise, we set the threshold to the mean between the lowest - # accepted score and the highest rejected score - self.threshold_ = - np.mean(thresholds[imax: imax + 2]) + self.threshold_ = - thresholds[imax] # Note: we don't need to deal with rejecting all points (i.e. 
From 551d1619fb430e6bf8a916a65069cebb4e147063 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Tue, 2 Apr 2019 13:59:20 +0200
Subject: [PATCH 39/41] Extract method for validating calibration parameters

---
 metric_learn/base_metric.py | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 83cefe85..56e667af 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -462,23 +462,7 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
 
     sklearn.calibration : scikit-learn's module for calibrating classifiers
     """
-    if strategy not in ('accuracy', 'f_beta', 'max_tpr',
-                        'max_tnr'):
-      raise ValueError('Strategy can either be "accuracy", "f_beta" or '
-                       '"max_tpr" or "max_tnr". Got "{}" instead.'
-                       .format(strategy))
-
-    if strategy == 'max_tpr' or strategy == 'max_tnr':
-      if (min_rate is None or not isinstance(min_rate, (int, float)) or
-              not min_rate >= 0 or not min_rate <= 1):
-        raise ValueError('Parameter min_rate must be a number in'
-                         '[0, 1]. '
-                         'Got {} instead.'.format(min_rate))
-
-    if strategy == 'f_beta':
-      if beta is None or not isinstance(beta, (int, float)):
-        raise ValueError('Parameter beta must be a real number. '
-                         'Got {} instead.'.format(type(beta)))
+    self._validate_calibration_parameters(beta, min_rate, strategy)
 
     pairs_valid, y_valid = self._prepare_inputs(pairs_valid, y_valid,
                                                 type_of_inputs='tuples')
@@ -558,6 +542,24 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
       self.threshold_ = - thresholds[imax_valid]
     return self
 
+  def _validate_calibration_parameters(self, beta, min_rate, strategy):
+    """Ensure that calibration parameters have allowed values"""
+    if strategy not in ('accuracy', 'f_beta', 'max_tpr',
+                        'max_tnr'):
+      raise ValueError('Strategy can either be "accuracy", "f_beta" or '
+                       '"max_tpr" or "max_tnr". Got "{}" instead.'
+                       .format(strategy))
+    if strategy == 'max_tpr' or strategy == 'max_tnr':
+      if (min_rate is None or not isinstance(min_rate, (int, float)) or
+              not min_rate >= 0 or not min_rate <= 1):
+        raise ValueError('Parameter min_rate must be a number in'
+                         '[0, 1]. '
+                         'Got {} instead.'.format(min_rate))
+    if strategy == 'f_beta':
+      if beta is None or not isinstance(beta, (int, float)):
+        raise ValueError('Parameter beta must be a real number. '
+                         'Got {} instead.'.format(type(beta)))
+
+
 class _QuadrupletsClassifierMixin(BaseMetricLearner):
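
For readers skimming the diff, the extracted helper is pure parameter checking with no fitted state involved. Below is a standalone restatement of the same rules (a module-level function with a made-up name, not the library method), followed by a few calls showing which inputs pass and which raise:

    def validate_calibration_params(strategy='accuracy', min_rate=None,
                                    beta=1.):
        if strategy not in ('accuracy', 'f_beta', 'max_tpr', 'max_tnr'):
            raise ValueError('Strategy can either be "accuracy", "f_beta" or '
                             '"max_tpr" or "max_tnr". Got "{}" instead.'
                             .format(strategy))
        if strategy in ('max_tpr', 'max_tnr'):
            # min_rate must be a real number within [0, 1]
            if (min_rate is None or not isinstance(min_rate, (int, float)) or
                    not 0 <= min_rate <= 1):
                raise ValueError('Parameter min_rate must be a number in '
                                 '[0, 1]. Got {} instead.'.format(min_rate))
        if strategy == 'f_beta':
            # beta only needs to be a real number (negative values pass)
            if beta is None or not isinstance(beta, (int, float)):
                raise ValueError('Parameter beta must be a real number. '
                                 'Got {} instead.'.format(type(beta)))

    validate_calibration_params(strategy='max_tpr', min_rate=0.2)   # passes
    for bad in ({'strategy': 'weird'},
                {'strategy': 'max_tnr', 'min_rate': 1.2},
                {'strategy': 'f_beta', 'beta': None}):
        try:
            validate_calibration_params(**bad)
        except ValueError as error:
            print(error)
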
From 594c485fe01dd4242ba0986059103f17c8d31071 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Tue, 2 Apr 2019 15:27:23 +0200
Subject: [PATCH 40/41] Validate calibration params before fit

---
 metric_learn/base_metric.py    |  6 ++-
 metric_learn/itml.py           |  9 ++--
 metric_learn/mmc.py            |  7 ++--
 metric_learn/sdml.py           |  7 ++--
 test/test_pairs_classifiers.py | 77 ++++++++++++++++++++++++++++++++++
 5 files changed, 94 insertions(+), 12 deletions(-)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 56e667af..d4f00b13 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -462,7 +462,7 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
 
     sklearn.calibration : scikit-learn's module for calibrating classifiers
     """
-    self._validate_calibration_parameters(beta, min_rate, strategy)
+    self._validate_calibration_params(strategy, min_rate, beta)
 
     pairs_valid, y_valid = self._prepare_inputs(pairs_valid, y_valid,
                                                 type_of_inputs='tuples')
@@ -542,7 +542,9 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
       self.threshold_ = - thresholds[imax_valid]
     return self
 
-  def _validate_calibration_parameters(self, beta, min_rate, strategy):
+  @staticmethod
+  def _validate_calibration_params(strategy='accuracy', min_rate=None,
+                                   beta=1.):
     """Ensure that calibration parameters have allowed values"""
     if strategy not in ('accuracy', 'f_beta', 'max_tpr',
                         'max_tnr'):
       raise ValueError('Strategy can either be "accuracy", "f_beta" or '
                        '"max_tpr" or "max_tnr". Got "{}" instead.'
                        .format(strategy))
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 76b790f3..9b6dccb2 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -188,10 +188,11 @@ def fit(self, pairs, y, bounds=None, calibration_params=None):
     self : object
       Returns the instance.
     """
-    self._fit(pairs, y, bounds=bounds)
-    self.calibrate_threshold(pairs, y, **(calibration_params if
-                                          calibration_params is not None else
-                                          dict()))
+    calibration_params = (calibration_params if calibration_params is not
+                          None else dict())
+    self._validate_calibration_params(**calibration_params)
+    self._fit(pairs, y)
+    self.calibrate_threshold(pairs, y, **calibration_params)
     return self
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 1d4e8fa6..346db2f8 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -390,10 +390,11 @@ def fit(self, pairs, y, calibration_params=None):
     self : object
       Returns the instance.
     """
+    calibration_params = (calibration_params if calibration_params is not
+                          None else dict())
+    self._validate_calibration_params(**calibration_params)
     self._fit(pairs, y)
-    self.calibrate_threshold(pairs, y, **(calibration_params if
-                                          calibration_params is not None else
-                                          dict()))
+    self.calibrate_threshold(pairs, y, **calibration_params)
     return self
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index a07a8b49..e9828d07 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -170,10 +170,11 @@ def fit(self, pairs, y, calibration_params=None):
     self : object
       Returns the instance.
     """
+    calibration_params = (calibration_params if calibration_params is not
+                          None else dict())
+    self._validate_calibration_params(**calibration_params)
     self._fit(pairs, y)
-    self.calibrate_threshold(pairs, y, **(calibration_params if
-                                          calibration_params is not None else
-                                          dict()))
+    self.calibrate_threshold(pairs, y, **calibration_params)
     return self
""" + calibration_params = (calibration_params if calibration_params is not + None else dict()) + self._validate_calibration_params(**calibration_params) self._fit(pairs, y) - self.calibrate_threshold(pairs, y, **(calibration_params if - calibration_params is not None else - dict())) + self.calibrate_threshold(pairs, y, **calibration_params) return self diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 6c1d584e..828181cb 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -412,3 +412,80 @@ def decision_function(self, pairs): # convention, because it's 0/0), and 0 recall (and we could always decrease # the threshold to increase the recall, and we couldn't do worse for # precision so it would be better) + + +@pytest.mark.parametrize('estimator, _', + pairs_learners + [(IdentityPairsClassifier(), None), + (_PairsClassifierMixin, None)], + ids=ids_pairs_learners + ['mock', 'class']) +@pytest.mark.parametrize('invalid_args, expected_msg', + [({'strategy': 'weird'}, + ('Strategy can either be "accuracy", "f_beta" or ' + '"max_tpr" or "max_tnr". Got "weird" instead.'))] + + [({'strategy': strategy, 'min_rate': min_rate}, + 'Parameter min_rate must be a number in' + '[0, 1]. Got {} instead.'.format(min_rate)) + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], + [None, 'weird', -0.2, 1.2, 3 + 2j])] + + [({'strategy': 'f_beta', 'beta': beta}, + 'Parameter beta must be a real number. ' + 'Got {} instead.'.format(type(beta))) + for beta in [None, 'weird', 3 + 2j]] + ) +def test_validate_calibration_params_invalid_parameters_right_error( + estimator, _, invalid_args, expected_msg): + # test that the right error message is returned if invalid arguments are + # given to _validate_calibration_params, for all pairs metric learners as + # well as a mocking general identity pairs classifier and the class itself + with pytest.raises(ValueError) as raised_error: + estimator._validate_calibration_params(**invalid_args) + assert str(raised_error.value) == expected_msg + + +@pytest.mark.parametrize('estimator, _', + pairs_learners + [(IdentityPairsClassifier(), None), + (_PairsClassifierMixin, None)], + ids=ids_pairs_learners + ['mock', 'class']) +@pytest.mark.parametrize('valid_args', + [{}, {'strategy': 'accuracy'}] + + [{'strategy': strategy, 'min_rate': min_rate} + for (strategy, min_rate) in product( + ['max_tpr', 'max_tnr'], + [0., 0.2, 0.8, 1.])] + + [{'strategy': 'f_beta', 'beta': beta} + for beta in [-5., -1., 0., 0.1, 0.2, 1., 5.]] + # Note that we authorize beta < 0 (even if + # in fact it will be squared, so it would be useless + # to do that) + ) +def test_validate_calibration_params_valid_parameters( + estimator, _, valid_args): + # test that no warning message is returned if valid arguments are given to + # _validate_calibration_params for all pairs metric learners, as well as + # a mocking example, and the class itself + with pytest.warns(None) as record: + estimator._validate_calibration_params(**valid_args) + assert len(record) == 0 + + +@pytest.mark.parametrize('estimator, build_dataset', + pairs_learners, + ids=ids_pairs_learners) +def test_validate_calibration_params_invalid_parameters_error_before__fit( + estimator, build_dataset): + """For all pairs metric learners (which currently all have a _fit method), + make sure that calibration parameters are validated before fitting""" + estimator = clone(estimator) + input_data, labels, _, _ = build_dataset() + + def breaking_fun(**args): # a function that fails so that 
From 14713c638646f40af7b42538ad799bb12f70f3c3 Mon Sep 17 00:00:00 2001
From: William de Vazelhes
Date: Tue, 2 Apr 2019 15:48:09 +0200
Subject: [PATCH 41/41] Address
 https://github.com/metric-learn/metric-learn/pull/168#discussion_r268109180

---
 metric_learn/base_metric.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index d4f00b13..9f127f58 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -487,7 +487,8 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
       cum_accuracy = (cum_tp + cum_tn) / n_samples
       imax = np.argmax(cum_accuracy)
       # we set the threshold to the lowest accepted score
-      # note: we want a positive threshold (distance), so we take - threshold
+      # note: we are working with negative distances but we want the threshold
+      # to be with respect to the actual distances so we take minus sign
       self.threshold_ = - scores_sorted[imax]
       # note: if the best is to reject all points it's already one of the
       # thresholds (scores_sorted[0])
@@ -510,7 +511,8 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
       f_beta[np.isnan(f_beta)] = 0.
       imax = np.argmax(f_beta)
       # we set the threshold to the lowest accepted score
-      # note: we want a positive threshold (distance), so we take - threshold
+      # note: we are working with negative distances but we want the threshold
+      # to be with respect to the actual distances so we take minus sign
       self.threshold_ = - thresholds[imax]
       # Note: we don't need to deal with rejecting all points (i.e. threshold =
       # max_scores + 1), since this can never happen to be optimal
@@ -533,7 +535,8 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
       imax = np.argmax(1 - fpr[indices])
       imax_valid = indices[imax]
 
-      # note: we want a positive threshold (distance), so we take - threshold
+      # note: we are working with negative distances but we want the threshold
+      # to be with respect to the actual distances so we take minus sign
       if indices[imax] == len(thresholds):  # we want to accept everything
         self.threshold_ = - (thresholds[imax_valid] - 1)
       else:
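
All three reworded comments describe the same sign convention, which is easy to miss when reading the diff alone. A toy illustration of it (the array names and the comparison below are assumptions for the sketch, not the library's exact predict code):

    import numpy as np

    distances = np.array([0.2, 0.8, 1.5])   # actual distances between pairs
    scores = - distances                    # decision_function-style scores
    threshold_ = 1.0                        # stored as a positive distance

    # a pair is predicted similar when its score clears minus the threshold,
    # which is the same as its distance being at most threshold_
    predictions = np.where(scores >= - threshold_, 1, -1)
    print(predictions)                      # [ 1  1 -1]
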