From d14f84712926c68d93aebd657aff247f0cbfec20 Mon Sep 17 00:00:00 2001
From: mvargas33
Date: Wed, 20 Oct 2021 14:53:41 +0200
Subject: [PATCH 1/5] Fix 7 sources of warnings

---
 metric_learn/_util.py             |  2 +-
 metric_learn/itml.py              |  2 +-
 metric_learn/rca.py               |  2 +-
 metric_learn/scml.py              |  6 +++---
 pytest.ini                        |  4 ++++
 test/metric_learn_test.py         |  7 ++++---
 test/test_constraints.py          | 22 +++++++++++++++++++---
 test/test_sklearn_compat.py       |  2 +-
 test/test_triplets_classifiers.py | 25 ++++++++++++++++++++++---
 9 files changed, 56 insertions(+), 16 deletions(-)
 create mode 100644 pytest.ini

diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 764a34c8..868ececa 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -704,7 +704,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   elif init == 'covariance':
     if input.ndim == 3:
       # if the input are tuples, we need to form an X by deduplication
-      X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)})
+      X = np.unique(np.vstack(input), axis=0)
     else:
       X = input
     # atleast2d is necessary to deal with scalar covariance matrices
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 43872b60..9fa3b75e 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -32,7 +32,7 @@ def _fit(self, pairs, y, bounds=None):
                                      type_of_inputs='tuples')
     # init bounds
     if bounds is None:
-      X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
+      X = np.unique(np.vstack(pairs), axis=0)
       self.bounds_ = np.percentile(pairwise_distances(X), (5, 95))
     else:
       bounds = check_array(bounds, allow_nd=False, ensure_min_samples=0,
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 34f7f3ff..1da00062 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -112,7 +112,7 @@ def fit(self, X, chunks):
     # Fisher Linear Discriminant projection
     if dim < X.shape[1]:
       total_cov = np.cov(X[chunk_mask], rowvar=0)
-      tmp = np.linalg.lstsq(total_cov, inner_cov)[0]
+      tmp = np.linalg.lstsq(total_cov, inner_cov, rcond=None)[0]
       vals, vecs = np.linalg.eig(tmp)
       inds = np.argsort(vals)[:dim]
       A = vecs[:, inds]
diff --git a/metric_learn/scml.py b/metric_learn/scml.py
index ee585018..b86c6fe1 100644
--- a/metric_learn/scml.py
+++ b/metric_learn/scml.py
@@ -615,10 +615,10 @@ def _generate_bases_LDA(self, X, y):
     k_class = np.vstack((np.minimum(class_count, scales[0]),
                          np.minimum(class_count, scales[1])))
 
-    idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int),
-               np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int)]
+    idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int64),
+               np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int64)]
 
-    start_finish_indices = np.hstack((np.zeros((2, 1), np.int),
+    start_finish_indices = np.hstack((np.zeros((2, 1), np.int64),
                                       k_class)).cumsum(axis=1)
 
     neigh = NearestNeighbors()
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..ef3c8acb
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+    integration: mark a test as integration
+    unit: mark a test as unit
\ No newline at end of file
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 2debe426..b8b4b5b8 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -1143,9 +1143,10 @@ def test_convergence_warning(dataset, algo_class):
   X, y = dataset
   model = algo_class(max_iter=2, verbose=True)
   cls_name = model.__class__.__name__
-  assert_warns_message(ConvergenceWarning,
-                       '[{}] {} did not converge'.format(cls_name, cls_name),
-                       model.fit, X, y)
+  msg = '[{}] {} did not converge'.format(cls_name, cls_name)
+  with pytest.warns(Warning) as raised_warning:
+    model.fit(X, y)
+  assert msg in str(raised_warning[0].message)
 
 
 if __name__ == '__main__':
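Note: the np.unique() rewrites in metric_learn/_util.py and metric_learn/itml.py
above address NumPy's FutureWarning about passing a non-sequence iterable (here,
a set of row-tuples) to np.vstack; current NumPy releases reject that pattern
outright, so the sketch below targets the versions the patch was written
against. A minimal sketch of why the two forms agree, on a made-up `pairs`
array (all names below are illustrative, not from the library):

    import numpy as np

    # Toy tuples array of shape (n_pairs, 2, n_features), one duplicated row.
    pairs = np.array([[[0., 1.], [2., 3.]],
                      [[0., 1.], [4., 5.]]])

    # Old form: deduplicate through a set of row-tuples, then stack the set.
    # The set is a non-sequence iterable, which is what np.vstack warns about
    # (and rejects with an error on NumPy >= 1.24).
    old_X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})

    # New form: flatten the tuples into rows and deduplicate with np.unique.
    new_X = np.unique(np.vstack(pairs), axis=0)

    # Same rows either way; np.unique also sorts them lexicographically.
    assert np.array_equal(np.unique(old_X, axis=0), new_X)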
diff --git a/test/test_constraints.py b/test/test_constraints.py
index 92876779..e9a33a0c 100644
--- a/test/test_constraints.py
+++ b/test/test_constraints.py
@@ -1,3 +1,4 @@
+import warnings
 import pytest
 import numpy as np
 from sklearn.utils import shuffle
@@ -103,7 +104,7 @@ def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test):
 
 
 @pytest.mark.parametrize("k_genuine, k_impostor,",
-                         [(2, 3), (3, 3), (2, 4), (3, 4)])
+                         [(3, 3), (2, 4), (3, 4), (10, 9), (144,33)])
 def test_generate_knntriplets(k_genuine, k_impostor):
   """Checks edge and over the edge cases of knn triplet construction with not
   enough neighbors"""
@@ -118,8 +119,23 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]])
   y = np.array([1, 1, 1, 2, 2, 2, -1])
 
-  T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
-
+  msg1 = ("The class 1 has 3 elements, which is not sufficient to "
+         f"generate {k_genuine+1} genuine neighbors "
+         "as specified by k_genuine" )
+  msg2 = ("The class 2 has 3 elements, which is not sufficient to "
+          f"generate {k_genuine+1} genuine neighbors "
+          "as specified by k_genuine" )
+  msg3 = ("The class 1 has 3 elements of other classes, which is "
+          f"not sufficient to generate {k_impostor} impostor "
+          "neighbors as specified by k_impostor")
+  msg4 = ("The class 2 has 3 elements of other classes, which is "
+          f"not sufficient to generate {k_impostor} impostor "
+          "neighbors as specified by k_impostor")
+  msgs = [msg1, msg2, msg3, msg4]
+  with pytest.warns(UserWarning) as user_warning:
+    T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
+  for warn in user_warning:
+    assert np.any([msg in str(warn.message) for msg in msgs])
   assert np.array_equal(sorted(T.tolist()), T_test)
 
 
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
index 3ad69712..2d8f9c64 100644
--- a/test/test_sklearn_compat.py
+++ b/test/test_sklearn_compat.py
@@ -224,7 +224,7 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset,
   n_splits = 3
   kfold = KFold(shuffle=False, n_splits=n_splits)
   n_samples = input_data.shape[0]
-  fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int)
+  fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int64)
   fold_sizes[:n_samples % n_splits] += 1
   current = 0
   scores, predictions = [], np.zeros(input_data.shape[0])
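Note: the dtype=np.int occurrences replaced above and in metric_learn/scml.py
relied on np.int, which was merely an alias for the builtin int; NumPy
deprecated the alias in 1.20 and removed it in 1.24, so fixed-width dtypes
such as np.int64 are the portable spelling. A small sketch of the fold-size
arithmetic from the test above, with made-up numbers:

    import numpy as np

    n_samples, n_splits = 10, 3

    # Same computation as the test, with an explicit fixed-width dtype that
    # survives the NumPy 1.24 removal of the deprecated scalar aliases.
    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int64)
    fold_sizes[:n_samples % n_splits] += 1
    print(fold_sizes)  # [4 3 3] -- 10 samples spread across 3 folds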
diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py
index f2d5c015..600947e6 100644
--- a/test/test_triplets_classifiers.py
+++ b/test/test_triplets_classifiers.py
@@ -1,6 +1,7 @@
 import pytest
 from sklearn.exceptions import NotFittedError
 from sklearn.model_selection import train_test_split
+import metric_learn
 
 from test.test_utils import triplets_learners, ids_triplets_learners
 from metric_learn.sklearn_shims import set_random_state
@@ -20,7 +21,13 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset,
   estimator.set_params(preprocessor=preprocessor)
   set_random_state(estimator)
   triplets_train, triplets_test = train_test_split(input_data)
-  estimator.fit(triplets_train)
+  if isinstance(estimator, metric_learn.SCML):
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      estimator.fit(triplets_train)
+    assert msg in str(raised_warning[0].message)
+  else:
+    estimator.fit(triplets_train)
   predictions = estimator.predict(triplets_test)
 
   not_valid = [e for e in predictions if e not in [-1, 1]]
@@ -42,7 +49,13 @@ def test_no_zero_prediction(estimator, build_dataset):
   # Dummy fit
   estimator = clone(estimator)
   set_random_state(estimator)
-  estimator.fit(triplets)
+  if isinstance(estimator, metric_learn.SCML):
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      estimator.fit(triplets)
+    assert msg in str(raised_warning[0].message)
+  else:
+    estimator.fit(triplets)
 
   # We force the transformation to be identity, to force euclidean distance
   estimator.components_ = np.eye(X.shape[1])
@@ -93,7 +106,13 @@ def test_accuracy_toy_example(estimator, build_dataset):
   triplets, _, _, X = build_dataset(with_preprocessor=False)
   estimator = clone(estimator)
   set_random_state(estimator)
-  estimator.fit(triplets)
+  if isinstance(estimator, metric_learn.SCML):
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      estimator.fit(triplets)
+    assert msg in str(raised_warning[0].message)
+  else:
+    estimator.fit(triplets)
   # We take the two first points and we build 4 regularly spaced points on the
   # line they define, so that it's easy to build triplets of different
   # similarities.

From 70ff2c26145a44407768fb57486efb0441bab53c Mon Sep 17 00:00:00 2001
From: mvargas33
Date: Wed, 20 Oct 2021 15:27:29 +0200
Subject: [PATCH 2/5] Fix indentation

---
 test/metric_learn_test.py | 1 -
 test/test_constraints.py  | 9 ++++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index b8b4b5b8..0f6ea6a8 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -9,7 +9,6 @@
                           make_spd_matrix)
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_allclose)
-from metric_learn.sklearn_shims import assert_warns_message
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.validation import check_X_y
 from sklearn.preprocessing import StandardScaler
diff --git a/test/test_constraints.py b/test/test_constraints.py
index e9a33a0c..32fd9b4c 100644
--- a/test/test_constraints.py
+++ b/test/test_constraints.py
@@ -1,4 +1,3 @@
-import warnings
 import pytest
 import numpy as np
 from sklearn.utils import shuffle
@@ -104,7 +103,7 @@ def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test):
 
 
 @pytest.mark.parametrize("k_genuine, k_impostor,",
-                         [(3, 3), (2, 4), (3, 4), (10, 9), (144,33)])
+                         [(3, 3), (2, 4), (3, 4), (10, 9), (144, 33)])
 def test_generate_knntriplets(k_genuine, k_impostor):
   """Checks edge and over the edge cases of knn triplet construction with not
   enough neighbors"""
@@ -120,11 +119,11 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   y = np.array([1, 1, 1, 2, 2, 2, -1])
 
   msg1 = ("The class 1 has 3 elements, which is not sufficient to "
-         f"generate {k_genuine+1} genuine neighbors "
-         "as specified by k_genuine" )
+          f"generate {k_genuine+1} genuine neighbors "
+          "as specified by k_genuine")
   msg2 = ("The class 2 has 3 elements, which is not sufficient to "
           f"generate {k_genuine+1} genuine neighbors "
-          "as specified by k_genuine" )
+          "as specified by k_genuine")
   msg3 = ("The class 1 has 3 elements of other classes, which is "
           f"not sufficient to generate {k_impostor} impostor "
           "neighbors as specified by k_impostor")
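Note: PATCH 3 below loosens the assertions added in PATCH 1. Older
scikit-learn versions emit additional warnings before the one under test, so
checking only raised_warning[0] is brittle; scanning every captured warning is
robust. A self-contained sketch of the pattern (noisy_fit is a hypothetical
stand-in for model.fit):

    import warnings
    import pytest

    def noisy_fit():
        # On old scikit-learn, an unrelated warning may fire first.
        warnings.warn("some unrelated warning", UserWarning)
        warnings.warn("[SCML] SCML did not converge", UserWarning)

    def test_convergence_message():
        msg = "[SCML] SCML did not converge"
        with pytest.warns(Warning) as raised_warning:
            noisy_fit()
        # Match against every captured warning, not just the first one.
        assert any(msg in str(warn.message) for warn in raised_warning)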
From 1b48802242d440155915b2e2b95a55b815df1b68 Mon Sep 17 00:00:00 2001
From: mvargas33
Date: Wed, 20 Oct 2021 15:52:29 +0200
Subject: [PATCH 3/5] Generalized warnings, as old sklearn throws more warnings

---
 test/metric_learn_test.py | 2 +-
 test/test_constraints.py  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 0f6ea6a8..e5958591 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -1145,7 +1145,7 @@ def test_convergence_warning(dataset, algo_class):
   msg = '[{}] {} did not converge'.format(cls_name, cls_name)
   with pytest.warns(Warning) as raised_warning:
     model.fit(X, y)
-  assert msg in str(raised_warning[0].message)
+  assert np.any([msg in str(warn.message) for warn in raised_warning])
 
 
 if __name__ == '__main__':
diff --git a/test/test_constraints.py b/test/test_constraints.py
index 32fd9b4c..ba4903fc 100644
--- a/test/test_constraints.py
+++ b/test/test_constraints.py
@@ -133,8 +133,8 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   msgs = [msg1, msg2, msg3, msg4]
   with pytest.warns(UserWarning) as user_warning:
     T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
-  for warn in user_warning:
-    assert np.any([msg in str(warn.message) for msg in msgs])
+  assert np.any(np.any([[msg in str(warn.message) for msg in msgs]
+                        for warn in user_warning]))
   assert np.array_equal(sorted(T.tolist()), T_test)
 
 

From 2696256c97fd4be6cab6105e227a3894995a565b Mon Sep 17 00:00:00 2001
From: mvargas33
Date: Thu, 21 Oct 2021 11:04:45 +0200
Subject: [PATCH 4/5] Changed np.any() to any()

---
 test/metric_learn_test.py      | 2 +-
 test/test_constraints.py       | 3 +--
 test/test_mahalanobis_mixin.py | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index e5958591..542e1e0a 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -1145,7 +1145,7 @@ def test_convergence_warning(dataset, algo_class):
   msg = '[{}] {} did not converge'.format(cls_name, cls_name)
   with pytest.warns(Warning) as raised_warning:
     model.fit(X, y)
-  assert np.any([msg in str(warn.message) for warn in raised_warning])
+  assert any([msg in str(warn.message) for warn in raised_warning])
 
 
 if __name__ == '__main__':
diff --git a/test/test_constraints.py b/test/test_constraints.py
index ba4903fc..0e9d99e2 100644
--- a/test/test_constraints.py
+++ b/test/test_constraints.py
@@ -133,8 +133,7 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   msgs = [msg1, msg2, msg3, msg4]
   with pytest.warns(UserWarning) as user_warning:
     T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
-  assert np.any(np.any([[msg in str(warn.message) for msg in msgs]
-                        for warn in user_warning]))
+  assert any([[msg in str(warn.message) for msg in msgs] for warn in user_warning])
   assert np.array_equal(sorted(T.tolist()), T_test)
 
 
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index e3d981a4..35458dad 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -625,7 +625,7 @@ def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset):
          'preprocessing step.')
   with pytest.warns(UserWarning) as raised_warning:
     model.fit(input_data, labels)
-  assert np.any([str(warning.message) == msg for warning in raised_warning])
+  assert any([str(warning.message) == msg for warning in raised_warning])
   M, _ = _initialize_metric_mahalanobis(X, init='covariance',
                                         random_state=RNG,
                                         return_inverse=True,
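Note: the builtin any() adopted in PATCH 4 avoids converting its argument to
an array, but it only tests the truthiness of each element. With the nested
comprehension kept in test_constraints.py, each element is itself a list, and
a non-empty list is truthy even when all of its booleans are False; flattening
the comprehension makes any() see the booleans themselves. A minimal
illustration with made-up values:

    msgs = ["did not converge"]
    warning_texts = ["some unrelated warning"]

    # Nested form: one inner list per warning; any() checks the lists.
    nested = [[msg in text for msg in msgs] for text in warning_texts]
    print(nested)       # [[False]]
    print(any(nested))  # True -- the non-empty inner list is truthy

    # Flattened form: any() checks the booleans and short-circuits.
    print(any(msg in text for text in warning_texts for msg in msgs))  # False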
From 4aeb2e89c1bc78d2af411ddbb2fea80190400f33 Mon Sep 17 00:00:00 2001
From: mvargas33
Date: Thu, 21 Oct 2021 11:05:23 +0200
Subject: [PATCH 5/5] Fix indentation

---
 test/test_constraints.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_constraints.py b/test/test_constraints.py
index 0e9d99e2..def228d4 100644
--- a/test/test_constraints.py
+++ b/test/test_constraints.py
@@ -133,7 +133,8 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   msgs = [msg1, msg2, msg3, msg4]
   with pytest.warns(UserWarning) as user_warning:
     T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
-  assert any([[msg in str(warn.message) for msg in msgs] for warn in user_warning])
+  assert any([[msg in str(warn.message) for msg in msgs]
+              for warn in user_warning])
   assert np.array_equal(sorted(T.tolist()), T_test)
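Note: the pytest.ini introduced in PATCH 1 registers the `integration` and
`unit` markers; since pytest 4.5, applying an unregistered marker emits
PytestUnknownMarkWarning, which is one of the warning sources this series
removes. A sketch of how tests would opt into the registered markers (the
test names here are illustrative):

    import pytest

    @pytest.mark.unit
    def test_fast_isolated_piece():
        assert 1 + 1 == 2

    @pytest.mark.integration
    def test_slower_end_to_end_path():
        # Typically exercises several components together; select with
        # `pytest -m integration`, exclude with `pytest -m "not integration"`.
        assert True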