Fix 7 sources of warnings in the tests (#339)

mvargas33 · web-flow · commit 6a4aaea62d35 · 2021-10-21T11:36:03.000-04:00
* Fix 7 sources of warnings

* Fix indentation

* Generalized warnings, as old sklearn throws more warnings

* Changed np.any() for any()

* Fix indentation
diff --git a/metric_learn/_util.py b/metric_learn/_util.py
@@ -704,7 +704,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   elif init == 'covariance':
     if input.ndim == 3:
       # if the input are tuples, we need to form an X by deduplication
-      X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)})
+      X = np.unique(np.vstack(input), axis=0)
     else:
       X = input
     # atleast2d is necessary to deal with scalar covariance matrices
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
@@ -32,7 +32,7 @@ def _fit(self, pairs, y, bounds=None):
                                     type_of_inputs='tuples')
     # init bounds
     if bounds is None:
-      X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
+      X = np.unique(np.vstack(pairs), axis=0)
       self.bounds_ = np.percentile(pairwise_distances(X), (5, 95))
     else:
       bounds = check_array(bounds, allow_nd=False, ensure_min_samples=0,
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
@@ -112,7 +112,7 @@ def fit(self, X, chunks):
     # Fisher Linear Discriminant projection
     if dim < X.shape[1]:
       total_cov = np.cov(X[chunk_mask], rowvar=0)
-      tmp = np.linalg.lstsq(total_cov, inner_cov)[0]
+      tmp = np.linalg.lstsq(total_cov, inner_cov, rcond=None)[0]
       vals, vecs = np.linalg.eig(tmp)
       inds = np.argsort(vals)[:dim]
       A = vecs[:, inds]
diff --git a/metric_learn/scml.py b/metric_learn/scml.py
@@ -615,10 +615,10 @@ def _generate_bases_LDA(self, X, y):
     k_class = np.vstack((np.minimum(class_count, scales[0]),
                          np.minimum(class_count, scales[1])))
 
-    idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int),
-               np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int)]
+    idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int64),
+               np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int64)]
 
-    start_finish_indices = np.hstack((np.zeros((2, 1), np.int),
+    start_finish_indices = np.hstack((np.zeros((2, 1), np.int64),
                                      k_class)).cumsum(axis=1)
 
     neigh = NearestNeighbors()
diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+  integration: mark a test as integration
+  unit: mark a test as unit
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
@@ -9,7 +9,6 @@
                               make_spd_matrix)
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_allclose)
-from metric_learn.sklearn_shims import assert_warns_message
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.validation import check_X_y
 from sklearn.preprocessing import StandardScaler
@@ -1143,9 +1142,10 @@ def test_convergence_warning(dataset, algo_class):
     X, y = dataset
     model = algo_class(max_iter=2, verbose=True)
     cls_name = model.__class__.__name__
-    assert_warns_message(ConvergenceWarning,
-                         '[{}] {} did not converge'.format(cls_name, cls_name),
-                         model.fit, X, y)
+    msg = '[{}] {} did not converge'.format(cls_name, cls_name)
+    with pytest.warns(Warning) as raised_warning:
+      model.fit(X, y)
+    assert any([msg in str(warn.message) for warn in raised_warning])
 
 
 if __name__ == '__main__':
diff --git a/test/test_constraints.py b/test/test_constraints.py
@@ -103,7 +103,7 @@ def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test):
 
 
 @pytest.mark.parametrize("k_genuine, k_impostor,",
-                         [(2, 3), (3, 3), (2, 4), (3, 4)])
+                         [(3, 3), (2, 4), (3, 4), (10, 9), (144, 33)])
 def test_generate_knntriplets(k_genuine, k_impostor):
   """Checks edge and over the edge cases of knn triplet construction with not
      enough neighbors"""
@@ -118,8 +118,23 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]])
   y = np.array([1, 1, 1, 2, 2, 2, -1])
 
-  T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
-
+  msg1 = ("The class 1 has 3 elements, which is not sufficient to "
+          f"generate {k_genuine+1} genuine neighbors "
+          "as specified by k_genuine")
+  msg2 = ("The class 2 has 3 elements, which is not sufficient to "
+          f"generate {k_genuine+1} genuine neighbors "
+          "as specified by k_genuine")
+  msg3 = ("The class 1 has 3 elements of other classes, which is "
+          f"not sufficient to generate {k_impostor} impostor "
+          "neighbors as specified by k_impostor")
+  msg4 = ("The class 2 has 3 elements of other classes, which is "
+          f"not sufficient to generate {k_impostor} impostor "
+          "neighbors as specified by k_impostor")
+  msgs = [msg1, msg2, msg3, msg4]
+  with pytest.warns(UserWarning) as user_warning:
+    T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
+  assert any(msg in str(warn.message)
+             for warn in user_warning for msg in msgs)
   assert np.array_equal(sorted(T.tolist()), T_test)
 
 
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
@@ -235,7 +235,7 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset,
     n_splits = 3
     kfold = KFold(shuffle=False, n_splits=n_splits)
     n_samples = input_data.shape[0]
-    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int)
+    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int64)
     fold_sizes[:n_samples % n_splits] += 1
     current = 0
     scores, predictions = [], np.zeros(input_data.shape[0])
diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py
@@ -1,6 +1,7 @@
 import pytest
 from sklearn.exceptions import NotFittedError
 from sklearn.model_selection import train_test_split
+import metric_learn
 
 from test.test_utils import triplets_learners, ids_triplets_learners
 from metric_learn.sklearn_shims import set_random_state
@@ -20,7 +21,13 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset,
   estimator.set_params(preprocessor=preprocessor)
   set_random_state(estimator)
   triplets_train, triplets_test = train_test_split(input_data)
-  estimator.fit(triplets_train)
+  if isinstance(estimator, metric_learn.SCML):
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      estimator.fit(triplets_train)
+    assert any(msg in str(warn.message) for warn in raised_warning)
+  else:
+    estimator.fit(triplets_train)
   predictions = estimator.predict(triplets_test)
 
   not_valid = [e for e in predictions if e not in [-1, 1]]
@@ -42,7 +49,13 @@ def test_no_zero_prediction(estimator, build_dataset):
   # Dummy fit
   estimator = clone(estimator)
   set_random_state(estimator)
-  estimator.fit(triplets)
+  if isinstance(estimator, metric_learn.SCML):
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      estimator.fit(triplets)
+    assert any(msg in str(warn.message) for warn in raised_warning)
+  else:
+    estimator.fit(triplets)
   # We force the transformation to be identity, to force euclidean distance
   estimator.components_ = np.eye(X.shape[1])
 
@@ -93,7 +106,13 @@ def test_accuracy_toy_example(estimator, build_dataset):
   triplets, _, _, X = build_dataset(with_preprocessor=False)
   estimator = clone(estimator)
   set_random_state(estimator)
-  estimator.fit(triplets)
+  if isinstance(estimator, metric_learn.SCML):
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      estimator.fit(triplets)
+    assert any(msg in str(warn.message) for warn in raised_warning)
+  else:
+    estimator.fit(triplets)
   # We take the two first points and we build 4 regularly spaced points on the
   # line they define, so that it's easy to build triplets of different
   # similarities.