Skip to content

Commit 05a8d41

Browse files
authored
[MRG] Be compatible with newer scikit-learn (#199)
* Update Travis to use previous scikit-learn versions for older Pythons
* Update code to work with both versions
* Install scikit-learn before skggm
* Simpler replacement of spaces and newlines that is compatible with Python 2.7
* Address #199 (review)
* Address #199 (review)
1 parent d4badc8 commit 05a8d41

File tree

6 files changed

+70
-110
lines changed

6 files changed

+70
-110
lines changed

.travis.yml

Lines changed: 6 additions & 1 deletion
Original file line number / Diff line number / Diff line change
@@ -8,7 +8,12 @@ python:
88
before_install:
99
- sudo apt-get install liblapack-dev
1010
- pip install --upgrade pip pytest
11-
- pip install wheel cython numpy scipy scikit-learn codecov pytest-cov
11+
- pip install wheel cython numpy scipy codecov pytest-cov
12+
- if $TRAVIS_PYTHON_VERSION == "3.6"; then
13+
pip install scikit-learn;
14+
else
15+
pip install scikit-learn==0.20.3;
16+
fi
1217
- if [[ ($TRAVIS_PYTHON_VERSION == "3.6") ||
1318
($TRAVIS_PYTHON_VERSION == "2.7")]]; then
1419
pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8;

README.rst

Lines changed: 1 addition & 1 deletion
Original file line number / Diff line number / Diff line change
@@ -20,7 +20,7 @@ Metric Learning algorithms in Python.
2020
**Dependencies**
2121

2222
- Python 2.7+, 3.4+
23-
- numpy, scipy, scikit-learn
23+
- numpy, scipy, scikit-learn>=0.20.3
2424

2525
**Optional dependencies**
2626

doc/getting_started.rst

Lines changed: 1 addition & 1 deletion
Original file line number / Diff line number / Diff line change
@@ -15,7 +15,7 @@ Alternately, download the source repository and run:
1515
**Dependencies**
1616

1717
- Python 2.7+, 3.4+
18-
- numpy, scipy, scikit-learn
18+
- numpy, scipy, scikit-learn>=0.20.3
1919

2020
**Optional dependencies**
2121

metric_learn/_util.py

Lines changed: 4 additions & 10 deletions
Original file line number / Diff line number / Diff line change
@@ -22,8 +22,7 @@ def check_input(input_data, y=None, preprocessor=None,
2222
dtype='numeric', order=None,
2323
copy=False, force_all_finite=True,
2424
multi_output=False, ensure_min_samples=1,
25-
ensure_min_features=1, y_numeric=False,
26-
warn_on_dtype=False, estimator=None):
25+
ensure_min_features=1, y_numeric=False, estimator=None):
2726
"""Checks that the input format is valid, and converts it if specified
2827
(this is the equivalent of scikit-learn's `check_array` or `check_X_y`).
2928
All arguments following tuple_size are scikit-learn's `check_X_y`
@@ -88,10 +87,6 @@ def check_input(input_data, y=None, preprocessor=None,
8887
is originally 1D and ``ensure_2d`` is True. Setting to 0 disables
8988
this check.
9089
91-
warn_on_dtype : boolean (default=False)
92-
Raise DataConversionWarning if the dtype of the input data structure
93-
does not match the requested dtype, causing a memory copy.
94-
9590
estimator : str or estimator instance (default=`None`)
9691
If passed, include the name of the estimator in warning messages.
9792
@@ -111,7 +106,7 @@ def check_input(input_data, y=None, preprocessor=None,
111106
copy=copy, force_all_finite=force_all_finite,
112107
ensure_min_samples=ensure_min_samples,
113108
ensure_min_features=ensure_min_features,
114-
warn_on_dtype=warn_on_dtype, estimator=estimator)
109+
estimator=estimator)
115110

116111
# We need to convert input_data into a numpy.ndarray if possible, before
117112
# any further checks or conversions, and deal with y if needed. Therefore
@@ -321,9 +316,8 @@ def __init__(self, X):
321316
accept_sparse=True, dtype=None,
322317
force_all_finite=False,
323318
ensure_2d=False, allow_nd=True,
324-
ensure_min_samples=0,
325-
ensure_min_features=0,
326-
warn_on_dtype=False, estimator=None)
319+
ensure_min_samples=0, ensure_min_features=0,
320+
estimator=None)
327321
self.X = X
328322

329323
def __call__(self, indices):

test/test_base_metric.py

Lines changed: 58 additions & 38 deletions
Original file line number / Diff line number / Diff line change
@@ -1,4 +1,5 @@
11
import pytest
2+
import re
23
import unittest
34
import metric_learn
45
import numpy as np
@@ -7,84 +8,103 @@
78
from test.test_utils import ids_metric_learners, metric_learners
89

910

11+
def remove_spaces(s):
12+
return re.sub('\s+', '', s)
13+
14+
1015
class TestStringRepr(unittest.TestCase):
1116

1217
def test_covariance(self):
13-
self.assertEqual(str(metric_learn.Covariance()),
14-
"Covariance(preprocessor=None)")
18+
self.assertEqual(remove_spaces(str(metric_learn.Covariance())),
19+
remove_spaces("Covariance(preprocessor=None)"))
1520

1621
def test_lmnn(self):
1722
self.assertRegexpMatches(
18-
str(metric_learn.LMNN()),
19-
r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, "
20-
r"max_iter=1000,\n min_iter=50, preprocessor=None, "
21-
r"regularization=0.5, use_pca=True,\n verbose=False\)")
23+
str(metric_learn.LMNN()),
24+
r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, "
25+
r"max_iter=1000,\s+min_iter=50, preprocessor=None, "
26+
r"regularization=0.5, use_pca=True,\s+verbose=False\)")
2227

2328
def test_nca(self):
24-
self.assertEqual(str(metric_learn.NCA()),
25-
"NCA(max_iter=100, num_dims=None, preprocessor=None, "
26-
"tol=None, verbose=False)")
29+
self.assertEqual(remove_spaces(str(metric_learn.NCA())),
30+
remove_spaces(
31+
"NCA(max_iter=100, num_dims=None, preprocessor=None, "
32+
"tol=None, verbose=False)"))
2733

2834
def test_lfda(self):
29-
self.assertEqual(str(metric_learn.LFDA()),
30-
"LFDA(embedding_type='weighted', k=None, num_dims=None, "
31-
"preprocessor=None)")
35+
self.assertEqual(remove_spaces(str(metric_learn.LFDA())),
36+
remove_spaces(
37+
"LFDA(embedding_type='weighted', k=None, "
38+
"num_dims=None, "
39+
"preprocessor=None)"))
3240

3341
def test_itml(self):
34-
self.assertEqual(str(metric_learn.ITML()), """
42+
self.assertEqual(remove_spaces(str(metric_learn.ITML())),
43+
remove_spaces("""
3544
ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000,
3645
preprocessor=None, verbose=False)
37-
""".strip('\n'))
38-
self.assertEqual(str(metric_learn.ITML_Supervised()), """
46+
"""))
47+
self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())),
48+
remove_spaces("""
3949
ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001,
4050
gamma=1.0, max_iter=1000, num_constraints=None,
4151
num_labeled='deprecated', preprocessor=None, verbose=False)
42-
""".strip('\n'))
52+
"""))
4353

4454
def test_lsml(self):
4555
self.assertEqual(
46-
str(metric_learn.LSML()),
56+
remove_spaces(str(metric_learn.LSML())),
57+
remove_spaces(
4758
"LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, "
48-
"verbose=False)")
49-
self.assertEqual(str(metric_learn.LSML_Supervised()), """
59+
"verbose=False)"))
60+
self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())),
61+
remove_spaces("""
5062
LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated',
5163
preprocessor=None, prior=None, tol=0.001, verbose=False,
5264
weights=None)
53-
""".strip('\n'))
65+
"""))
5466

5567
def test_sdml(self):
56-
self.assertEqual(str(metric_learn.SDML()),
57-
"SDML(balance_param=0.5, preprocessor=None, "
58-
"sparsity_param=0.01, use_cov=True,\n verbose=False)")
59-
self.assertEqual(str(metric_learn.SDML_Supervised()), """
68+
self.assertEqual(remove_spaces(str(metric_learn.SDML())),
69+
remove_spaces(
70+
"SDML(balance_param=0.5, preprocessor=None, "
71+
"sparsity_param=0.01, use_cov=True,"
72+
"\n verbose=False)"))
73+
self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())),
74+
remove_spaces("""
6075
SDML_Supervised(balance_param=0.5, num_constraints=None,
6176
num_labeled='deprecated', preprocessor=None, sparsity_param=0.01,
6277
use_cov=True, verbose=False)
63-
""".strip('\n'))
78+
"""))
6479

6580
def test_rca(self):
66-
self.assertEqual(str(metric_learn.RCA()),
67-
"RCA(num_dims=None, pca_comps=None, preprocessor=None)")
68-
self.assertEqual(str(metric_learn.RCA_Supervised()),
69-
"RCA_Supervised(chunk_size=2, num_chunks=100, "
70-
"num_dims=None, pca_comps=None,\n "
71-
"preprocessor=None)")
81+
self.assertEqual(remove_spaces(str(metric_learn.RCA())),
82+
remove_spaces("RCA(num_dims=None, pca_comps=None, "
83+
"preprocessor=None)"))
84+
self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())),
85+
remove_spaces(
86+
"RCA_Supervised(chunk_size=2, num_chunks=100, "
87+
"num_dims=None, pca_comps=None,\n "
88+
"preprocessor=None)"))
7289

7390
def test_mlkr(self):
74-
self.assertEqual(str(metric_learn.MLKR()),
75-
"MLKR(A0=None, max_iter=1000, num_dims=None, "
76-
"preprocessor=None, tol=None,\n verbose=False)")
91+
self.assertEqual(remove_spaces(str(metric_learn.MLKR())),
92+
remove_spaces(
93+
"MLKR(A0=None, max_iter=1000, num_dims=None, "
94+
"preprocessor=None, tol=None,\n verbose=False)"))
7795

7896
def test_mmc(self):
79-
self.assertEqual(str(metric_learn.MMC()), """
97+
self.assertEqual(remove_spaces(str(metric_learn.MMC())),
98+
remove_spaces("""
8099
MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0,
81100
max_iter=100, max_proj=10000, preprocessor=None, verbose=False)
82-
""".strip('\n'))
83-
self.assertEqual(str(metric_learn.MMC_Supervised()), """
101+
"""))
102+
self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())),
103+
remove_spaces("""
84104
MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False,
85105
diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None,
86106
num_labeled='deprecated', preprocessor=None, verbose=False)
87-
""".strip('\n'))
107+
"""))
88108

89109

90110
@pytest.mark.parametrize('estimator, build_dataset', metric_learners,

test/test_utils.py

Lines changed: 0 additions & 59 deletions
Original file line number / Diff line number / Diff line change
@@ -300,35 +300,6 @@ def test_check_tuples_invalid_n_samples(estimator, context, load_tuples,
300300
assert str(raised_error.value) == msg
301301

302302

303-
@pytest.mark.parametrize('estimator, context',
304-
[(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")])
305-
@pytest.mark.parametrize('load_tuples, preprocessor',
306-
[(tuples_prep, mock_preprocessor),
307-
(tuples_no_prep, None),
308-
(tuples_no_prep, mock_preprocessor)])
309-
def test_check_tuples_invalid_dtype_convertible(estimator, context,
310-
load_tuples, preprocessor):
311-
"""Checks that a warning is raised if a convertible input is converted to
312-
float"""
313-
tuples = load_tuples().astype(object) # here the object conversion is
314-
# useless for the tuples_prep case, but this allows to test the
315-
# tuples_prep case
316-
317-
if preprocessor is not None: # if the preprocessor is not None we
318-
# overwrite it to have a preprocessor that returns objects
319-
def preprocessor(indices): #
320-
# preprocessor that returns objects
321-
return np.ones((indices.shape[0], 3)).astype(object)
322-
323-
msg = ("Data with input dtype object was converted to float64{}."
324-
.format(context))
325-
with pytest.warns(DataConversionWarning) as raised_warning:
326-
check_input(tuples, type_of_inputs='tuples',
327-
preprocessor=preprocessor, dtype=np.float64,
328-
warn_on_dtype=True, estimator=estimator)
329-
assert str(raised_warning[0].message) == msg
330-
331-
332303
def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor():
333304
"""Checks that a value error is thrown if attempting to convert an
334305
input not convertible to float, when using a preprocessor
@@ -530,36 +501,6 @@ def test_check_classic_invalid_n_samples(estimator, context, load_points,
530501
assert str(raised_error.value) == msg
531502

532503

533-
@pytest.mark.parametrize('estimator, context',
534-
[(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")])
535-
@pytest.mark.parametrize('load_points, preprocessor',
536-
[(points_prep, mock_preprocessor),
537-
(points_no_prep, None),
538-
(points_no_prep, mock_preprocessor)])
539-
def test_check_classic_invalid_dtype_convertible(estimator, context,
540-
load_points,
541-
preprocessor):
542-
"""Checks that a warning is raised if a convertible input is converted to
543-
float"""
544-
points = load_points().astype(object) # here the object conversion is
545-
# useless for the points_prep case, but this allows to test the
546-
# points_prep case
547-
548-
if preprocessor is not None: # if the preprocessor is not None we
549-
# overwrite it to have a preprocessor that returns objects
550-
def preprocessor(indices):
551-
# preprocessor that returns objects
552-
return np.ones((indices.shape[0], 3)).astype(object)
553-
554-
msg = ("Data with input dtype object was converted to float64{}."
555-
.format(context))
556-
with pytest.warns(DataConversionWarning) as raised_warning:
557-
check_input(points, type_of_inputs='classic',
558-
preprocessor=preprocessor, dtype=np.float64,
559-
warn_on_dtype=True, estimator=estimator)
560-
assert str(raised_warning[0].message) == msg
561-
562-
563504
@pytest.mark.parametrize('preprocessor, points',
564505
[(mock_preprocessor, np.array([['a', 'b'],
565506
['e', 'b']])),

0 commit comments

Comments (0)