diff --git a/.travis.yml b/.travis.yml index f5527089..0e510a9f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: before_install: - sudo apt-get install liblapack-dev - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy scikit-learn codecov pytest-cov + - pip install wheel cython numpy scipy codecov pytest-cov + - if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; then + pip install scikit-learn; + else + pip install scikit-learn==0.20.3; + fi - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || ($TRAVIS_PYTHON_VERSION == "2.7")]]; then pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; diff --git a/README.rst b/README.rst index e1bfca51..32a9bb90 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Metric Learning algorithms in Python. **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn +- numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 2d2df25e..d620e401 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -15,7 +15,7 @@ Alternately, download the source repository and run: **Dependencies** - Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn +- numpy, scipy, scikit-learn>=0.20.3 **Optional dependencies** diff --git a/metric_learn/_util.py b/metric_learn/_util.py index 33311620..397654bf 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -22,8 +22,7 @@ def check_input(input_data, y=None, preprocessor=None, dtype='numeric', order=None, copy=False, force_all_finite=True, multi_output=False, ensure_min_samples=1, - ensure_min_features=1, y_numeric=False, - warn_on_dtype=False, estimator=None): + ensure_min_features=1, y_numeric=False, estimator=None): """Checks that the input format is valid, and converts it if specified (this is the equivalent of scikit-learn's `check_array` or `check_X_y`). 
All arguments following tuple_size are scikit-learn's `check_X_y` @@ -88,10 +87,6 @@ def check_input(input_data, y=None, preprocessor=None, is originally 1D and ``ensure_2d`` is True. Setting to 0 disables this check. - warn_on_dtype : boolean (default=False) - Raise DataConversionWarning if the dtype of the input data structure - does not match the requested dtype, causing a memory copy. - estimator : str or estimator instance (default=`None`) If passed, include the name of the estimator in warning messages. @@ -111,7 +106,7 @@ def check_input(input_data, y=None, preprocessor=None, copy=copy, force_all_finite=force_all_finite, ensure_min_samples=ensure_min_samples, ensure_min_features=ensure_min_features, - warn_on_dtype=warn_on_dtype, estimator=estimator) + estimator=estimator) # We need to convert input_data into a numpy.ndarray if possible, before # any further checks or conversions, and deal with y if needed. Therefore @@ -309,9 +304,8 @@ def __init__(self, X): accept_sparse=True, dtype=None, force_all_finite=False, ensure_2d=False, allow_nd=True, - ensure_min_samples=0, - ensure_min_features=0, - warn_on_dtype=False, estimator=None) + ensure_min_samples=0, ensure_min_features=0, + estimator=None) self.X = X def __call__(self, indices): diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 6c9a6dc5..e5f2e17b 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,4 +1,5 @@ import pytest +import re import unittest import metric_learn import numpy as np @@ -7,84 +8,103 @@ from test.test_utils import ids_metric_learners, metric_learners +def remove_spaces(s): + return re.sub(r'\s+', '', s) + + class TestStringRepr(unittest.TestCase): def test_covariance(self): - self.assertEqual(str(metric_learn.Covariance()), - "Covariance(preprocessor=None)") + self.assertEqual(remove_spaces(str(metric_learn.Covariance())), + remove_spaces("Covariance(preprocessor=None)")) def test_lmnn(self): self.assertRegexpMatches( - 
str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " - r"max_iter=1000,\n min_iter=50, preprocessor=None, " - r"regularization=0.5, use_pca=True,\n verbose=False\)") + str(metric_learn.LMNN()), + r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, " + r"max_iter=1000,\s+min_iter=50, preprocessor=None, " + r"regularization=0.5, use_pca=True,\s+verbose=False\)") def test_nca(self): - self.assertEqual(str(metric_learn.NCA()), - "NCA(max_iter=100, num_dims=None, preprocessor=None, " - "tol=None, verbose=False)") + self.assertEqual(remove_spaces(str(metric_learn.NCA())), + remove_spaces( + "NCA(max_iter=100, num_dims=None, preprocessor=None, " + "tol=None, verbose=False)")) def test_lfda(self): - self.assertEqual(str(metric_learn.LFDA()), - "LFDA(embedding_type='weighted', k=None, num_dims=None, " - "preprocessor=None)") + self.assertEqual(remove_spaces(str(metric_learn.LFDA())), + remove_spaces( + "LFDA(embedding_type='weighted', k=None, " + "num_dims=None, " + "preprocessor=None)")) def test_itml(self): - self.assertEqual(str(metric_learn.ITML()), """ + self.assertEqual(remove_spaces(str(metric_learn.ITML())), + remove_spaces(""" ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000, preprocessor=None, verbose=False) -""".strip('\n')) - self.assertEqual(str(metric_learn.ITML_Supervised()), """ +""")) + self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), + remove_spaces(""" ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001, gamma=1.0, max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, verbose=False) -""".strip('\n')) +""")) def test_lsml(self): self.assertEqual( - str(metric_learn.LSML()), + remove_spaces(str(metric_learn.LSML())), + remove_spaces( "LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, " - "verbose=False)") - self.assertEqual(str(metric_learn.LSML_Supervised()), """ + "verbose=False)")) + 
self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), + remove_spaces(""" LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated', preprocessor=None, prior=None, tol=0.001, verbose=False, weights=None) -""".strip('\n')) +""")) def test_sdml(self): - self.assertEqual(str(metric_learn.SDML()), - "SDML(balance_param=0.5, preprocessor=None, " - "sparsity_param=0.01, use_cov=True,\n verbose=False)") - self.assertEqual(str(metric_learn.SDML_Supervised()), """ + self.assertEqual(remove_spaces(str(metric_learn.SDML())), + remove_spaces( + "SDML(balance_param=0.5, preprocessor=None, " + "sparsity_param=0.01, use_cov=True," + "\n verbose=False)")) + self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())), + remove_spaces(""" SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, use_cov=True, verbose=False) -""".strip('\n')) +""")) def test_rca(self): - self.assertEqual(str(metric_learn.RCA()), - "RCA(num_dims=None, pca_comps=None, preprocessor=None)") - self.assertEqual(str(metric_learn.RCA_Supervised()), - "RCA_Supervised(chunk_size=2, num_chunks=100, " - "num_dims=None, pca_comps=None,\n " - "preprocessor=None)") + self.assertEqual(remove_spaces(str(metric_learn.RCA())), + remove_spaces("RCA(num_dims=None, pca_comps=None, " + "preprocessor=None)")) + self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), + remove_spaces( + "RCA_Supervised(chunk_size=2, num_chunks=100, " + "num_dims=None, pca_comps=None,\n " + "preprocessor=None)")) def test_mlkr(self): - self.assertEqual(str(metric_learn.MLKR()), - "MLKR(A0=None, max_iter=1000, num_dims=None, " - "preprocessor=None, tol=None,\n verbose=False)") + self.assertEqual(remove_spaces(str(metric_learn.MLKR())), + remove_spaces( + "MLKR(A0=None, max_iter=1000, num_dims=None, " + "preprocessor=None, tol=None,\n verbose=False)")) def test_mmc(self): - self.assertEqual(str(metric_learn.MMC()), """ + 
self.assertEqual(remove_spaces(str(metric_learn.MMC())), + remove_spaces(""" MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, preprocessor=None, verbose=False) -""".strip('\n')) - self.assertEqual(str(metric_learn.MMC_Supervised()), """ +""")) + self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())), + remove_spaces(""" MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, num_labeled='deprecated', preprocessor=None, verbose=False) -""".strip('\n')) +""")) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, diff --git a/test/test_utils.py b/test/test_utils.py index 52ebc7a6..3f6eb1a3 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -299,35 +299,6 @@ def test_check_tuples_invalid_n_samples(estimator, context, load_tuples, assert str(raised_error.value) == msg -@pytest.mark.parametrize('estimator, context', - [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) -@pytest.mark.parametrize('load_tuples, preprocessor', - [(tuples_prep, mock_preprocessor), - (tuples_no_prep, None), - (tuples_no_prep, mock_preprocessor)]) -def test_check_tuples_invalid_dtype_convertible(estimator, context, - load_tuples, preprocessor): - """Checks that a warning is raised if a convertible input is converted to - float""" - tuples = load_tuples().astype(object) # here the object conversion is - # useless for the tuples_prep case, but this allows to test the - # tuples_prep case - - if preprocessor is not None: # if the preprocessor is not None we - # overwrite it to have a preprocessor that returns objects - def preprocessor(indices): # - # preprocessor that returns objects - return np.ones((indices.shape[0], 3)).astype(object) - - msg = ("Data with input dtype object was converted to float64{}." 
- .format(context)) - with pytest.warns(DataConversionWarning) as raised_warning: - check_input(tuples, type_of_inputs='tuples', - preprocessor=preprocessor, dtype=np.float64, - warn_on_dtype=True, estimator=estimator) - assert str(raised_warning[0].message) == msg - - def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor(): """Checks that a value error is thrown if attempting to convert an input not convertible to float, when using a preprocessor @@ -529,36 +500,6 @@ def test_check_classic_invalid_n_samples(estimator, context, load_points, assert str(raised_error.value) == msg -@pytest.mark.parametrize('estimator, context', - [(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")]) -@pytest.mark.parametrize('load_points, preprocessor', - [(points_prep, mock_preprocessor), - (points_no_prep, None), - (points_no_prep, mock_preprocessor)]) -def test_check_classic_invalid_dtype_convertible(estimator, context, - load_points, - preprocessor): - """Checks that a warning is raised if a convertible input is converted to - float""" - points = load_points().astype(object) # here the object conversion is - # useless for the points_prep case, but this allows to test the - # points_prep case - - if preprocessor is not None: # if the preprocessor is not None we - # overwrite it to have a preprocessor that returns objects - def preprocessor(indices): - # preprocessor that returns objects - return np.ones((indices.shape[0], 3)).astype(object) - - msg = ("Data with input dtype object was converted to float64{}." - .format(context)) - with pytest.warns(DataConversionWarning) as raised_warning: - check_input(points, type_of_inputs='classic', - preprocessor=preprocessor, dtype=np.float64, - warn_on_dtype=True, estimator=estimator) - assert str(raised_warning[0].message) == msg - - @pytest.mark.parametrize('preprocessor, points', [(mock_preprocessor, np.array([['a', 'b'], ['e', 'b']])),