[MRG] Be compatible with newer scikit-learn #199

Merged
7 changes: 6 additions & 1 deletion .travis.yml
@@ -8,7 +8,12 @@ python:
before_install:
- sudo apt-get install liblapack-dev
- pip install --upgrade pip pytest
- pip install wheel cython numpy scipy scikit-learn codecov pytest-cov
- pip install wheel cython numpy scipy codecov pytest-cov
- if [[ $TRAVIS_PYTHON_VERSION == "3.6" ]]; then
    pip install scikit-learn;
  else
    pip install scikit-learn==0.20.3;
  fi
- if [[ ($TRAVIS_PYTHON_VERSION == "3.6") ||
      ($TRAVIS_PYTHON_VERSION == "2.7")]]; then
    pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8;
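
For context, only the Python 3.6 build gets the latest scikit-learn because scikit-learn 0.21 dropped support for Python 2.7 and 3.4; the older interpreters stay pinned to 0.20.3. A rough Python equivalent of that gating (purely illustrative, not part of the PR; the variable name `sklearn_requirement` is made up):

```python
# Illustrative only: pick the scikit-learn requirement from the running
# interpreter version, mirroring the Travis conditional above.
import sys

if sys.version_info >= (3, 6):
    sklearn_requirement = "scikit-learn"            # latest release is fine
else:
    sklearn_requirement = "scikit-learn==0.20.3"    # 0.20.x is the last series supporting 2.7/3.4

print(sklearn_requirement)
```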
2 changes: 1 addition & 1 deletion README.rst
@@ -20,7 +20,7 @@ Metric Learning algorithms in Python.
**Dependencies**

- Python 2.7+, 3.4+
- numpy, scipy, scikit-learn
- numpy, scipy, scikit-learn>=0.20.3

**Optional dependencies**

2 changes: 1 addition & 1 deletion doc/getting_started.rst
@@ -15,7 +15,7 @@ Alternately, download the source repository and run:
**Dependencies**

- Python 2.7+, 3.4+
- numpy, scipy, scikit-learn
- numpy, scipy, scikit-learn>=0.20.3

**Optional dependencies**

14 changes: 4 additions & 10 deletions metric_learn/_util.py
@@ -22,8 +22,7 @@ def check_input(input_data, y=None, preprocessor=None,
dtype='numeric', order=None,
copy=False, force_all_finite=True,
multi_output=False, ensure_min_samples=1,
ensure_min_features=1, y_numeric=False,
warn_on_dtype=False, estimator=None):
ensure_min_features=1, y_numeric=False, estimator=None):
"""Checks that the input format is valid, and converts it if specified
(this is the equivalent of scikit-learn's `check_array` or `check_X_y`).
All arguments following tuple_size are scikit-learn's `check_X_y`
@@ -88,10 +87,6 @@ def check_input(input_data, y=None, preprocessor=None,
is originally 1D and ``ensure_2d`` is True. Setting to 0 disables
this check.

warn_on_dtype : boolean (default=False)

[Review comment by the PR author] scikit-learn 0.21 deprecates `warn_on_dtype` (see scikit-learn/scikit-learn#13324). Since we never use it (except to test that it works), I suggest we get rid of it. A small compatibility sketch follows this file's diff.

Raise DataConversionWarning if the dtype of the input data structure
does not match the requested dtype, causing a memory copy.

estimator : str or estimator instance (default=`None`)
If passed, include the name of the estimator in warning messages.

@@ -111,7 +106,7 @@ def check_input(input_data, y=None, preprocessor=None,
copy=copy, force_all_finite=force_all_finite,
ensure_min_samples=ensure_min_samples,
ensure_min_features=ensure_min_features,
warn_on_dtype=warn_on_dtype, estimator=estimator)
estimator=estimator)

# We need to convert input_data into a numpy.ndarray if possible, before
# any further checks or conversions, and deal with y if needed. Therefore
@@ -309,9 +304,8 @@ def __init__(self, X):
accept_sparse=True, dtype=None,
force_all_finite=False,
ensure_2d=False, allow_nd=True,
ensure_min_samples=0,
ensure_min_features=0,
warn_on_dtype=False, estimator=None)
ensure_min_samples=0, ensure_min_features=0,
estimator=None)
self.X = X

def __call__(self, indices):
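
Instead of removing the argument outright, one could also keep optional support for it across scikit-learn versions. Below is a minimal sketch of such a shim (not part of this PR; `compat_check_X_y` is an invented name, Python 3 only): it forwards `warn_on_dtype` to scikit-learn's `check_X_y` only when the installed version still accepts it, since the keyword was deprecated in 0.21 and dropped in later releases.

```python
# Hypothetical compatibility shim, not metric-learn code.
import inspect

from sklearn.utils import check_X_y


def compat_check_X_y(X, y, **kwargs):
    # Keep only the keyword arguments that the installed check_X_y accepts,
    # e.g. silently drop warn_on_dtype on releases that removed it.
    supported = inspect.signature(check_X_y).parameters
    kwargs = {k: v for k, v in kwargs.items() if k in supported}
    return check_X_y(X, y, **kwargs)
```

With such a shim, `compat_check_X_y(X, y, warn_on_dtype=True)` would simply ignore the flag on scikit-learn releases that no longer know it.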
96 changes: 58 additions & 38 deletions test/test_base_metric.py
@@ -1,4 +1,5 @@
import pytest
import re
import unittest
import metric_learn
import numpy as np
@@ -7,84 +8,103 @@
from test.test_utils import ids_metric_learners, metric_learners


def remove_spaces(s):
return re.sub(r'\s+', '', s)
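
The helper exists because newer scikit-learn wraps estimator reprs at different points, so the string comparisons below must ignore whitespace. A small illustration (not part of the diff; the example strings are made up):

```python
# Illustrative only: two reprs that differ solely in line wrapping compare
# equal once all whitespace is stripped.
import re


def remove_spaces(s):
    return re.sub(r'\s+', '', s)


wrapped = "NCA(max_iter=100, num_dims=None,\n    preprocessor=None, tol=None)"
flat = "NCA(max_iter=100, num_dims=None, preprocessor=None, tol=None)"
assert remove_spaces(wrapped) == remove_spaces(flat)
```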


class TestStringRepr(unittest.TestCase):

def test_covariance(self):
self.assertEqual(str(metric_learn.Covariance()),
"Covariance(preprocessor=None)")
self.assertEqual(remove_spaces(str(metric_learn.Covariance())),
remove_spaces("Covariance(preprocessor=None)"))

def test_lmnn(self):
self.assertRegexpMatches(
str(metric_learn.LMNN()),
r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, "
r"max_iter=1000,\n min_iter=50, preprocessor=None, "
r"regularization=0.5, use_pca=True,\n verbose=False\)")
str(metric_learn.LMNN()),
r"(python_)?LMNN\(convergence_tol=0.001, k=3, learn_rate=1e-07, "
r"max_iter=1000,\s+min_iter=50, preprocessor=None, "
r"regularization=0.5, use_pca=True,\s+verbose=False\)")

def test_nca(self):
self.assertEqual(str(metric_learn.NCA()),
"NCA(max_iter=100, num_dims=None, preprocessor=None, "
"tol=None, verbose=False)")
self.assertEqual(remove_spaces(str(metric_learn.NCA())),
remove_spaces(
"NCA(max_iter=100, num_dims=None, preprocessor=None, "
"tol=None, verbose=False)"))

def test_lfda(self):
self.assertEqual(str(metric_learn.LFDA()),
"LFDA(embedding_type='weighted', k=None, num_dims=None, "
"preprocessor=None)")
self.assertEqual(remove_spaces(str(metric_learn.LFDA())),
remove_spaces(
"LFDA(embedding_type='weighted', k=None, "
"num_dims=None, "
"preprocessor=None)"))

def test_itml(self):
self.assertEqual(str(metric_learn.ITML()), """
self.assertEqual(remove_spaces(str(metric_learn.ITML())),
remove_spaces("""
ITML(A0=None, convergence_threshold=0.001, gamma=1.0, max_iter=1000,
preprocessor=None, verbose=False)
""".strip('\n'))
self.assertEqual(str(metric_learn.ITML_Supervised()), """
"""))
self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())),
remove_spaces("""
ITML_Supervised(A0=None, bounds='deprecated', convergence_threshold=0.001,
gamma=1.0, max_iter=1000, num_constraints=None,
num_labeled='deprecated', preprocessor=None, verbose=False)
""".strip('\n'))
"""))

def test_lsml(self):
self.assertEqual(
str(metric_learn.LSML()),
remove_spaces(str(metric_learn.LSML())),
remove_spaces(
"LSML(max_iter=1000, preprocessor=None, prior=None, tol=0.001, "
"verbose=False)")
self.assertEqual(str(metric_learn.LSML_Supervised()), """
"verbose=False)"))
self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())),
remove_spaces("""
LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated',
preprocessor=None, prior=None, tol=0.001, verbose=False,
weights=None)
""".strip('\n'))
"""))

def test_sdml(self):
self.assertEqual(str(metric_learn.SDML()),
"SDML(balance_param=0.5, preprocessor=None, "
"sparsity_param=0.01, use_cov=True,\n verbose=False)")
self.assertEqual(str(metric_learn.SDML_Supervised()), """
self.assertEqual(remove_spaces(str(metric_learn.SDML())),
remove_spaces(
"SDML(balance_param=0.5, preprocessor=None, "
"sparsity_param=0.01, use_cov=True,"
"\n verbose=False)"))
self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())),
remove_spaces("""
SDML_Supervised(balance_param=0.5, num_constraints=None,
num_labeled='deprecated', preprocessor=None, sparsity_param=0.01,
use_cov=True, verbose=False)
""".strip('\n'))
"""))

def test_rca(self):
self.assertEqual(str(metric_learn.RCA()),
"RCA(num_dims=None, pca_comps=None, preprocessor=None)")
self.assertEqual(str(metric_learn.RCA_Supervised()),
"RCA_Supervised(chunk_size=2, num_chunks=100, "
"num_dims=None, pca_comps=None,\n "
"preprocessor=None)")
self.assertEqual(remove_spaces(str(metric_learn.RCA())),
remove_spaces("RCA(num_dims=None, pca_comps=None, "
"preprocessor=None)"))
self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())),
remove_spaces(
"RCA_Supervised(chunk_size=2, num_chunks=100, "
"num_dims=None, pca_comps=None,\n "
"preprocessor=None)"))

def test_mlkr(self):
self.assertEqual(str(metric_learn.MLKR()),
"MLKR(A0=None, max_iter=1000, num_dims=None, "
"preprocessor=None, tol=None,\n verbose=False)")
self.assertEqual(remove_spaces(str(metric_learn.MLKR())),
remove_spaces(
"MLKR(A0=None, max_iter=1000, num_dims=None, "
"preprocessor=None, tol=None,\n verbose=False)"))

def test_mmc(self):
self.assertEqual(str(metric_learn.MMC()), """
self.assertEqual(remove_spaces(str(metric_learn.MMC())),
remove_spaces("""
MMC(A0=None, convergence_threshold=0.001, diagonal=False, diagonal_c=1.0,
max_iter=100, max_proj=10000, preprocessor=None, verbose=False)
""".strip('\n'))
self.assertEqual(str(metric_learn.MMC_Supervised()), """
"""))
self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())),
remove_spaces("""
MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False,
diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None,
num_labeled='deprecated', preprocessor=None, verbose=False)
""".strip('\n'))
"""))


@pytest.mark.parametrize('estimator, build_dataset', metric_learners,
59 changes: 0 additions & 59 deletions test/test_utils.py
@@ -299,35 +299,6 @@ def test_check_tuples_invalid_n_samples(estimator, context, load_tuples,
assert str(raised_error.value) == msg


@pytest.mark.parametrize('estimator, context',
[(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")])
@pytest.mark.parametrize('load_tuples, preprocessor',
[(tuples_prep, mock_preprocessor),
(tuples_no_prep, None),
(tuples_no_prep, mock_preprocessor)])
def test_check_tuples_invalid_dtype_convertible(estimator, context,
load_tuples, preprocessor):
"""Checks that a warning is raised if a convertible input is converted to
float"""
tuples = load_tuples().astype(object) # here the object conversion is
# useless for the tuples_prep case, but this allows to test the
# tuples_prep case

if preprocessor is not None: # if the preprocessor is not None we
# overwrite it to have a preprocessor that returns objects
def preprocessor(indices): #
# preprocessor that returns objects
return np.ones((indices.shape[0], 3)).astype(object)

msg = ("Data with input dtype object was converted to float64{}."
.format(context))
with pytest.warns(DataConversionWarning) as raised_warning:
check_input(tuples, type_of_inputs='tuples',
preprocessor=preprocessor, dtype=np.float64,
warn_on_dtype=True, estimator=estimator)
assert str(raised_warning[0].message) == msg


def test_check_tuples_invalid_dtype_not_convertible_with_preprocessor():
"""Checks that a value error is thrown if attempting to convert an
input not convertible to float, when using a preprocessor
@@ -529,36 +500,6 @@ def test_check_classic_invalid_n_samples(estimator, context, load_points,
assert str(raised_error.value) == msg


@pytest.mark.parametrize('estimator, context',
[(NCA(), " by NCA"), ('NCA', " by NCA"), (None, "")])
@pytest.mark.parametrize('load_points, preprocessor',
[(points_prep, mock_preprocessor),
(points_no_prep, None),
(points_no_prep, mock_preprocessor)])
def test_check_classic_invalid_dtype_convertible(estimator, context,
load_points,
preprocessor):
"""Checks that a warning is raised if a convertible input is converted to
float"""
points = load_points().astype(object) # here the object conversion is
# useless for the points_prep case, but this allows to test the
# points_prep case

if preprocessor is not None: # if the preprocessor is not None we
# overwrite it to have a preprocessor that returns objects
def preprocessor(indices):
# preprocessor that returns objects
return np.ones((indices.shape[0], 3)).astype(object)

msg = ("Data with input dtype object was converted to float64{}."
.format(context))
with pytest.warns(DataConversionWarning) as raised_warning:
check_input(points, type_of_inputs='classic',
preprocessor=preprocessor, dtype=np.float64,
warn_on_dtype=True, estimator=estimator)
assert str(raised_warning[0].message) == msg
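
The two deleted tests relied on `warn_on_dtype` to detect the object-to-float conversion. A hedged alternative (a sketch, not something this PR adds) is to assert on the dtype of the array returned by scikit-learn's `check_array` directly:

```python
# Illustrative only: object-dtype input is still converted to float64, which
# can be verified on the returned array instead of via warn_on_dtype.
import numpy as np
from sklearn.utils import check_array

points = np.ones((3, 2)).astype(object)
checked = check_array(points, dtype=np.float64)
assert checked.dtype == np.float64
```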


@pytest.mark.parametrize('preprocessor, points',
[(mock_preprocessor, np.array([['a', 'b'],
['e', 'b']])),