From d6c86474e2c7ec2cf1d76e434fbae4b980956345 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 14:06:11 +0200 Subject: [PATCH 1/3] Remove shogun dependency --- README.rst | 7 ------- doc/getting_started.rst | 8 -------- doc/supervised.rst | 5 ----- metric_learn/lmnn.py | 43 +++------------------------------------- test/test_base_metric.py | 15 +++++++------- 5 files changed, 11 insertions(+), 67 deletions(-) diff --git a/README.rst b/README.rst index 32a9bb90..027e5498 100644 --- a/README.rst +++ b/README.rst @@ -41,13 +41,6 @@ package installed). See the `sphinx documentation`_ for full documentation about installation, API, usage, and examples. -**Notes** - -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. The two implementations differ slightly, and the C++ version is -more complete. - .. _sphinx documentation: http://metric-learn.github.io/metric-learn/ diff --git a/doc/getting_started.rst b/doc/getting_started.rst index d620e401..5a671d86 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -23,14 +23,6 @@ Alternately, download the source repository and run: (install from commit `a0ed406 `_). - For running the examples only: matplotlib -**Notes** - -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. The two implementations differ slightly, and the C++ version is -more complete. - - Quick start =========== diff --git a/doc/supervised.rst b/doc/supervised.rst index 83bf4449..c438294f 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -87,11 +87,6 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class, lmnn = LMNN(k=5, learn_rate=1e-6) lmnn.fit(X, Y, verbose=False) -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. Otherwise, the included pure-Python version will be used. -The two implementations differ slightly, and the C++ version is more complete. - .. topic:: References: .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index c2437b86..f32fb0c6 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -25,7 +25,7 @@ # commonality between LMNN implementations -class _base_LMNN(MahalanobisMixin, TransformerMixin): +class _BaseLMNN(MahalanobisMixin, TransformerMixin): def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False, preprocessor=None, @@ -111,11 +111,11 @@ def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, self.n_components = n_components self.num_dims = num_dims self.random_state = random_state - super(_base_LMNN, self).__init__(preprocessor) + super(_BaseLMNN, self).__init__(preprocessor) # slower Python version -class python_LMNN(_base_LMNN): +class LMNN(_BaseLMNN): def fit(self, X, y): if self.num_dims != 'deprecated': @@ -327,40 +327,3 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None): if weights is not None: return np.dot(Xab.T, Xab * weights[:,None]) return np.dot(Xab.T, Xab) - - -try: - # use the fast C++ version, if available - from modshogun import LMNN as shogun_LMNN - from modshogun import RealFeatures, MulticlassLabels - - class LMNN(_base_LMNN): - """Large Margin Nearest Neighbor (LMNN) - - Attributes - ---------- - n_iter_ : `int` - The number of iterations the solver has run. - - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. - """ - - def fit(self, X, y): - X, y = self._prepare_inputs(X, y, dtype=float, - ensure_min_samples=2) - labels = MulticlassLabels(y) - self._lmnn = shogun_LMNN(RealFeatures(X.T), labels, self.k) - self._lmnn.set_maxiter(self.max_iter) - self._lmnn.set_obj_threshold(self.convergence_tol) - self._lmnn.set_regularization(self.regularization) - self._lmnn.set_stepsize(self.learn_rate) - if self.use_pca: - self._lmnn.train() - else: - self._lmnn.train(np.eye(X.shape[1])) - self.transformer_ = self._lmnn.get_linear_transform(X) - return self - -except ImportError: - LMNN = python_LMNN diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 1b312b35..1c5870d5 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -19,13 +19,14 @@ def test_covariance(self): remove_spaces("Covariance(preprocessor=None)")) def test_lmnn(self): - self.assertRegexpMatches( - str(metric_learn.LMNN()), - r"(python_)?LMNN\(convergence_tol=0.001, init='auto', k=3, " - r"learn_rate=1e-07,\s+" - r"max_iter=1000, min_iter=50, n_components=None, " - r"num_dims='deprecated',\s+preprocessor=None, random_state=None, " - r"regularization=0.5,\s+use_pca=True, verbose=False\)") + self.assertEqual( + remove_spaces(str(metric_learn.LMNN())), + remove_spaces( + "LMNN(convergence_tol=0.001, init='auto', k=3, " + "learn_rate=1e-07, " + "max_iter=1000, min_iter=50, n_components=None, " + "num_dims='deprecated', preprocessor=None, random_state=None, " + "regularization=0.5, use_pca=True, verbose=False)")) def test_nca(self): self.assertEqual(remove_spaces(str(metric_learn.NCA())), From cf0f4ffd69ab084de18602898b65227f30470d1b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 14:14:45 +0200 Subject: [PATCH 2/3] Finalize removing of shogun LMNN --- bench/benchmarks/iris.py | 9 +-------- test/metric_learn_test.py | 18 ++++++++---------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py index e3390930..5973f7b8 100644 --- a/bench/benchmarks/iris.py +++ b/bench/benchmarks/iris.py @@ -13,16 +13,9 @@ 'NCA': metric_learn.NCA(max_iter=700, n_components=2), 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30, chunk_size=2), - 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500), + 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500) } -try: - from metric_learn.lmnn import python_LMNN - if python_LMNN is not metric_learn.LMNN: - CLASSES['python_LMNN'] = python_LMNN(k=5, learn_rate=1e-6, verbose=False) -except ImportError: - pass - class IrisDataset(object): params = [sorted(CLASSES)] diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 18643363..568666d2 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -23,7 +23,7 @@ RCA_Supervised, MMC_Supervised, SDML, ITML, LSML) # Import this specially for testing. from metric_learn.constraints import wrap_pairs -from metric_learn.lmnn import python_LMNN, _sum_outer_products +from metric_learn.lmnn import _sum_outer_products def class_separation(X, labels): @@ -213,14 +213,12 @@ def test_bounds_parameters_invalid(bounds): class TestLMNN(MetricTestCase): def test_iris(self): - # Test both impls, if available. - for LMNN_cls in set((LMNN, python_LMNN)): - lmnn = LMNN_cls(k=5, learn_rate=1e-6, verbose=False) - lmnn.fit(self.iris_points, self.iris_labels) + lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn.fit(self.iris_points, self.iris_labels) - csep = class_separation(lmnn.transform(self.iris_points), - self.iris_labels) - self.assertLess(csep, 0.25) + csep = class_separation(lmnn.transform(self.iris_points), + self.iris_labels) + self.assertLess(csep, 0.25) def test_loss_grad_lbfgs(self): """Test gradient of loss function @@ -319,7 +317,7 @@ def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with # this issue: https://github.com/metric-learn/metric-learn/issues/88 X, y = make_classification(random_state=0) - lmnn = python_LMNN(verbose=True) + lmnn = LMNN(verbose=True) lmnn.fit(X, y) out, _ = capsys.readouterr() assert "LMNN converged with objective" in out @@ -329,7 +327,7 @@ def test_no_twice_same_objective(capsys): # test that the objective function never has twice the same value # see https://github.com/metric-learn/metric-learn/issues/88 X, y = make_classification(random_state=0) - lmnn = python_LMNN(verbose=True) + lmnn = LMNN(verbose=True) lmnn.fit(X, y) out, _ = capsys.readouterr() lines = re.split("\n+", out) From 3b0242c0a5312d473ca73b22e9c4bc0e2270869a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 15:55:13 +0200 Subject: [PATCH 3/3] Remove LMNN useless base class --- metric_learn/lmnn.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index dd14c88d..20eeea3b 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -25,8 +25,7 @@ from .base_metric import MahalanobisMixin -# commonality between LMNN implementations -class _BaseLMNN(MahalanobisMixin, TransformerMixin): +class LMNN(MahalanobisMixin, TransformerMixin): def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False, preprocessor=None, @@ -114,11 +113,7 @@ def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, self.n_components = n_components self.num_dims = num_dims self.random_state = random_state - super(_BaseLMNN, self).__init__(preprocessor) - - -# slower Python version -class LMNN(_BaseLMNN): + super(LMNN, self).__init__(preprocessor) def fit(self, X, y): if self.num_dims != 'deprecated':