From 55140c596a14b770ae7841e4b11e34aa7e6d6a54 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 22 May 2019 11:56:36 +0200 Subject: [PATCH 01/16] Add link to algorithm in the title of sections --- doc/supervised.rst | 16 ++-- doc/weakly_supervised.rst | 165 ++++++++++++++++---------------------- 2 files changed, 75 insertions(+), 106 deletions(-) diff --git a/doc/supervised.rst b/doc/supervised.rst index 83bf4449..e2725e57 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -43,8 +43,8 @@ the covariance matrix of the input data. This is a simple baseline method. .. _lmnn: -LMNN ------ +:py:class:`LMNN ` +----------------------------------------- Large Margin Nearest Neighbor Metric Learning (:py:class:`LMNN `) @@ -102,8 +102,8 @@ The two implementations differ slightly, and the C++ version is more complete. .. _nca: -NCA ---- +:py:class:`NCA ` +-------------------------------------- Neighborhood Components Analysis(:py:class:`NCA `) @@ -166,8 +166,8 @@ the sum of probability of being correctly classified: .. _lfda: -LFDA ----- +:py:class:`LFDA ` +----------------------------------------- Local Fisher Discriminant Analysis(:py:class:`LFDA `) @@ -240,8 +240,8 @@ same class are not imposed to be close. .. _mlkr: -MLKR ----- +:py:class:`MLKR ` +----------------------------------------- Metric Learning for Kernel Regression(:py:class:`MLKR `) diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 93720ffc..4ea2ac5f 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -192,8 +192,8 @@ Algorithms .. _itml: -ITML ----- +:py:class:`ITML ` +----------------------------------------- Information Theoretic Metric Learning(:py:class:`ITML `) @@ -269,96 +269,10 @@ is the prior distance metric, set to identity matrix by default, itml/ -.. _lsml: - -LSML ----- - -Metric Learning from Relative Comparisons by Minimizing Squared Residual -(:py:class:`LSML `) - -`LSML` proposes a simple, yet effective, algorithm that minimizes a convex -objective function corresponding to the sum of squared residuals of -constraints. This algorithm uses the constraints in the form of the -relative distance comparisons, such method is especially useful where -pairwise constraints are not natural to obtain, thus pairwise constraints -based algorithms become infeasible to be deployed. Furthermore, its sparsity -extension leads to more stable estimation when the dimension is high and -only a small amount of constraints is given. - -The loss function of each constraint -:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is -denoted as: - -.. math:: - - H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b) - - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d)) - -where :math:`H(\cdot)` is the squared Hinge loss function defined as: - -.. math:: - - H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\ - \,\,x^2 \qquad x>0\end{aligned}\right.\\ - -The summed loss function :math:`L(C)` is the simple sum over all constraints -:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d) -: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The -original paper suggested here should be a weighted sum since the confidence -or probability of each constraint might differ. However, for the sake of -simplicity and assumption of no extra knowledge provided, we just deploy -the simple sum here as well as what the authors did in the experiments. 
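A minimal numpy sketch of the squared hinge loss :math:`H` and the simple
summed objective above (illustrative only; ``d_M`` stands in for a learned
Mahalanobis distance function and is not part of this patch)::

    import numpy as np

    def squared_hinge(x):
        # H(x) = 0 for x <= 0, x ** 2 for x > 0
        return np.square(np.maximum(x, 0))

    def lsml_loss(quadruplets, d_M):
        # unweighted sum of H(d_M(x_a, x_b) - d_M(x_c, x_d)) over all
        # constraints "(x_a, x_b) should be closer than (x_c, x_d)"
        return sum(squared_hinge(d_M(a, b) - d_M(c, d))
                   for a, b, c, d in quadruplets)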
- -The distance metric learning problem becomes minimizing the summed loss -function of all constraints plus a regularization term w.r.t. the prior -knowledge: - -.. math:: - - \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a, - \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}( - \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_c))\\ - -where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity -by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: - -.. math:: - - D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet} - (\mathbf{M}) - -.. topic:: Example Code: - -:: - - from metric_learn import LSML - - quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], - [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], - [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], - [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] - - # we want to make closer points where the first feature is close, and - # further if the second feature is close - - lsml = LSML() - lsml.fit(quadruplets) - -.. topic:: References: - - .. [1] Liu et al. - "Metric Learning from Relative Comparisons by Minimizing Squared - Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf - - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 - .. _sdml: -======= - -SDML ----- +:py:class:`SDML ` +----------------------------------------- Sparse High-Dimensional Metric Learning (:py:class:`SDML `) @@ -417,8 +331,8 @@ is the off-diagonal L1 norm. .. _rca: -RCA ---- +:py:class:`RCA ` +-------------------------------------- Relative Components Analysis (:py:class:`RCA `) @@ -473,8 +387,8 @@ as the Mahalanobis matrix. .. _mmc: -MMC ---- +:py:class:`MMC ` +-------------------------------------- Metric Learning with Application for Clustering with Side Information (:py:class:`MMC `) @@ -539,11 +453,64 @@ already in the order that points are given in the quadruplet. Algorithms ========== -LSML ----- +.. _lsml: + +:py:class:`LSML ` +----------------------------------------- + +Metric Learning from Relative Comparisons by Minimizing Squared Residual +(:py:class:`LSML `) + +`LSML` proposes a simple, yet effective, algorithm that minimizes a convex +objective function corresponding to the sum of squared residuals of +constraints. This algorithm uses the constraints in the form of the +relative distance comparisons, such method is especially useful where +pairwise constraints are not natural to obtain, thus pairwise constraints +based algorithms become infeasible to be deployed. Furthermore, its sparsity +extension leads to more stable estimation when the dimension is high and +only a small amount of constraints is given. + +The loss function of each constraint +:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is +denoted as: + +.. math:: + + H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b) + - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d)) + +where :math:`H(\cdot)` is the squared Hinge loss function defined as: + +.. math:: -`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared -Residual + H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\ + \,\,x^2 \qquad x>0\end{aligned}\right.\\ + +The summed loss function :math:`L(C)` is the simple sum over all constraints +:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d) +: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. 
The +original paper suggested here should be a weighted sum since the confidence +or probability of each constraint might differ. However, for the sake of +simplicity and assumption of no extra knowledge provided, we just deploy +the simple sum here as well as what the authors did in the experiments. + +The distance metric learning problem becomes minimizing the summed loss +function of all constraints plus a regularization term w.r.t. the prior +knowledge: + +.. math:: + + \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a, + \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}( + \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_c))\\ + +where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity +by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: + +.. math:: + + D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet} + (\mathbf{M}) .. topic:: Example Code: @@ -569,3 +536,5 @@ Residual Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + + From 2ea5c0d1a9f820338df6b08762c636e5ccc6f279 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 22 May 2019 14:30:28 +0200 Subject: [PATCH 02/16] Separate supervised and weakly supervised algorithms in Package Overview --- doc/metric_learn.rst | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index c2472408..40723b40 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -4,15 +4,32 @@ metric_learn package Module Contents --------------- +Base Classes +------------ + .. toctree:: metric_learn.base_metric - metric_learn.itml + + +Supervised Learning Algorithms +------------------------------ + +.. toctree:: + metric_learn.lfda metric_learn.lmnn - metric_learn.lsml metric_learn.mlkr - metric_learn.mmc metric_learn.nca metric_learn.rca + + +Weakly Supervised Learning Algorithms +------------------------------ + +.. 
toctree:: + + metric_learn.itml + metric_learn.lsml + metric_learn.mmc metric_learn.sdml From f29c2393c14468fb41e2578b4778828d293e69df Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 11 Jun 2019 16:44:09 +0200 Subject: [PATCH 03/16] Improve doc --- .gitignore | 1 + doc/_templates/class.rst | 16 ++ doc/_templates/module.rst | 15 ++ doc/conf.py | 14 ++ doc/metric_learn.base_metric.rst | 7 - doc/metric_learn.constraints.rst | 7 - doc/metric_learn.covariance.rst | 22 --- doc/metric_learn.itml.rst | 28 --- doc/metric_learn.lfda.rst | 31 ---- doc/metric_learn.lmnn.rst | 34 ---- doc/metric_learn.lsml.rst | 28 --- doc/metric_learn.mlkr.rst | 28 --- doc/metric_learn.mmc.rst | 28 --- doc/metric_learn.nca.rst | 29 --- doc/metric_learn.rca.rst | 28 --- doc/metric_learn.rst | 51 ++++-- doc/metric_learn.sdml.rst | 27 --- doc/supervised.rst | 18 +- doc/weakly_supervised.rst | 20 +-- metric_learn/base_metric.py | 24 +-- metric_learn/covariance.py | 21 ++- metric_learn/itml.py | 264 ++++++++++++++------------- metric_learn/lfda.py | 94 ++++++---- metric_learn/lmnn.py | 210 ++++++++++++---------- metric_learn/lsml.py | 234 +++++++++++++----------- metric_learn/mlkr.py | 175 +++++++++--------- metric_learn/mmc.py | 300 ++++++++++++++++--------------- metric_learn/nca.py | 183 ++++++++++--------- metric_learn/rca.py | 129 +++++++------ metric_learn/sdml.py | 261 +++++++++++++++------------ 30 files changed, 1139 insertions(+), 1188 deletions(-) create mode 100644 doc/_templates/class.rst create mode 100644 doc/_templates/module.rst delete mode 100644 doc/metric_learn.base_metric.rst delete mode 100644 doc/metric_learn.constraints.rst delete mode 100644 doc/metric_learn.covariance.rst delete mode 100644 doc/metric_learn.itml.rst delete mode 100644 doc/metric_learn.lfda.rst delete mode 100644 doc/metric_learn.lmnn.rst delete mode 100644 doc/metric_learn.lsml.rst delete mode 100644 doc/metric_learn.mlkr.rst delete mode 100644 doc/metric_learn.mmc.rst delete mode 100644 doc/metric_learn.nca.rst delete mode 100644 doc/metric_learn.rca.rst delete mode 100644 doc/metric_learn.sdml.rst diff --git a/.gitignore b/.gitignore index 449f70ea..8321c7d2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ htmlcov/ .cache/ .pytest_cache/ doc/auto_examples/* +doc/generated/* \ No newline at end of file diff --git a/doc/_templates/class.rst b/doc/_templates/class.rst new file mode 100644 index 00000000..f0c1b5bc --- /dev/null +++ b/doc/_templates/class.rst @@ -0,0 +1,16 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :undoc-members: + :inherited-members: + :special-members: __init__ + +.. include:: {{module}}.{{objname}}.examples + +.. raw:: html + +
+    <div class="clearer"></div>
diff --git a/doc/_templates/module.rst b/doc/_templates/module.rst
new file mode 100644
index 00000000..26cc83fa
--- /dev/null
+++ b/doc/_templates/module.rst
@@ -0,0 +1,15 @@
+:mod:`{{module}}` module
+{{ underline }}==============
+
+.. currentmodule:: {{ module }}
+
+.. automodule:: {{ fullname }}
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. include:: {{module}}.examples
+
+.. raw:: html
+
+    <div class="clearer"></div>
diff --git a/doc/conf.py b/doc/conf.py index a11f8bba..a5114d47 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import sys +import os extensions = [ 'sphinx.ext.autodoc', @@ -50,3 +51,16 @@ 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), 'scikit-learn': ('https://scikit-learn.org/stable/', None) } + + +# sphinx-gallery configuration +sphinx_gallery_conf = { + # to generate mini-galleries at the end of each docstring in the API + # section: (see https://sphinx-gallery.github.io/configuration.html + # #references-to-examples) + 'doc_module': 'metric_learn', + 'backreferences_dir': os.path.join('generated'), +} + +# generate autosummary even if no references +autosummary_generate = True diff --git a/doc/metric_learn.base_metric.rst b/doc/metric_learn.base_metric.rst deleted file mode 100644 index 050a360b..00000000 --- a/doc/metric_learn.base_metric.rst +++ /dev/null @@ -1,7 +0,0 @@ -metric_learn.base_metric module -=============================== - -.. automodule:: metric_learn.base_metric - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/metric_learn.constraints.rst b/doc/metric_learn.constraints.rst deleted file mode 100644 index 97d79002..00000000 --- a/doc/metric_learn.constraints.rst +++ /dev/null @@ -1,7 +0,0 @@ -metric_learn.constraints module -=============================== - -.. automodule:: metric_learn.constraints - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/metric_learn.covariance.rst b/doc/metric_learn.covariance.rst deleted file mode 100644 index 493878c1..00000000 --- a/doc/metric_learn.covariance.rst +++ /dev/null @@ -1,22 +0,0 @@ -Covariance metric (baseline method) -=================================== - -.. automodule:: metric_learn.covariance - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import Covariance - from sklearn.datasets import load_iris - - iris = load_iris()['data'] - - cov = Covariance().fit(iris) - x = cov.transform(iris) diff --git a/doc/metric_learn.itml.rst b/doc/metric_learn.itml.rst deleted file mode 100644 index addb4c76..00000000 --- a/doc/metric_learn.itml.rst +++ /dev/null @@ -1,28 +0,0 @@ -Information Theoretic Metric Learning (ITML) -============================================ - -.. automodule:: metric_learn.itml - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import ITML_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - itml = ITML_Supervised(num_constraints=200) - itml.fit(X, Y) - -References ----------- -`Information-theoretic Metric Learning `_ Jason V. Davis, et al. diff --git a/doc/metric_learn.lfda.rst b/doc/metric_learn.lfda.rst deleted file mode 100644 index 41088a68..00000000 --- a/doc/metric_learn.lfda.rst +++ /dev/null @@ -1,31 +0,0 @@ -Local Fisher Discriminant Analysis (LFDA) -========================================= - -.. 
automodule:: metric_learn.lfda - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - import numpy as np - from metric_learn import LFDA - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - lfda = LFDA(k=2, dim=2) - lfda.fit(X, Y) - -References ------------------- -`Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis `_ Masashi Sugiyama. - -`Local Fisher Discriminant Analysis on Beer Style Clustering `_ Yuan Tang. diff --git a/doc/metric_learn.lmnn.rst b/doc/metric_learn.lmnn.rst deleted file mode 100644 index bc65161e..00000000 --- a/doc/metric_learn.lmnn.rst +++ /dev/null @@ -1,34 +0,0 @@ -Large Margin Nearest Neighbor (LMNN) -==================================== - -.. automodule:: metric_learn.lmnn - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - import numpy as np - from metric_learn import LMNN - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - lmnn = LMNN(k=5, learn_rate=1e-6) - lmnn.fit(X, Y, verbose=False) - -If a recent version of the Shogun Python modular (``modshogun``) library -is available, the LMNN implementation will use the fast C++ version from -there. Otherwise, the included pure-Python version will be used. -The two implementations differ slightly, and the C++ version is more complete. - -References ----------- -`Distance Metric Learning for Large Margin Nearest Neighbor Classification `_ Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul diff --git a/doc/metric_learn.lsml.rst b/doc/metric_learn.lsml.rst deleted file mode 100644 index 0deae4e6..00000000 --- a/doc/metric_learn.lsml.rst +++ /dev/null @@ -1,28 +0,0 @@ -Least Squares Metric Learning (LSML) -==================================== - -.. automodule:: metric_learn.lsml - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import LSML_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(X, Y) - -References ----------- - diff --git a/doc/metric_learn.mlkr.rst b/doc/metric_learn.mlkr.rst deleted file mode 100644 index f71697de..00000000 --- a/doc/metric_learn.mlkr.rst +++ /dev/null @@ -1,28 +0,0 @@ -Metric Learning for Kernel Regression (MLKR) -============================================ - -.. automodule:: metric_learn.mlkr - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import MLKR - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - mlkr = MLKR() - mlkr.fit(X, Y) - -References ----------- -`Information-theoretic Metric Learning `_ Jason V. Davis, et al. diff --git a/doc/metric_learn.mmc.rst b/doc/metric_learn.mmc.rst deleted file mode 100644 index bb9031ba..00000000 --- a/doc/metric_learn.mmc.rst +++ /dev/null @@ -1,28 +0,0 @@ -Mahalanobis Metric Learning for Clustering (MMC) -================================================ - -.. 
automodule:: metric_learn.mmc - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import MMC_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - mmc = MMC_Supervised(num_constraints=200) - mmc.fit(X, Y) - -References ----------- -`Distance metric learning with application to clustering with side-information `_ Xing, Jordan, Russell, Ng. diff --git a/doc/metric_learn.nca.rst b/doc/metric_learn.nca.rst deleted file mode 100644 index 00bc4eac..00000000 --- a/doc/metric_learn.nca.rst +++ /dev/null @@ -1,29 +0,0 @@ -Neighborhood Components Analysis (NCA) -====================================== - -.. automodule:: metric_learn.nca - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - import numpy as np - from metric_learn import NCA - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - nca = NCA(max_iter=1000) - nca.fit(X, Y) - -References ----------- - diff --git a/doc/metric_learn.rca.rst b/doc/metric_learn.rca.rst deleted file mode 100644 index 027d583b..00000000 --- a/doc/metric_learn.rca.rst +++ /dev/null @@ -1,28 +0,0 @@ -Relative Components Analysis (RCA) -================================== - -.. automodule:: metric_learn.rca - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import RCA_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - rca = RCA_Supervised(num_chunks=30, chunk_size=2) - rca.fit(X, Y) - -References ------------------- -`Adjustment learning and relevant component analysis `_ Noam Shental, et al. diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 4e43a480..55479672 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -7,30 +7,47 @@ Module Contents Base Classes ------------ -.. toctree:: - - metric_learn.constraints - metric_learn.base_metric +.. autosummary:: + :toctree: generated/ + :template: module.rst + metric_learn.base_metric + metric_learn.constraints Supervised Learning Algorithms ------------------------------ +.. autosummary:: + :toctree: generated/ + :template: class.rst + + metric_learn.LFDA + metric_learn.LMNN + metric_learn.MLKR + metric_learn.NCA + metric_learn.RCA + metric_learn.ITML_Supervised + metric_learn.LSML_Supervised + metric_learn.MMC_Supervised + metric_learn.SDML_Supervised + metric_learn.RCA_Supervised -.. toctree:: +Weakly Supervised Learning Algorithms +------------------------------ - metric_learn.lfda - metric_learn.lmnn - metric_learn.mlkr - metric_learn.nca - metric_learn.rca +.. autosummary:: + :toctree: generated/ + :template: class.rst + metric_learn.ITML + metric_learn.LSML + metric_learn.MMC + metric_learn.SDML -Weakly Supervised Learning Algorithms ------------------------------- +Unsupervised Learning Algorithms +-------------------------------- -.. toctree:: +.. 
autosummary:: + :toctree: generated/ + :template: class.rst - metric_learn.itml - metric_learn.lsml - metric_learn.mmc - metric_learn.sdml + metric_learn.Covariance \ No newline at end of file diff --git a/doc/metric_learn.sdml.rst b/doc/metric_learn.sdml.rst deleted file mode 100644 index 3e350a70..00000000 --- a/doc/metric_learn.sdml.rst +++ /dev/null @@ -1,27 +0,0 @@ -Sparse Determinant Metric Learning (SDML) -========================================= - -.. automodule:: metric_learn.sdml - :members: - :undoc-members: - :inherited-members: - :show-inheritance: - :special-members: __init__ - -Example Code ------------- - -:: - - from metric_learn import SDML_Supervised - from sklearn.datasets import load_iris - - iris_data = load_iris() - X = iris_data['data'] - Y = iris_data['target'] - - sdml = SDML_Supervised(num_constraints=200) - sdml.fit(X, Y) - -References ------------------- diff --git a/doc/supervised.rst b/doc/supervised.rst index e2725e57..414560c4 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -43,11 +43,11 @@ the covariance matrix of the input data. This is a simple baseline method. .. _lmnn: -:py:class:`LMNN ` +:py:class:`LMNN ` ----------------------------------------- Large Margin Nearest Neighbor Metric Learning -(:py:class:`LMNN `) +(:py:class:`LMNN `) `LMNN` learns a Mahalanobis distance metric in the kNN classification setting. The learned metric attempts to keep close k-nearest neighbors @@ -102,10 +102,10 @@ The two implementations differ slightly, and the C++ version is more complete. .. _nca: -:py:class:`NCA ` +:py:class:`NCA ` -------------------------------------- -Neighborhood Components Analysis(:py:class:`NCA `) +Neighborhood Components Analysis(:py:class:`NCA `) `NCA` is a distance metric learning algorithm which aims to improve the accuracy of nearest neighbors classification compared to the standard @@ -166,10 +166,10 @@ the sum of probability of being correctly classified: .. _lfda: -:py:class:`LFDA ` +:py:class:`LFDA ` ----------------------------------------- -Local Fisher Discriminant Analysis(:py:class:`LFDA `) +Local Fisher Discriminant Analysis(:py:class:`LFDA `) `LFDA` is a linear supervised dimensionality reduction method. It is particularly useful when dealing with multi-modality, where one ore more classes @@ -240,10 +240,10 @@ same class are not imposed to be close. .. _mlkr: -:py:class:`MLKR ` +:py:class:`MLKR ` ----------------------------------------- -Metric Learning for Kernel Regression(:py:class:`MLKR `) +Metric Learning for Kernel Regression(:py:class:`MLKR `) `MLKR` is an algorithm for supervised metric learning, which learns a distance function by directly minimizing the leave-one-out regression error. @@ -303,6 +303,8 @@ calculating a weighted average of all the training samples: Gerald Tesauro +.. _supervised_version: + Supervised versions of weakly-supervised algorithms --------------------------------------------------- diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 3d367946..71ccb334 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -193,10 +193,10 @@ Algorithms .. 
_itml: -:py:class:`ITML ` +:py:class:`ITML ` ----------------------------------------- -Information Theoretic Metric Learning(:py:class:`ITML `) +Information Theoretic Metric Learning(:py:class:`ITML `) `ITML` minimizes the (differential) relative entropy, aka Kullback–Leibler divergence, between two multivariate Gaussians subject to constraints on the @@ -272,11 +272,11 @@ is the prior distance metric, set to identity matrix by default, .. _sdml: -:py:class:`SDML ` +:py:class:`SDML ` ----------------------------------------- Sparse High-Dimensional Metric Learning -(:py:class:`SDML `) +(:py:class:`SDML `) `SDML` is an efficient sparse metric learning in high-dimensional space via double regularization: an L1-penalization on the off-diagonal elements of the @@ -332,10 +332,10 @@ is the off-diagonal L1 norm. .. _rca: -:py:class:`RCA ` +:py:class:`RCA ` -------------------------------------- -Relative Components Analysis (:py:class:`RCA `) +Relative Components Analysis (:py:class:`RCA `) `RCA` learns a full rank Mahalanobis distance metric based on a weighted sum of in-chunklets covariance matrices. It applies a global linear transformation to @@ -388,11 +388,11 @@ as the Mahalanobis matrix. .. _mmc: -:py:class:`MMC ` +:py:class:`MMC ` -------------------------------------- Metric Learning with Application for Clustering with Side Information -(:py:class:`MMC `) +(:py:class:`MMC `) `MMC` minimizes the sum of squared distances between similar points, while enforcing the sum of distances between dissimilar ones to be greater than one. @@ -456,11 +456,11 @@ Algorithms .. _lsml: -:py:class:`LSML ` +:py:class:`LSML ` ----------------------------------------- Metric Learning from Relative Comparisons by Minimizing Squared Residual -(:py:class:`LSML `) +(:py:class:`LSML `) `LSML` proposes a simple, yet effective, algorithm that minimizes a convex objective function corresponding to the sum of squared residuals of diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 856591cb..8fc209e0 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -10,16 +10,16 @@ class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): + """Base class for all metric-learners - def __init__(self, preprocessor=None): - """ + Parameters + ---------- + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + """ - Parameters - ---------- - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - """ + def __init__(self, preprocessor=None): self.preprocessor = preprocessor @abstractmethod @@ -277,6 +277,8 @@ def metric_fun(u, v, squared=False): get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__ def metric(self): + """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix` + instead""" # TODO: remove this method in version 0.6.0 warnings.warn(("`metric` is deprecated since version 0.5.0 and will be " "removed in 0.6.0. 
Use `get_mahalanobis_matrix` instead."), @@ -578,7 +580,7 @@ def predict(self, quadruplets): Parameters ---------- - quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \ (n_quadruplets, 4) 3D Array of quadruplets to predict, with each row corresponding to four points, or 2D array of indices of quadruplets if the metric learner @@ -607,7 +609,7 @@ def decision_function(self, quadruplets): Parameters ---------- - quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \ (n_quadruplets, 4) 3D Array of quadruplets to predict, with each row corresponding to four points, or 2D array of indices of quadruplets if the metric learner @@ -630,7 +632,7 @@ def score(self, quadruplets): Parameters ---------- - quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or + quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \ (n_quadruplets, 4) 3D Array of quadruplets to score, with each row corresponding to four points, or 2D array of indices of quadruplets if the metric learner diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 19dad5d8..a973633f 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -1,11 +1,5 @@ """ Covariance metric (baseline method) - -This method does not "learn" anything, rather it calculates -the covariance matrix of the input data. - -This is a simple baseline method first introduced in -On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 """ from __future__ import absolute_import @@ -20,11 +14,26 @@ class Covariance(MahalanobisMixin, TransformerMixin): """Covariance metric (baseline method) + This method does not "learn" anything, rather it calculates + the covariance matrix of the input data. + + This is a simple baseline method first introduced in + On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + Examples + -------- + >>> from metric_learn import Covariance + >>> from sklearn.datasets import load_iris + >>> iris = load_iris()['data'] + >>> cov = Covariance().fit(iris) + >>> x = cov.transform(iris) + """ def __init__(self, preprocessor=None): diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 21303c18..51e34667 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -1,17 +1,5 @@ -r""" -Information Theoretic Metric Learning(ITML) - -`ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler -divergence, between two multivariate Gaussians subject to constraints on the -associated Mahalanobis distance, which can be formulated into a Bregman -optimization problem by minimizing the LogDet divergence subject to -linear constraints. This algorithm can handle a wide variety of constraints -and can optionally incorporate a prior on the distance function. Unlike some -other methods, `ITML` does not rely on an eigenvalue computation or -semi-definite programming. - -Read more in the :ref:`User Guide `. 
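# A minimal numpy sketch of the LogDet divergence that ITML minimizes,
# D_ld(M, M0) = tr(M M0^-1) - logdet(M M0^-1) - d, for positive definite
# d x d matrices M and M0 (illustrative only; not part of the library API):
import numpy as np

def logdet_divergence(M, M0):
    # M0^-1 M computed via a solve, avoiding an explicit matrix inverse
    P = np.linalg.solve(M0, M)
    sign, logabsdet = np.linalg.slogdet(P)
    return np.trace(P) - logabsdet - M.shape[0]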
- +""" +Information Theoretic Metric Learning (ITML) """ from __future__ import print_function, absolute_import @@ -34,55 +22,6 @@ class _BaseITML(MahalanobisMixin): def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, prior='identity', A0='deprecated', verbose=False, preprocessor=None, random_state=None): - """Initialize ITML. - - Parameters - ---------- - gamma : float, optional - value for slack variables - - max_iter : int, optional - - convergence_threshold : float, optional - - prior : string or numpy array, optional (default='identity') - The Mahalanobis matrix to use as a prior. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. - """ self.gamma = gamma self.max_iter = max_iter self.convergence_threshold = convergence_threshold @@ -172,6 +111,64 @@ def _fit(self, pairs, y, bounds=None): class ITML(_BaseITML, _PairsClassifierMixin): """Information Theoretic Metric Learning (ITML) + `ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler + divergence, between two multivariate Gaussians subject to constraints on the + associated Mahalanobis distance, which can be formulated into a Bregman + optimization problem by minimizing the LogDet divergence subject to + linear constraints. This algorithm can handle a wide variety of constraints + and can optionally incorporate a prior on the distance function. Unlike some + other methods, `ITML` does not rely on an eigenvalue computation or + semi-definite programming. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + gamma : float, optional + value for slack variables + + max_iter : int, optional + + convergence_threshold : float, optional + + prior : string or numpy array, optional (default='identity') + The Mahalanobis matrix to use as a prior. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + A0 : Not used + .. 
deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -194,6 +191,22 @@ class ITML(_BaseITML, _PairsClassifierMixin): If the distance metric between two points is lower than this threshold, points will be classified as similar, otherwise they will be classified as dissimilar. + + Examples + -------- + >>> from metric_learn import ITML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> itml = ITML_Supervised(num_constraints=200) + >>> itml.fit(X, Y) + + References + ---------- + .. [1] `Information-theoretic Metric Learning + `_ Jason V. Davis, et al. """ def fit(self, pairs, y, bounds=None, calibration_params=None): @@ -204,7 +217,7 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): Parameters ---------- - pairs: array-like, shape=(n_constraints, 2, n_features) or + pairs: array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) 3D Array of pairs with each row corresponding to two points, or 2D array of indices of pairs if the metric learner uses a @@ -240,6 +253,64 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): class ITML_Supervised(_BaseITML, TransformerMixin): """Supervised version of Information Theoretic Metric Learning (ITML) + `ITML_Supervised` creates pairs of similar sample by taking same class + samples, and pairs of dissimilar samples by taking different class + samples. It then passes these pairs to `ITML` for training. + + Parameters + ---------- + gamma : float, optional + value for slack variables + max_iter : int, optional + convergence_threshold : float, optional + num_labeled : Not used + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. + num_constraints: int, optional + number of constraints to generate + bounds : Not used + .. deprecated:: 0.5.0 + `bounds` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Set `bounds` at fit time instead : + `itml_supervised.fit(X, y, bounds=...)` + + prior : string or numpy array, optional (default='identity') + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + A0 : Not used + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. 
+ verbose : bool, optional + if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. + + Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) @@ -257,71 +328,18 @@ class ITML_Supervised(_BaseITML, TransformerMixin): transformer_ : `numpy.ndarray`, shape=(n_features, n_features) The linear transformation ``L`` deduced from the learned Mahalanobis metric (See function `transformer_from_metric`.) + + See Also + -------- + metric_learn.ITML : The original weakly-supervised algorithm + :ref:`supervised_version` : The section of the project documentation + that describes the supervised version of weakly supervised estimators. """ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, num_labeled='deprecated', num_constraints=None, bounds='deprecated', prior='identity', A0='deprecated', verbose=False, preprocessor=None, random_state=None): - """Initialize the supervised version of `ITML`. - - `ITML_Supervised` creates pairs of similar sample by taking same class - samples, and pairs of dissimilar samples by taking different class - samples. It then passes these pairs to `ITML` for training. - - Parameters - ---------- - gamma : float, optional - value for slack variables - max_iter : int, optional - convergence_threshold : float, optional - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - bounds : Not used - .. deprecated:: 0.5.0 - `bounds` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Set `bounds` at fit time instead : - `itml_supervised.fit(X, y, bounds=...)` - - prior : string or numpy array, optional (default='identity') - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. 
- """ _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, A0=A0, prior=prior, verbose=verbose, diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 1851a734..7a0bb80a 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -1,13 +1,5 @@ -r""" -Local Fisher Discriminant Analysis(LFDA) - -LFDA is a linear supervised dimensionality reduction method. It is -particularly useful when dealing with multimodality, where one ore more classes -consist of separate clusters in input space. The core optimization problem of -LFDA is solved as a generalized eigenvalue problem. - -Read more in the :ref:`User Guide `. - +""" +Local Fisher Discriminant Analysis (LFDA) """ from __future__ import division, absolute_import import numpy as np @@ -26,42 +18,68 @@ class LFDA(MahalanobisMixin, TransformerMixin): Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction Sugiyama, ICML 2006 - Attributes + LFDA is a linear supervised dimensionality reduction method. It is + particularly useful when dealing with multimodality, where one ore more + classes consist of separate clusters in input space. The core optimization + problem of LFDA is solved as a generalized eigenvalue problem. + + Read more in the :ref:`User Guide `. + + Parameters ---------- - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. - ''' + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). - def __init__(self, n_components=None, num_dims='deprecated', - k=None, embedding_type='weighted', preprocessor=None): - ''' - Initialize LFDA. + num_dims : Not used - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. - num_dims : Not used + k : int, optional + Number of nearest neighbors used in local scaling method. + Defaults to min(7, n_components - 1). - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + embedding_type : str, optional + Type of metric in the embedding space (default: 'weighted') + 'weighted' - weighted eigenvectors + 'orthonormalized' - orthonormalized + 'plain' - raw eigenvectors - k : int, optional - Number of nearest neighbors used in local scaling method. - Defaults to min(7, n_components - 1). + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + Attributes + ---------- + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + The learned linear transformation ``L``. - embedding_type : str, optional - Type of metric in the embedding space (default: 'weighted') - 'weighted' - weighted eigenvectors - 'orthonormalized' - orthonormalized - 'plain' - raw eigenvectors + Examples + -------- + + >>> import numpy as np + >>> from metric_learn import LFDA + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lfda = LFDA(k=2, dim=2) + >>> lfda.fit(X, Y) + + References + ------------------ + .. 
[1] `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher + Discriminant Analysis `_ + Masashi Sugiyama. + + .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering + `_ Yuan Tang. + ''' - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - ''' + def __init__(self, n_components=None, num_dims='deprecated', + k=None, embedding_type='weighted', preprocessor=None): if embedding_type not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid embedding_type: %r' % embedding_type) self.n_components = n_components diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index c2437b86..00acb75e 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -1,16 +1,7 @@ -r""" -Large Margin Nearest Neighbor Metric learning(LMNN) - -LMNN learns a Mahalanobis distance metric in the kNN classification -setting. The learned metric attempts to keep close k-nearest neighbors -from the same class, while keeping examples from different classes -separated by a large margin. This algorithm makes no assumptions about -the distribution of the data. - -Read more in the :ref:`User Guide `. - """ -#TODO: periodic recalculation of impostors, PCA initialization +Large Margin Nearest Neighbor Metric learning (LMNN) +""" +# TODO: periodic recalculation of impostors, PCA initialization from __future__ import print_function, absolute_import import numpy as np @@ -26,79 +17,124 @@ # commonality between LMNN implementations class _base_LMNN(MahalanobisMixin, TransformerMixin): + """Large Margin Nearest Neighbor (LMNN) + + LMNN learns a Mahalanobis distance metric in the kNN classification + setting. The learned metric attempts to keep close k-nearest neighbors + from the same class, while keeping examples from different classes + separated by a large margin. This algorithm makes no assumptions about + the distribution of the data. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. 
+ + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + k : int, optional + Number of neighbors to consider, not including self-edges. + + regularization: float, optional + Weighting of pull and push terms, with 0.5 meaning equal weight. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + Attributes + ---------- + n_iter_ : `int` + The number of iterations the solver has run. + + transformer_ : `numpy.ndarray`, shape=(n_components, n_features) + The learned linear transformation ``L``. + + Examples + -------- + + >>> import numpy as np + >>> from metric_learn import LMNN + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lmnn = LMNN(k=5, learn_rate=1e-6) + >>> lmnn.fit(X, Y, verbose=False) + + Notes + ----- + + If a recent version of the Shogun Python modular (``modshogun``) library + is available, the LMNN implementation will use the fast C++ version from + there. Otherwise, the included pure-Python version will be used. + The two implementations differ slightly, and the C++ version is more + complete. + + References + ---------- + .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor + Classification `_ + Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul + """ + def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, use_pca=True, verbose=False, preprocessor=None, n_components=None, num_dims='deprecated', random_state=None): - """Initialize the LMNN object. - - Parameters - ---------- - init : string or numpy array, optional (default='auto') - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) 
(See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. - - k : int, optional - Number of neighbors to consider, not including self-edges. - - regularization: float, optional - Weighting of pull and push terms, with 0.5 meaning equal weight. - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. - """ self.init = init self.k = k self.min_iter = min_iter @@ -335,16 +371,6 @@ def _sum_outer_products(data, a_inds, b_inds, weights=None): from modshogun import RealFeatures, MulticlassLabels class LMNN(_base_LMNN): - """Large Margin Nearest Neighbor (LMNN) - - Attributes - ---------- - n_iter_ : `int` - The number of iterations the solver has run. - - transformer_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. - """ def fit(self, X, y): X, y = self._prepare_inputs(X, y, dtype=float, @@ -364,3 +390,5 @@ def fit(self, X, y): except ImportError: LMNN = python_LMNN + +LMNN.__doc__ == _base_LMNN.__doc__ diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 4350b003..eb1a97be 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -1,17 +1,5 @@ -r""" -Metric Learning from Relative Comparisons by Minimizing Squared Residual(LSML) - -`LSML` proposes a simple, yet effective, algorithm that minimizes a convex -objective function corresponding to the sum of squared residuals of -constraints. This algorithm uses the constraints in the form of the -relative distance comparisons, such method is especially useful where -pairwise constraints are not natural to obtain, thus pairwise constraints -based algorithms become infeasible to be deployed. Furthermore, its sparsity -extension leads to more stable estimation when the dimension is high and -only a small amount of constraints is given. - -Read more in the :ref:`User Guide `. - +""" +Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML) """ from __future__ import print_function, absolute_import, division @@ -33,46 +21,6 @@ class _BaseLSML(MahalanobisMixin): def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False, preprocessor=None, random_state=None): - """Initialize LSML. 
-
-    Parameters
-    ----------
-    prior : None, string or numpy array, optional (default=None)
-      Prior to set for the metric. Possible options are
-      'identity', 'covariance', 'random', and a numpy array of
-      shape (n_features, n_features). For LSML, the prior should be strictly
-      positive definite (PD). If `None`, will be set
-      automatically to 'identity' (this is to raise a warning if
-      `prior` is not set, and stays to its default value (None), in v0.5.0).
-
-      'identity'
-        An identity matrix of shape (n_features, n_features).
-
-      'covariance'
-        The inverse covariance matrix.
-
-      'random'
-        The initial Mahalanobis matrix will be a random positive definite
-        (PD) matrix of shape `(n_features, n_features)`, generated using
-        `sklearn.datasets.make_spd_matrix`.
-
-      numpy array
-        A positive definite (PD) matrix of shape
-        (n_features, n_features), that will be used as such to set the
-        prior.
-
-    tol : float, optional
-    max_iter : int, optional
-    verbose : bool, optional
-      if True, prints information while learning
-    preprocessor : array-like, shape=(n_samples, n_features) or callable
-      The preprocessor to call to get tuples from indices. If array-like,
-      tuples will be formed like this: X[indices].
-    random_state : int or numpy.RandomState or None, optional (default=None)
-      A pseudo random number generator object or a seed for it if int. If
-      ``init='random'``, ``random_state`` is used to set the random
-      prior.
-    """
     self.prior = prior
     self.tol = tol
     self.max_iter = max_iter
@@ -178,6 +126,55 @@ def _gradient(self, metric, vab, vcd, prior_inv):
 class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
   """Least Squared-residual Metric Learning (LSML)
 
+  `LSML` proposes a simple, yet effective, algorithm that minimizes a convex
+  objective function corresponding to the sum of squared residuals of
+  constraints. It uses constraints in the form of relative distance
+  comparisons, which is especially useful when pairwise constraints are not
+  natural to obtain, so that pairwise-constraint-based algorithms become
+  infeasible to deploy. Furthermore, its sparsity extension leads to more
+  stable estimation when the dimension is high and only a small number of
+  constraints is given.
+
+  Read more in the :ref:`User Guide <lsml>`.
+
+  Parameters
+  ----------
+  prior : None, string or numpy array, optional (default=None)
+    Prior to set for the metric. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). For LSML, the prior should be strictly
+    positive definite (PD). If `None`, will be set
+    automatically to 'identity' (this is to raise a warning if
+    `prior` is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).
+
+    'covariance'
+      The inverse covariance matrix.
+
+    'random'
+      The initial Mahalanobis matrix will be a random positive definite
+      (PD) matrix of shape `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.
+
+    numpy array
+      A positive definite (PD) matrix of shape
+      (n_features, n_features), that will be used as such to set the
+      prior.
+
+  tol : float, optional
+  max_iter : int, optional
+  verbose : bool, optional
+    if True, prints information while learning
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be formed like this: X[indices].
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``prior='random'``, ``random_state`` is used to set the random
+    prior.
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -186,6 +183,31 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
   transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
       The linear transformation ``L`` deduced from the learned Mahalanobis
       metric (See function `transformer_from_metric`.)
+
+  Examples
+  --------
+  >>> from metric_learn import LSML_Supervised
+  >>> from sklearn.datasets import load_iris
+  >>> iris_data = load_iris()
+  >>> X = iris_data['data']
+  >>> Y = iris_data['target']
+  >>> lsml = LSML_Supervised(num_constraints=200)
+  >>> lsml.fit(X, Y)
+
+  References
+  ----------
+  .. [1] Liu et al. `Metric Learning from Relative Comparisons by Minimizing
+         Squared Residual
+         `_. ICDM 2012.
+
+  .. [2] Adapted from https://gist.github.com/kcarnold/5439917
+
+  See Also
+  --------
+  metric_learn.LSML_Supervised : The supervised version of this algorithm
+
+  :ref:`supervised_version` : The section of the project documentation
+    that describes the supervised version of weakly supervised estimators.
   """

   def fit(self, quadruplets, weights=None):
@@ -193,7 +215,7 @@ def fit(self, quadruplets, weights=None):

     Parameters
     ----------
-    quadruplets : array-like, shape=(n_constraints, 4, n_features) or
+    quadruplets : array-like, shape=(n_constraints, 4, n_features) or \
                   (n_constraints, 4)
       3D array-like of quadruplets of points or 2D array of quadruplets of
       indicators. In order to supervise the algorithm in the right way, we
@@ -214,6 +236,56 @@ def fit(self, quadruplets, weights=None):

 class LSML_Supervised(_BaseLSML, TransformerMixin):
   """Supervised version of Least Squared-residual Metric Learning (LSML)

+  `LSML_Supervised` creates quadruplets from labeled samples by taking two
+  samples from the same class, and two samples from different classes.
+  This way it builds quadruplets where the first two points must be more
+  similar than the last two points.
+
+  Parameters
+  ----------
+  tol : float, optional
+    Convergence tolerance of the optimization procedure.
+  max_iter : int, optional
+    Maximum number of iterations of the optimization procedure.
+  prior : None, string or numpy array, optional (default=None)
+    Prior to set for the metric. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). For LSML, the prior should be strictly
+    positive definite (PD). If `None`, will be set
+    automatically to 'identity' (this is to raise a warning if
+    `prior` is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).
+
+    'covariance'
+      The inverse covariance matrix.
+
+    'random'
+      The initial Mahalanobis matrix will be a random positive definite
+      (PD) matrix of shape `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.
+
+    numpy array
+      A positive definite (PD) matrix of shape
+      (n_features, n_features), that will be used as such to set the
+      prior.
+  num_labeled : Not used
+    .. deprecated:: 0.5.0
+      `num_labeled` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0.
+  num_constraints: int, optional
+    number of constraints to generate
+  weights : (m,) array of floats, optional
+    scale factor for each constraint
+  verbose : bool, optional
+    if True, prints information while learning
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices.
If array-like,
+    tuples will be formed like this: X[indices].
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``prior='random'``, ``random_state`` is used to set the random
+    prior.
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -227,58 +299,6 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   def __init__(self, tol=1e-3, max_iter=1000, prior=None,
                num_labeled='deprecated', num_constraints=None, weights=None,
                verbose=False, preprocessor=None, random_state=None):
-    """Initialize the supervised version of `LSML`.
-
-    `LSML_Supervised` creates quadruplets from labeled samples by taking two
-    samples from the same class, and two samples from different classes.
-    This way it builds quadruplets where the two first points must be more
-    similar than the two last points.
-
-    Parameters
-    ----------
-    tol : float, optional
-    max_iter : int, optional
-    prior : None, string or numpy array, optional (default=None)
-        Prior to set for the metric. Possible options are
-        'identity', 'covariance', 'random', and a numpy array of
-        shape (n_features, n_features). For LSML, the prior should be strictly
-        positive definite (PD). If `None`, will be set
-        automatically to 'identity' (this is to raise a warning if
-        `prior` is not set, and stays to its default value (None), in v0.5.0).
-
-        'identity'
-            An identity matrix of shape (n_features, n_features).
-
-        'covariance'
-            The inverse covariance matrix.
-
-        'random'
-            The initial Mahalanobis matrix will be a random positive definite
-            (PD) matrix of shape `(n_features, n_features)`, generated using
-            `sklearn.datasets.make_spd_matrix`.
-
-        numpy array
-            A positive definite (PD) matrix of shape
-            (n_features, n_features), that will be used as such to set the
-            prior.
-        num_labeled : Not used
-        .. deprecated:: 0.5.0
-          `num_labeled` was deprecated in version 0.5.0 and will
-          be removed in 0.6.0.
-        num_constraints: int, optional
-          number of constraints to generate
-        weights : (m,) array of floats, optional
-          scale factor for each constraint
-        verbose : bool, optional
-          if True, prints information while learning
-        preprocessor : array-like, shape=(n_samples, n_features) or callable
-          The preprocessor to call to get tuples from indices. If array-like,
-          tuples will be formed like this: X[indices].
-        random_state : int or numpy.RandomState or None, optional (default=None)
-          A pseudo random number generator object or a seed for it if int. If
-          ``init='random'``, ``random_state`` is used to set the random
-          prior.
-    """
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index 9e9cf433..eab6a60a 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -1,13 +1,5 @@
-r"""
-Metric Learning for Kernel Regression(MLKR)
-
-MLKR is an algorithm for supervised metric learning, which learns a
-distance function by directly minimizing the leave-one-out regression error.
-This algorithm can also be viewed as a supervised variation of PCA and can be
-used for dimensionality reduction and high dimensional data visualization.
-
-Read more in the :ref:`User Guide `.
- +""" +Metric Learning for Kernel Regression (MLKR) """ from __future__ import division, print_function import time @@ -31,6 +23,81 @@ class MLKR(MahalanobisMixin, TransformerMixin): """Metric Learning for Kernel Regression (MLKR) + MLKR is an algorithm for supervised metric learning, which learns a + distance function by directly minimizing the leave-one-out regression error. + This algorithm can also be viewed as a supervised variation of PCA and can be + used for dimensionality reduction and high dimensional data visualization. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components < min(n_features, n_samples)``, + we use 'pca', as it projects data in meaningful directions (those + of higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``num_dims`` is not None, n_features_a must match it. + + A0: Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + + tol: float, optional (default=None) + Convergence tolerance for the optimization. + + max_iter: int, optional + Cap on number of conjugate gradient iterations. + + verbose : bool, optional (default=False) + Whether to print progress messages or not. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + Attributes ---------- n_iter_ : `int` @@ -38,82 +105,28 @@ class MLKR(MahalanobisMixin, TransformerMixin): transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. 
+
+  Examples
+  --------
+
+  >>> from metric_learn import MLKR
+  >>> from sklearn.datasets import load_iris
+  >>> iris_data = load_iris()
+  >>> X = iris_data['data']
+  >>> Y = iris_data['target']
+  >>> mlkr = MLKR()
+  >>> mlkr.fit(X, Y)
+
+  References
+  ----------
+  .. [1] K. Q. Weinberger and G. Tesauro. Metric Learning for Kernel
+         Regression. AISTATS 2007.
   """

   def __init__(self, n_components=None, num_dims='deprecated', init=None,
                A0='deprecated', tol=None, max_iter=1000,
                verbose=False, preprocessor=None, random_state=None):
-    """
-    Initialize MLKR.
-
-    Parameters
-    ----------
-    n_components : int or None, optional (default=None)
-      Dimensionality of reduced space (if None, defaults to dimension of X).
-
-    num_dims : Not used
-
-      .. deprecated:: 0.5.0
-        `num_dims` was deprecated in version 0.5.0 and will
-        be removed in 0.6.0. Use `n_components` instead.
-
-    init : None, string or numpy array, optional (default=None)
-      Initialization of the linear transformation. Possible options are
-      'auto', 'pca', 'identity', 'random', and a numpy array of shape
-      (n_features_a, n_features_b). If None, will be set automatically to
-      'auto' (this option is to raise a warning if 'init' is not set,
-      and stays to its default value None, in v0.5.0).
-
-      'auto'
-        Depending on ``n_components``, the most reasonable initialization
-        will be chosen. If ``n_components < min(n_features, n_samples)``,
-        we use 'pca', as it projects data in meaningful directions (those
-        of higher variance). Otherwise, we just use 'identity'.
-
-      'pca'
-        ``n_components`` principal components of the inputs passed
-        to :meth:`fit` will be used to initialize the transformation.
-        (See `sklearn.decomposition.PCA`)
-
-      'identity'
-        If ``n_components`` is strictly smaller than the
-        dimensionality of the inputs passed to :meth:`fit`, the identity
-        matrix will be truncated to the first ``n_components`` rows.
-
-      'random'
-        The initial transformation will be a random array of shape
-        `(n_components, n_features)`. Each value is sampled from the
-        standard normal distribution.
-
-      numpy array
-        n_features_b must match the dimensionality of the inputs passed to
-        :meth:`fit` and n_features_a must be less than or equal to that.
-        If ``num_dims`` is not None, n_features_a must match it.
-
-      A0: Not used.
-        .. deprecated:: 0.5.0
-          `A0` was deprecated in version 0.5.0 and will
-          be removed in 0.6.0. Use 'init' instead.
-
-      tol: float, optional (default=None)
-        Convergence tolerance for the optimization.
-
-      max_iter: int, optional
-        Cap on number of conjugate gradient iterations.
-
-      verbose : bool, optional (default=False)
-        Whether to print progress messages or not.
-
-      preprocessor : array-like, shape=(n_samples, n_features) or callable
-        The preprocessor to call to get tuples from indices. If array-like,
-        tuples will be formed like this: X[indices].
-
-      random_state : int or numpy.RandomState or None, optional (default=None)
-        A pseudo random number generator object or a seed for it if int. If
-        ``init='random'``, ``random_state`` is used to initialize the random
-        transformation. If ``init='pca'``, ``random_state`` is passed as an
-        argument to PCA when initializing the transformation.
- """ self.n_components = n_components self.num_dims = num_dims self.init = init diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index b3e6c203..ac2199c9 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -1,21 +1,4 @@ -r""" -Metric Learning with Application for Clustering with Side Information(MMC) - -MMC minimizes the sum of squared distances between similar points, while -enforcing the sum of distances between dissimilar ones to be greater than one. -This leads to a convex and, thus, local-minima-free optimization problem that -can be solved efficiently. -However, the algorithm involves the computation of eigenvalues, which is the -main speed-bottleneck. Since it has initially been designed for clustering -applications, one of the implicit assumptions of MMC is that all classes form -a compact set, i.e., follow a unimodal distribution, which restricts the -possible use-cases of this method. However, it is one of the earliest and a -still often cited technique. - -Read more in the :ref:`User Guide `. - -""" - +"""Mahalanobis Metric for Clustering (MMC)""" from __future__ import print_function, absolute_import, division import warnings import numpy as np @@ -30,7 +13,6 @@ class _BaseMMC(MahalanobisMixin): - """Mahalanobis Metric for Clustering (MMC)""" _tuple_size = 2 # constraints are pairs @@ -38,61 +20,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, init=None, A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, random_state=None): - """Initialize MMC. - Parameters - ---------- - max_iter : int, optional - max_proj : int, optional - convergence_threshold : float, optional - init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The (pseudo-)inverse of the covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - An SPD matrix of shape (n_features, n_features), that will - be used as such to initialize the metric. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions. The initialization will then - be the diagonal coefficients of the matrix given as 'init'. - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. 
-    random_state : int or numpy.RandomState or None, optional (default=None)
-      A pseudo random number generator object or a seed for it if int. If
-      ``init='random'``, ``random_state`` is used to initialize the random
-      transformation.
-    """
     self.max_iter = max_iter
     self.max_proj = max_proj
     self.convergence_threshold = convergence_threshold
@@ -403,6 +330,74 @@ def _D_constraint(self, neg_pairs, w):
 class MMC(_BaseMMC, _PairsClassifierMixin):
   """Mahalanobis Metric for Clustering (MMC)

+  MMC minimizes the sum of squared distances between similar points, while
+  enforcing the sum of distances between dissimilar ones to be greater than
+  one. This leads to a convex and, thus, local-minima-free optimization
+  problem that can be solved efficiently.
+  However, the algorithm involves the computation of eigenvalues, which is the
+  main speed-bottleneck. Since it was initially designed for clustering
+  applications, one of the implicit assumptions of MMC is that all classes form
+  a compact set, i.e., follow a unimodal distribution, which restricts the
+  possible use-cases of this method. However, it is one of the earliest
+  metric learning techniques and is still often cited.
+
+  Read more in the :ref:`User Guide `.
+
+  Parameters
+  ----------
+  max_iter : int, optional
+  max_proj : int, optional
+  convergence_threshold : float, optional
+  init : None, string or numpy array, optional (default=None)
+    Initialization of the Mahalanobis matrix. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). If None, will be set
+    automatically to 'identity' (this is to raise a warning if
+    'init' is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).
+
+    'covariance'
+      The (pseudo-)inverse of the covariance matrix.
+
+    'random'
+      The initial Mahalanobis matrix will be a random SPD matrix of
+      shape `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.
+
+    numpy array
+      An SPD matrix of shape (n_features, n_features), that will
+      be used as such to initialize the metric.
+
+  A0 : Not used.
+    .. deprecated:: 0.5.0
+      `A0` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0. Use 'init' instead.
+  diagonal : bool, optional
+    if True, a diagonal metric will be learned,
+    i.e., a simple scaling of dimensions. The initialization will then
+    be the diagonal coefficients of the matrix given as 'init'.
+  diagonal_c : float, optional
+    weight of the dissimilarity constraint for diagonal
+    metric learning
+  verbose : bool, optional
+    if True, prints information while learning
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be gotten like this: X[indices].
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``init='random'``, ``random_state`` is used to initialize the random
+    transformation.
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -416,6 +411,29 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
     If the distance metric between two points is lower than this threshold,
     points will be classified as similar, otherwise they will be
     classified as dissimilar.
+
+  Examples
+  --------
+  >>> from metric_learn import MMC_Supervised
+  >>> from sklearn.datasets import load_iris
+  >>> iris_data = load_iris()
+  >>> X = iris_data['data']
+  >>> Y = iris_data['target']
+  >>> mmc = MMC_Supervised(num_constraints=200)
+  >>> mmc.fit(X, Y)
+
+  References
+  ----------
+  .. [1] `Distance metric learning with application to clustering with
+         side-information `_
+         Xing, Jordan, Russell, Ng.
+
+  See Also
+  --------
+  metric_learn.MMC_Supervised : The supervised version of this algorithm
+
+  :ref:`supervised_version` : The section of the project documentation
+    that describes the supervised version of weakly supervised estimators.
   """

   def fit(self, pairs, y, calibration_params=None):
@@ -426,7 +444,7 @@ def fit(self, pairs, y, calibration_params=None):

     Parameters
     ----------
-    pairs : array-like, shape=(n_constraints, 2, n_features) or
+    pairs : array-like, shape=(n_constraints, 2, n_features) or \
                   (n_constraints, 2)
       3D Array of pairs with each row corresponding to two points, or
       2D array of indices of pairs if the metric learner uses a
@@ -453,6 +471,73 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   """Supervised version of Mahalanobis Metric for Clustering (MMC)

+  `MMC_Supervised` creates pairs of similar samples by taking same class
+  samples, and pairs of dissimilar samples by taking different class
+  samples. It then passes these pairs to `MMC` for training.
+
+  Parameters
+  ----------
+  max_iter : int, optional
+  max_proj : int, optional
+  convergence_threshold : float, optional
+  num_labeled : Not used
+    .. deprecated:: 0.5.0
+      `num_labeled` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0.
+  num_constraints: int, optional
+    number of constraints to generate
+  init : None, string or numpy array, optional (default=None)
+    Initialization of the Mahalanobis matrix. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). If None, will be set
+    automatically to 'identity' (this is to raise a warning if
+    'init' is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).
+
+    'covariance'
+      The (pseudo-)inverse of the covariance matrix.
+
+    'random'
+      The initial Mahalanobis matrix will be a random SPD matrix of
+      shape `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.
+
+    numpy array
+      A numpy array of shape (n_features, n_features), that will
+      be used as such to initialize the metric.
+
+  A0 : Not used.
+    .. deprecated:: 0.5.0
+      `A0` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0. Use 'init' instead.
+  diagonal : bool, optional
+    if True, a diagonal metric will be learned,
+    i.e., a simple scaling of dimensions
+  diagonal_c : float, optional
+    weight of the dissimilarity constraint for diagonal
+    metric learning
+  verbose : bool, optional
+    if True, prints information while learning
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be formed like this: X[indices].
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``init='random'``, ``random_state`` is used to initialize the random
+    Mahalanobis matrix.
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -467,71 +552,6 @@ def __init__(self, max_iter=100, max_proj=10000,
                convergence_threshold=1e-6,
                num_labeled='deprecated', num_constraints=None, init=None,
                A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False,
                preprocessor=None, random_state=None):
-    """Initialize the supervised version of `MMC`.
-
-    `MMC_Supervised` creates pairs of similar sample by taking same class
-    samples, and pairs of dissimilar samples by taking different class
-    samples. It then passes these pairs to `MMC` for training.
-
-    Parameters
-    ----------
-    max_iter : int, optional
-    max_proj : int, optional
-    convergence_threshold : float, optional
-    num_labeled : Not used
-      .. deprecated:: 0.5.0
-        `num_labeled` was deprecated in version 0.5.0 and will
-        be removed in 0.6.0.
-    num_constraints: int, optional
-      number of constraints to generate
-    init : None, string or numpy array, optional (default=None)
-      Initialization of the Mahalanobis matrix. Possible options are
-      'identity', 'covariance', 'random', and a numpy array of
-      shape (n_features, n_features). If None, will be set
-      automatically to 'identity' (this is to raise a warning if
-      'init' is not set, and stays to its default value (None), in v0.5.0).
-
-      'identity'
-        An identity matrix of shape (n_features, n_features).
-
-      'covariance'
-        The (pseudo-)inverse of the covariance matrix.
-
-      'random'
-        The initial Mahalanobis matrix will be a random SPD matrix of
-        shape `(n_features, n_features)`, generated using
-        `sklearn.datasets.make_spd_matrix`.
-
-      numpy array
-        A numpy array of shape (n_features, n_features), that will
-        be used as such to initialize the metric.
-
-      verbose : bool, optional
-        if True, prints information while learning
-
-      preprocessor : array-like, shape=(n_samples, n_features) or callable
-        The preprocessor to call to get tuples from indices. If array-like,
-        tuples will be gotten like this: X[indices].
-      A0 : Not used.
-        .. deprecated:: 0.5.0
-          `A0` was deprecated in version 0.5.0 and will
-          be removed in 0.6.0. Use 'init' instead.
-      diagonal : bool, optional
-        if True, a diagonal metric will be learned,
-        i.e., a simple scaling of dimensions
-      diagonal_c : float, optional
-        weight of the dissimilarity constraint for diagonal
-        metric learning
-      verbose : bool, optional
-        if True, prints information while learning
-      preprocessor : array-like, shape=(n_samples, n_features) or callable
-        The preprocessor to call to get tuples from indices. If array-like,
-        tuples will be formed like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. - """ _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, convergence_threshold=convergence_threshold, init=init, A0=A0, diagonal=diagonal, diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 1626e02f..9411eff0 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -1,15 +1,5 @@ -r""" -Neighborhood Components Analysis(NCA) - -NCA is a distance metric learning algorithm which aims to improve the -accuracy of nearest neighbors classification compared to the standard -Euclidean distance. The algorithm directly maximizes a stochastic variant -of the leave-one-out k-nearest neighbors(KNN) score on the training set. -It can also learn a low-dimensional linear transformation of data that can -be used for data visualization and fast classification. - -Read more in the :ref:`User Guide `. - +""" +Neighborhood Components Analysis (NCA) """ from __future__ import absolute_import @@ -32,6 +22,95 @@ class NCA(MahalanobisMixin, TransformerMixin): """Neighborhood Components Analysis (NCA) + NCA is a distance metric learning algorithm which aims to improve the + accuracy of nearest neighbors classification compared to the standard + Euclidean distance. The algorithm directly maximizes a stochastic variant + of the leave-one-out k-nearest neighbors(KNN) score on the training set. + It can also learn a low-dimensional linear transformation of data that can + be used for data visualization and fast classification. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + init : None, string or numpy array, optional (default=None) + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). If None, will be set automatically to + 'auto' (this option is to raise a warning if 'init' is not set, + and stays to its default value None, in v0.5.0). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. 
+ If ``n_components`` is not None, n_features_a must match it. + + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + max_iter : int, optional (default=100) + Maximum number of iterations done by the optimization algorithm. + + tol : float, optional (default=None) + Convergence tolerance for the optimization. + + verbose : bool, optional (default=False) + Whether to print progress messages or not. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + Examples + -------- + + >>> import numpy as np + >>> from metric_learn import NCA + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> nca = NCA(max_iter=1000) + >>> nca.fit(X, Y) + Attributes ---------- n_iter_ : `int` @@ -39,81 +118,21 @@ class NCA(MahalanobisMixin, TransformerMixin): transformer_ : `numpy.ndarray`, shape=(n_components, n_features) The learned linear transformation ``L``. + + References + ---------- + .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov. `Neighbourhood + Components Analysis + `_. + Advances in Neural Information Processing Systems. 17, 513-520, 2005. + + .. [2] Wikipedia entry on `Neighborhood Components Analysis + `_ """ def __init__(self, init=None, n_components=None, num_dims='deprecated', max_iter=100, tol=None, verbose=False, preprocessor=None, random_state=None): - """Neighborhood Components Analysis - - Parameters - ---------- - init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. 
-
-      numpy array
-        n_features_b must match the dimensionality of the inputs passed to
-        :meth:`fit` and n_features_a must be less than or equal to that.
-        If ``n_components`` is not None, n_features_a must match it.
-
-    n_components : int or None, optional (default=None)
-      Dimensionality of reduced space (if None, defaults to dimension of X).
-
-    num_dims : Not used
-
-      .. deprecated:: 0.5.0
-        `num_dims` was deprecated in version 0.5.0 and will
-        be removed in 0.6.0. Use `n_components` instead.
-
-    max_iter : int, optional (default=100)
-      Maximum number of iterations done by the optimization algorithm.
-
-    tol : float, optional (default=None)
-      Convergence tolerance for the optimization.
-
-    verbose : bool, optional (default=False)
-      Whether to print progress messages or not.
-
-    random_state : int or numpy.RandomState or None, optional (default=None)
-      A pseudo random number generator object or a seed for it if int. If
-      ``init='random'``, ``random_state`` is used to initialize the random
-      transformation. If ``init='pca'``, ``random_state`` is passed as an
-      argument to PCA when initializing the transformation.
-    """
     self.n_components = n_components
     self.init = init
     self.num_dims = num_dims
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 45c9bbf2..7f8d7a4b 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -1,14 +1,5 @@
-r"""
-Relative Components Analysis(RCA)
-
-RCA learns a full rank Mahalanobis distance metric based on a weighted sum of
-in-chunklets covariance matrices. It applies a global linear transformation to
-assign large weights to relevant dimensions and low weights to irrelevant
-dimensions. Those relevant dimensions are estimated using "chunklets", subsets
-of points that are known to belong to the same class.
-
-Read more in the :ref:`User Guide `.
-
+"""
+Relevant Components Analysis (RCA)
+"""
 from __future__ import absolute_import
@@ -41,6 +32,54 @@ def _chunk_mean_centering(data, chunks):

 class RCA(MahalanobisMixin, TransformerMixin):
   """Relevant Components Analysis (RCA)

+  RCA learns a full rank Mahalanobis distance metric based on a weighted sum of
+  in-chunklets covariance matrices. It applies a global linear transformation
+  to assign large weights to relevant dimensions and low weights to irrelevant
+  dimensions. Those relevant dimensions are estimated using "chunklets",
+  subsets of points that are known to belong to the same class.
+
+  Read more in the :ref:`User Guide `.
+
+  Parameters
+  ----------
+  n_components : int or None, optional (default=None)
+    Dimensionality of reduced space (if None, defaults to dimension of X).
+
+  num_dims : Not used
+
+    .. deprecated:: 0.5.0
+      `num_dims` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0. Use `n_components` instead.
+
+  pca_comps : int, float, None or string
+    Number of components to keep during PCA preprocessing.
+    If None (default), does not perform PCA.
+    If ``0 < pca_comps < 1``, it is used as
+    the minimum explained variance ratio.
+    See sklearn.decomposition.PCA for more details.
+
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be formed like this: X[indices].
+ + Examples + -------- + >>> from metric_learn import RCA_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) + >>> rca.fit(X, Y) + + References + ------------------ + .. [1] `Adjustment learning and relevant component analysis + `_ Noam + Shental, et al. + + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_components, n_features) @@ -49,30 +88,6 @@ class RCA(MahalanobisMixin, TransformerMixin): def __init__(self, n_components=None, num_dims='deprecated', pca_comps=None, preprocessor=None): - """Initialize the learner. - - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - pca_comps : int, float, None or string - Number of components to keep during PCA preprocessing. - If None (default), does not perform PCA. - If ``0 < pca_comps < 1``, it is used as - the minimum explained variance ratio. - See sklearn.decomposition.PCA for more details. - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - """ self.n_components = n_components self.num_dims = num_dims self.pca_comps = pca_comps @@ -148,6 +163,27 @@ def _inv_sqrtm(x): class RCA_Supervised(RCA): """Supervised version of Relevant Components Analysis (RCA) + `RCA_Supervised` creates chunks of similar points by first sampling a + class, taking `chunk_size` elements in it, and repeating the process + `num_chunks` times. + + Parameters + ---------- + n_components : int or None, optional (default=None) + Dimensionality of reduced space (if None, defaults to dimension of X). + + num_dims : Not used + + .. deprecated:: 0.5.0 + `num_dims` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use `n_components` instead. + + num_chunks: int, optional + chunk_size: int, optional + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_components, n_features) @@ -157,29 +193,6 @@ class RCA_Supervised(RCA): def __init__(self, num_dims='deprecated', n_components=None, pca_comps=None, num_chunks=100, chunk_size=2, preprocessor=None): - """Initialize the supervised version of `RCA`. - - `RCA_Supervised` creates chunks of similar points by first sampling a - class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. - - Parameters - ---------- - n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - num_chunks: int, optional - chunk_size: int, optional - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. 
- """ RCA.__init__(self, num_dims=num_dims, n_components=n_components, pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index b83c553d..db84592d 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -1,15 +1,5 @@ -r""" -Sparse High-Dimensional Metric Learning(SDML) - -SDML is an efficient sparse metric learning in high-dimensional space via -double regularization: an L1-penalization on the off-diagonal elements of the -Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence between -:math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either :math:`\mathbf{I}` -or :math:`\mathbf{\Omega}^{-1}`, where :math:`\mathbf{\Omega}` is the -covariance matrix). - -Read more in the :ref:`User Guide `. - +""" +Sparse High-Dimensional Metric Learning (SDML) """ from __future__ import absolute_import @@ -38,55 +28,6 @@ class _BaseSDML(MahalanobisMixin): def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, use_cov='deprecated', verbose=False, preprocessor=None, random_state=None): - """ - Parameters - ---------- - balance_param : float, optional - trade off between sparsity and M0 prior - - sparsity_param : float, optional - trade off between optimizer and sparseness (see graph_lasso) - - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random positive definite (PD) matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. - """ self.balance_param = balance_param self.sparsity_param = sparsity_param self.prior = prior @@ -190,6 +131,63 @@ def _fit(self, pairs, y): class SDML(_BaseSDML, _PairsClassifierMixin): """Sparse Distance Metric Learning (SDML) + SDML is an efficient sparse metric learning in high-dimensional space via + double regularization: an L1-penalization on the off-diagonal elements of the + Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence + between :math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either + :math:`\mathbf{I}` or :math:`\mathbf{\Omega}^{-1}`, where + :math:`\mathbf{\Omega}` is the covariance matrix). + + Read more in the :ref:`User Guide `. 
+
+  Parameters
+  ----------
+  balance_param : float, optional
+    trade off between sparsity and M0 prior
+
+  sparsity_param : float, optional
+    trade off between optimizer and sparseness (see graph_lasso)
+
+  prior : None, string or numpy array, optional (default=None)
+    Prior to set for the metric. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). For SDML, the prior should be strictly
+    positive definite (PD). If `None`, will be set
+    automatically to 'identity' (this is to raise a warning if
+    `prior` is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).
+
+    'covariance'
+      The inverse covariance matrix.
+
+    'random'
+      The prior will be a random positive definite (PD) matrix of shape
+      `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.
+
+    numpy array
+      A positive definite (PD) matrix of shape
+      (n_features, n_features), that will be used as such to set the
+      prior.
+
+  use_cov : Not used.
+    .. deprecated:: 0.5.0
+      `use_cov` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0. Use 'prior' instead.
+
+  verbose : bool, optional
+    if True, prints information while learning
+
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be gotten like this: X[indices].
+
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``prior='random'``, ``random_state`` is used to set the prior.
+
   Attributes
   ----------
   transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
       The linear transformation ``L`` deduced from the learned Mahalanobis
       metric (See function `transformer_from_metric`.)
@@ -200,6 +198,27 @@ class SDML(_BaseSDML, _PairsClassifierMixin):
     If the distance metric between two points is lower than this threshold,
     points will be classified as similar, otherwise they will be
     classified as dissimilar.
+
+  Examples
+  --------
+  >>> from metric_learn import SDML_Supervised
+  >>> from sklearn.datasets import load_iris
+  >>> iris_data = load_iris()
+  >>> X = iris_data['data']
+  >>> Y = iris_data['target']
+  >>> sdml = SDML_Supervised(num_constraints=200)
+  >>> sdml.fit(X, Y)
+
+  References
+  ----------
+
+  .. [1] Qi et al.
+     An efficient sparse metric learning in high-dimensional space via
+     L1-penalized log-determinant regularization. ICML 2009.
+     http://lms.comp.nus.edu.sg/sites/default/files/publication\
+-attachments/icml09-guojun.pdf
+
+  .. [2] Adapted from https://gist.github.com/kcarnold/5439945
   """

   def fit(self, pairs, y, calibration_params=None):
@@ -210,7 +229,7 @@ def fit(self, pairs, y, calibration_params=None):

     Parameters
     ----------
-    pairs : array-like, shape=(n_constraints, 2, n_features) or
+    pairs : array-like, shape=(n_constraints, 2, n_features) or \
                   (n_constraints, 2)
       3D Array of pairs with each row corresponding to two points, or
       2D array of indices of pairs if the metric learner uses a
@@ -238,74 +257,78 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
   """Supervised version of Sparse Distance Metric Learning (SDML)

+  `SDML_Supervised` creates pairs of similar samples by taking same class
+  samples, and pairs of dissimilar samples by taking different class
+  samples. It then passes these pairs to `SDML` for training.
+
+  Parameters
+  ----------
+  balance_param : float, optional
+    trade off between sparsity and M0 prior
+  sparsity_param : float, optional
+    trade off between optimizer and sparseness (see graph_lasso)
+  prior : None, string or numpy array, optional (default=None)
+    Prior to set for the metric. Possible options are
+    'identity', 'covariance', 'random', and a numpy array of
+    shape (n_features, n_features). For SDML, the prior should be strictly
+    positive definite (PD). If `None`, will be set
+    automatically to 'identity' (this is to raise a warning if
+    `prior` is not set, and stays to its default value (None), in v0.5.0).
+
+    'identity'
+      An identity matrix of shape (n_features, n_features).
+
+    'covariance'
+      The inverse covariance matrix.
+
+    'random'
+      The prior will be a random SPD matrix of shape
+      `(n_features, n_features)`, generated using
+      `sklearn.datasets.make_spd_matrix`.
+
+    numpy array
+      A positive definite (PD) matrix of shape
+      (n_features, n_features), that will be used as such to set the
+      prior.
+
+  use_cov : Not used.
+    .. deprecated:: 0.5.0
+      `use_cov` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0. Use 'prior' instead.
+
+  num_labeled : Not used
+    .. deprecated:: 0.5.0
+      `num_labeled` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0.
+  num_constraints : int, optional
+    number of constraints to generate
+  verbose : bool, optional
+    if True, prints information while learning
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be formed like this: X[indices].
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int. If
+    ``prior='random'``, ``random_state`` is used to set the random
+    prior.
+
   Attributes
   ----------
   transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
       The linear transformation ``L`` deduced from the learned Mahalanobis
       metric (See function `transformer_from_metric`.)
+
+  See Also
+  --------
+  metric_learn.SDML : The original weakly-supervised algorithm
+
+  :ref:`supervised_version` : The section of the project documentation
+    that describes the supervised version of weakly supervised estimators.
   """

   def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None,
                use_cov='deprecated', num_labeled='deprecated',
                num_constraints=None, verbose=False, preprocessor=None,
                random_state=None):
-    """Initialize the supervised version of `SDML`.
-
-    `SDML_Supervised` creates pairs of similar sample by taking same class
-    samples, and pairs of dissimilar samples by taking different class
-    samples. It then passes these pairs to `SDML` for training.
-
-    Parameters
-    ----------
-    balance_param : float, optional
-      trade off between sparsity and M0 prior
-    sparsity_param : float, optional
-      trade off between optimizer and sparseness (see graph_lasso)
-    prior : None, string or numpy array, optional (default=None)
-      Prior to set for the metric. Possible options are
-      'identity', 'covariance', 'random', and a numpy array of
-      shape (n_features, n_features). For SDML, the prior should be strictly
-      positive definite (PD). If `None`, will be set
-      automatically to 'identity' (this is to raise a warning if
-      `prior` is not set, and stays to its default value (None), in v0.5.0).
-
-      'identity'
-        An identity matrix of shape (n_features, n_features).
-
-      'covariance'
-        The inverse covariance matrix.
- - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints : int, optional - number of constraints to generate - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. - """ _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, use_cov=use_cov, verbose=verbose, From d207719b7c3e905f5000ac1f1059a98d76168ea0 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 09:27:27 +0200 Subject: [PATCH 04/16] Fix plot_metric_example --- examples/plot_metric_learning_examples.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index fd6cff20..9056c559 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -202,10 +202,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`MMC # ` -itml = metric_learn.ITML_Supervised() -X_itml = itml.fit_transform(X, y) +mmc = metric_learn.MMC_Supervised() +X_mmc = mmc.fit_transform(X, y) -plot_tsne(X_itml, y) +plot_tsne(X_mmc, y) ###################################################################### # Sparse Determinant Metric Learning @@ -221,7 +221,8 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`SDML # ` -sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015) +sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015, + prior='covariance') X_sdml = sdml.fit_transform(X, y) plot_tsne(X_sdml, y) @@ -240,7 +241,8 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`LSML # ` -lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000) +lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000, + prior='covariance') X_lsml = lsml.fit_transform(X, y) plot_tsne(X_lsml, y) From 19434be2fee0d5088271612738f76c38ce7bf96e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 09:37:05 +0200 Subject: [PATCH 05/16] Update links with the new pages --- examples/plot_metric_learning_examples.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index 9056c559..0d602cbb 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -130,7 +130,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in 
the documentation of the class :py:class:`LMNN -# ` +# ` ###################################################################### @@ -181,7 +181,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`ITML -# ` +# ` itml = metric_learn.ITML_Supervised() X_itml = itml.fit_transform(X, y) @@ -200,7 +200,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`MMC -# ` +# ` mmc = metric_learn.MMC_Supervised() X_mmc = mmc.fit_transform(X, y) @@ -219,7 +219,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`SDML -# ` +# ` sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015, prior='covariance') @@ -239,7 +239,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`LSML -# ` +# ` lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000, prior='covariance') @@ -267,7 +267,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`NCA -# ` +# ` nca = metric_learn.NCA(max_iter=1000) X_nca = nca.fit_transform(X, y) @@ -287,7 +287,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`LFDA -# ` +# ` lfda = metric_learn.LFDA(k=2, num_dims=2) X_lfda = lfda.fit_transform(X, y) @@ -308,7 +308,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`RCA -# ` +# ` rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2) X_rca = rca.fit_transform(X, y) @@ -328,7 +328,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # - See more in the :ref:`User Guide ` # - See more in the documentation of the class :py:class:`MLKR -# ` +# ` # # To illustrate MLKR, let's use the dataset # `sklearn.datasets.make_regression` the same way as we did with the @@ -447,8 +447,8 @@ def create_constraints(labels): ###################################################################### # Using our constraints, let's now train ITML again. 
Note that we are no # longer calling the supervised class :py:class:`ITML_Supervised -# ` but the more generic -# (weakly-supervised) :py:class:`ITML `, which +# ` but the more generic +# (weakly-supervised) :py:class:`ITML `, which # takes the dataset `X` through the `preprocessor` argument (see # :ref:`this section ` of the documentation to learn # about more advanced uses of `preprocessor`) and the pair information `pairs` From 99e1adb7a0943dddfeed2614a45a22fd74d7999b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 09:58:48 +0200 Subject: [PATCH 06/16] Use random init for LMNN --- examples/plot_metric_learning_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index 0d602cbb..b46d1adc 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -139,7 +139,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # # setting up LMNN -lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6) +lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6, init='random') # fit the data! lmnn.fit(X, y) From d90853f705e361103880c7636281a0ba5662f7c5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 12 Jun 2019 11:12:56 +0200 Subject: [PATCH 07/16] Update remaining num_dims --- metric_learn/mlkr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index eab6a60a..0d5ef2dc 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -72,7 +72,7 @@ class MLKR(MahalanobisMixin, TransformerMixin): numpy array n_features_b must match the dimensionality of the inputs passed to :meth:`fit` and n_features_a must be less than or equal to that. - If ``num_dims`` is not None, n_features_a must match it. + If ``n_components`` is not None, n_features_a must match it. A0: Not used. .. deprecated:: 0.5.0 From 0a25a0aa5cd611e8ecc42427c709406a529a4489 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 14 Jun 2019 13:05:09 +0200 Subject: [PATCH 08/16] Add description of the API for Weakly Supervised and Supervised algorithms --- doc/metric_learn.rst | 2 +- doc/supervised.rst | 107 +++++++++--- doc/unsupervised.rst | 37 ++++ doc/weakly_supervised.rst | 349 ++++++++++++++++++++++++++++++++----- metric_learn/covariance.py | 2 + 5 files changed, 431 insertions(+), 66 deletions(-) create mode 100644 doc/unsupervised.rst diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 55479672..3c36b0ea 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -32,7 +32,7 @@ Supervised Learning Algorithms metric_learn.RCA_Supervised Weakly Supervised Learning Algorithms ------------------------------- +------------------------------------- .. autosummary:: :toctree: generated/ diff --git a/doc/supervised.rst b/doc/supervised.rst index 414560c4..a59d2d89 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -8,38 +8,100 @@ labels `y`, and learn a distance matrix that make points from the same class other, and points from different classes or with distant target values far away from each other. -Scikit-learn compatibility -========================== +General API +=========== -All supervised algorithms are scikit-learn `Estimators`, so they are -compatible with Pipelining and scikit-learn model selection routines. +Supervised Metric Learning Algorithms are the easiest metric-learn algorithms +to use, since they use the same API as ``scikit-learn``. 
-

Input data
----------
In order to train a model, you need two `array-like `_ objects, `X` and `y`. `X`
should be a 2D array-like of shape `(n_samples, n_features)`, where
`n_samples` is the number of points of your dataset and `n_features` is the
number of attributes of each of your points. `y` should be a 1D array-like
of shape `(n_samples,)`, containing for each point in `X` the class it
belongs to (or the value to regress for this sample, if you use `MLKR` for
instance).

Here is an example of a dataset of two dogs and one
cat (the classes are 'dog' and 'cat'), each animal being represented by
two numbers.

>>> import numpy as np
>>> X = np.array([[2.3, 3.6], [0.2, 0.5], [6.7, 2.1]])
>>> y = np.array(['dog', 'cat', 'dog'])

.. note::

   You can also use a preprocessor instead of directly giving the inputs as
   2D arrays. See the :ref:`preprocessor_section` section for more details.

Fit, transform, and so on
-------------------------
The goal of supervised metric-learning algorithms is to transform
points in a new space, in which the distance between two points from the
same class will be small, and the distance between two points from different
classes will be large. To do so, we fit the metric learner (here, `NCA`).

>>> from metric_learn import NCA
>>> nca = NCA(random_state=42)
>>> nca.fit(X, y)
NCA(init=None, max_iter=100, n_components=None, num_dims='deprecated',
  preprocessor=None, random_state=42, tol=None, verbose=False)


Now that the estimator is fitted, you can use it on new data for several
purposes.

First, you can transform the data into the learned space, using `transform`.
Here we transform two points in the new embedding space.

>>> X_new = np.array([[9.4, 4.1], [2.1, 4.4]])
>>> nca.transform(X_new)
array([[ 5.91884732, 10.25406973],
       [ 3.1545886 , 6.80350083]])

Also, as explained before, our metric learner has learned a distance between
points. You can use this distance in two main ways:

- You can either return the distance between pairs of points using the
  `score_pairs` function:

>>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]])
array([0.49627072, 3.65287282])

- Or you can return a function that will return the distance (in the new
  space) between two 1D arrays (the coordinates of the points in the original
  space), similarly to distance functions in `scipy.spatial.distance`.

>>> metric_fun = nca.get_metric()
>>> metric_fun([3.5, 3.6], [5.6, 2.4])
0.4962707194621285
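Since `get_metric` returns a plain Python function, it can for instance be
plugged into scikit-learn estimators that accept a callable metric. This is
only a sketch, reusing `nca`, `X`, `y` and `X_new` from above (in practice,
transforming the data once with `transform` is usually faster than passing a
callable)::

    from sklearn.neighbors import KNeighborsClassifier

    # nearest neighbors computed with the learned distance
    knn = KNeighborsClassifier(n_neighbors=2, metric=nca.get_metric())
    knn.fit(X, y)
    predictions = knn.predict(X_new)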
.. note::

    If the metric learner that you use learns a Mahalanobis matrix (as is
    the case for all algorithms currently in metric-learn), you can get the
    plain learned Mahalanobis matrix using `get_mahalanobis_matrix`.

    >>> nca.get_mahalanobis_matrix()
    array([[0.43680409, 0.89169412],
           [0.89169412, 1.9542479 ]])

.. TODO: remove the "as is the case etc..." part if it's not the case anymore

Scikit-learn compatibility
--------------------------

All supervised algorithms are scikit-learn estimators
(`sklearn.base.BaseEstimator`) and transformers
(`sklearn.base.TransformerMixin`), so they are compatible with pipelining and
scikit-learn model selection routines.
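For instance, a metric learner can be chained with a classifier and evaluated
by cross-validation. This is only a sketch (the pipeline step names and
hyperparameter values below are arbitrary)::

    from sklearn.datasets import load_iris
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import Pipeline
    from metric_learn import NCA

    X, y = load_iris(return_X_y=True)
    # learn a metric with NCA, then classify in the learned space
    pipe = Pipeline([('nca', NCA(random_state=42)),
                     ('knn', KNeighborsClassifier())])
    scores = cross_val_score(pipe, X, y, cv=3)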
Algorithms
==========

.. _lmnn:

@@ -87,11 +149,6 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class,
 lmnn = LMNN(k=5, learn_rate=1e-6)
 lmnn.fit(X, Y, verbose=False)

-If a recent version of the Shogun Python modular (``modshogun``) library
-is available, the LMNN implementation will use the fast C++ version from
-there. Otherwise, the included pure-Python version will be used.
-The two implementations differ slightly, and the C++ version is more complete.
-
 .. topic:: References:

 .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor
diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst
new file mode 100644
index 00000000..1d5bef43
--- /dev/null
+++ b/doc/unsupervised.rst
@@ -0,0 +1,37 @@
+============================
+Unsupervised Metric Learning
+============================
+
+Unsupervised metric learning algorithms just take as input points `X`. For
+now, the only unsupervised algorithm in metric-learn is `Covariance`, a
+simple baseline algorithm (see below).
+
+
+Algorithms
+==========
+.. _covariance:
+
+Covariance
+----------
+
+`Covariance` does not "learn" anything; rather, it calculates
+the covariance matrix of the input data. This is a simple baseline method.
+It can be used for ZCA whitening of the data (see the Wikipedia page of
+`whitening transformation `_).
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import Covariance
+    from sklearn.datasets import load_iris
+
+    iris = load_iris()['data']
+
+    cov = Covariance().fit(iris)
+    x = cov.transform(iris)
+
+.. topic:: References:
+
+  .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
\ No newline at end of file
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 71ccb334..7e488ac7 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -11,17 +11,28 @@ and dissimilar points.
 particular form of input data.

+General API
+===========
+
 Input data
-==========
+----------

 In the following paragraph we talk about tuples for the sake of generality.
 These can be pairs, triplets, quadruplets etc, depending on the particular
 metric learning algorithm we use.

 Basic form
-----------
-Every weakly supervised algorithm will take as input tuples of points, and if
-needed labels for theses tuples.
+^^^^^^^^^^
+
+Every weakly supervised algorithm will take as input tuples of
+points, and, if needed, labels for these tuples. The tuples of points can
+also be called "constraints". Each is a set of points that we consider
+together (e.g. two points, three points, etc.). The label is some information
+we have about this set of points (e.g. "these two points are similar"). Note
+that some information can be contained in the ordering of these tuples (see
+for instance the section :ref:`learning_on_quadruplets`). For more details
+about the specifics of each algorithm, refer to the appropriate section:
+either :ref:`learning_on_pairs` or :ref:`learning_on_quadruplets`.


The `tuples` argument is the first argument of every method (like the X
@@ -44,7 +55,7 @@ These are two data structures that can be used to represent tuple in metric
learn:

3D array of tuples
------------------
+^^^^^^^^^^^^^^^^^^

The most intuitive way to represent tuples is to provide the algorithm with a
3D array-like of tuples of shape ``(n_tuples, t, n_features)``, where
@@ -62,10 +73,10 @@ the number of features of each point.
>>> [[-2.16, +0.11, -0.02],
>>> [+1.58, +0.16, +0.93]],
>>>
->>> [[+1.58, +0.16, +0.93 ], # same as tuples[1, 1, :]
+>>> [[+1.58, +0.16, +0.93], # same as tuples[1, 1, :]
>>> [+0.89, -0.34, +2.41]],
>>>
->>> [[-0.12, -1.21, -0.20 ], # same as tuples[0, 0, :]
+>>> [[-0.12, -1.21, -0.20], # same as tuples[0, 0, :]
>>> [-2.16, +0.11, -0.02]]]) # same as tuples[1, 0, :]
>>> y = np.array([-1, 1, 1, -1])

@@ -77,7 +88,7 @@ the number of features of each point.

2D array of indicators + preprocessor
-------------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Instead of forming each point in each tuple, a more efficient representation
would be to keep the dataset of points ``X`` aside, and just represent tuples
@@ -101,13 +112,7 @@ the feature dimension there, the resulting array is 2D.
In order to fit metric learning algorithms with this type of input, we need
to give the original dataset of points ``X`` to the estimator so that it knows
the points the indices refer to. We do this when initializing the estimator,
-through the argument `preprocessor`.
-
-.. topic:: Example:
-
->>> from metric_learn import MMC
->>> mmc = MMC(preprocessor=X)
->>> mmc.fit(pairs_indices, y)
+through the argument `preprocessor` (see below, :ref:`fit_ws`).

.. note::

@@ -118,17 +123,85 @@ through the argument `preprocessor`.
paths in the filesystem, name of records in a database etc...) See section
:ref:`preprocessor_section` for more details on how to use the preprocessor.
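As a toy sketch of the callable form (the identifiers and the lookup table
below are made up for illustration), a preprocessor can map identifiers to
points::

    import numpy as np
    from metric_learn import MMC

    lookup = {'img01': np.array([-0.12, -1.21, -0.20]),
              'img02': np.array([+1.58, +0.16, +0.93])}

    def preprocessor(identifiers):
        # return the 2D array of points for an array of identifiers
        return np.array([lookup[i] for i in identifiers])

    mmc = MMC(preprocessor=preprocessor)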
-.. _sklearn_compat_ws:
+.. _fit_ws:

Fit, transform, and so on
-------------------------

The goal of weakly-supervised metric-learning algorithms is to transform
points in a new space, in which the tuple-wise constraints between points
are respected.

>>> from metric_learn import MMC
>>> mmc = MMC(random_state=42)
>>> mmc.fit(tuples, y)
MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000,
  preprocessor=None, random_state=42, verbose=False)

Or alternatively (using a preprocessor):

>>> from metric_learn import MMC
>>> mmc = MMC(preprocessor=X, random_state=42)
>>> mmc.fit(pairs_indices, y)

Now that the estimator is fitted, you can use it on new data for several
purposes.

First, you can transform the data into the learned space, using `transform`.
Here we transform two points in the new embedding space.

>>> X_new = np.array([[9.4, 4.1, 4.2], [2.1, 4.4, 2.3]])
>>> mmc.transform(X_new)
array([[-3.24667162e+01, 4.62622348e-07, 3.88325421e-08],
       [-3.61531114e+01, 4.86778289e-07, 2.12654397e-08]])

Also, as explained before, our metric learner has learned a distance between
points. You can use this distance in two main ways:

- You can either return the distance between pairs of points using the
  `score_pairs` function:

>>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]],
...                  [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]])
array([7.27607365, 0.88853014])

- Or you can return a function that will return the distance
  (in the new space) between two 1D arrays (the coordinates of the points in
  the original space), similarly to distance functions in
  `scipy.spatial.distance`. To do that, use the `get_metric` method.

>>> metric_fun = mmc.get_metric()
>>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7])
7.276073646278203

.. note::

   If the metric learner that you use learns a Mahalanobis matrix (as is
   the case for all algorithms currently in metric-learn), you can get the
   plain Mahalanobis matrix using `get_mahalanobis_matrix`.

>>> mmc.get_mahalanobis_matrix()
array([[ 0.58603894, -5.69883982, -1.66614919],
       [-5.69883982, 55.41743549, 16.20219519],
       [-1.66614919, 16.20219519, 4.73697721]])

.. TODO: remove the "as is the case etc..." part if it's not the case anymore

.. _sklearn_compat_ws:

Scikit-learn compatibility
--------------------------

Weakly supervised estimators are compatible with scikit-learn routines for
model selection (grid-search, cross-validation etc). See the scoring section
of the appropriate algorithm (:ref:`pairs learners `
or :ref:`quadruplets learners `)
for more details on the scoring used in the case of Weakly Supervised Metric
Learning.

Example:

>>> import numpy as np
>>> from metric_learn import MMC
>>> from sklearn.datasets import load_iris
>>> from sklearn.model_selection import cross_val_score
>>> iris_data = load_iris()
>>> X = iris_data['data']
>>> y = iris_data['target']
>>> # sample random pairs of indices, labeled by whether the two points
>>> # belong to the same class
>>> rng = np.random.RandomState(42)
>>> pairs_indices = rng.randint(X.shape[0], size=(20, 2))
>>> y_pairs = 2 * (y[pairs_indices[:, 0]] == y[pairs_indices[:, 1]]) - 1
>>> mmc = MMC(preprocessor=X)
>>> cross_val_score(mmc, pairs_indices, y_pairs)
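Hyperparameters can be tuned the same way. A minimal sketch, reusing `X`,
`pairs_indices` and `y_pairs` from the example above (the grid values below
are arbitrary)::

    from sklearn.model_selection import GridSearchCV
    from metric_learn import MMC

    # grid-search over a pairs learner, using its default scoring
    search = GridSearchCV(MMC(preprocessor=X),
                          param_grid={'max_iter': [50, 100]}, cv=3)
    search.fit(pairs_indices, y_pairs)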
Prediction and scoring
----------------------

Weakly supervised learners are also able, once fitted, to predict, for a
given tuple, its label (for pairs) or its ordering (for quadruplets). See
the appropriate section for more details: either :ref:`this
one ` for pairs, or :ref:`this one
` for quadruplets.

They also implement a default scoring method, `score`, that can be
used to evaluate the performance of a metric-learner on a test dataset. See
the appropriate section for more details: either :ref:`this
one ` for pairs, or :ref:`this one `
for quadruplets.

.. _learning_on_pairs:

Learning on pairs
=================

Some metric learning algorithms learn on pairs of samples. In this case, one
should provide the algorithm with ``n_samples`` pairs of points, with a
corresponding target containing ``n_samples`` values being either +1 or -1.
These values indicate whether the given pairs are similar points or
dissimilar points.

Fitting
-------
Here is an example for fitting on pairs (see :ref:`fit_ws` for more details on
the input data format and how to fit, in the general case of learning on
tuples).

>>> from metric_learn import MMC
>>> pairs = np.array([[[1.2, 3.2], [2.3, 5.5]],
>>>                   [[4.5, 2.3], [2.1, 2.3]]])
>>> y_pairs = np.array([1, -1])
>>> mmc = MMC(random_state=42)
>>> mmc.fit(pairs, y_pairs)
MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
  diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None,
  random_state=42, verbose=False)

Here, we learned a metric that puts the two first points closer
together in the transformed space, and the two last points further away from
each other.

.. _pairs_predicting:

Predicting
----------

When a pairs learner is fitted, it is also able to predict, for an
upcoming pair, whether it is a pair of similar or dissimilar points.

>>> mmc.predict([[[0.6, 1.6], [1.15, 2.75]],
...              [[3.2, 1.1], [5.4, 6.1]]])
array([1, -1])

.. _calibration:

Thresholding
------------
In order to predict whether a new pair represents similar or dissimilar
samples, we in fact need to set a distance threshold, so that points closer (in
the learned space) than this threshold are predicted as similar, and points
further away are predicted as dissimilar. Several methods are possible for this
thresholding.

- **At fit time**: The threshold is set with `calibrate_threshold` (see
  below) on the trainset. You can specify the calibration parameters directly
  in the `fit` method with the `threshold_params` parameter (see the
  documentation of the `fit` method of any metric learner that learns on pairs
  of points for more information). This method can cause a little bit of
  overfitting. If you want to avoid that, calibrate the threshold after
  fitting, on a validation set.

  >>> mmc.fit(pairs, y) # will fit the threshold automatically after fitting

- **Manual**: calling `set_threshold` will set the threshold to a particular
  value.

  >>> mmc.set_threshold(0.4)

- **Calibration**: calling `calibrate_threshold` will calibrate the
  threshold to achieve a particular score on a validation set, the score
  being among the classical scores for classification (accuracy, f1
  score...).

  >>> mmc.calibrate_threshold(pairs, y)

See also: `sklearn.calibration`.
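For instance, on a held-out validation set (`pairs_valid` and `y_valid` are
made-up names here), one could calibrate the threshold for a specific score;
the `strategy` keyword sketched below should be checked against the
`calibrate_threshold` documentation::

    # calibrate for the F-beta score instead of the default accuracy
    mmc.calibrate_threshold(pairs_valid, y_valid, strategy='f_beta', beta=1.)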
.. _pairs_scoring:

Scoring
-------

Pairs learners are not only able to predict the label of given pairs: they
can also return a `decision_function` for a set of pairs. It is basically the
"score" that will be thresholded to find the prediction for the pair. In fact
this "score" is the opposite of the distance in the new space (a higher score
means points are similar, and a lower score dissimilar).

>>> mmc.decision_function([[[0.6, 1.6], [1.15, 2.75]],
...                        [[3.2, 1.1], [5.4, 6.1]]])
array([-0.12811124, -0.74750256])

This makes it possible to use all kinds of estimator scores usually used in
classic classification tasks, like `sklearn.metrics.accuracy_score` for
instance, inside cross-validation routines:

>>> from sklearn.model_selection import cross_val_score
>>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]],
...                        [[3.2, 1.1], [5.4, 6.1]],
...                        [[7.7, 5.6], [1.23, 8.4]]])
>>> y_test = np.array([-1., 1., -1.])
>>> cross_val_score(mmc, pairs_test, y_test, scoring='accuracy')
array([1., 0., 1.])

Pairs learners also have a default score, which basically
returns the `sklearn.metrics.roc_auc_score` (and therefore does not depend on
the threshold).

>>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]],
...                        [[3.2, 1.1], [5.4, 6.1]],
...                        [[7.7, 5.6], [1.23, 8.4]]])
>>> y_test = np.array([-1., 1., -1.])
>>> mmc.score(pairs_test, y_test)
0.5

.. note::
   See :ref:`fit_ws` for more details on metric learner functions that are
   not specific to learning on pairs, like `transform`, `score_pairs`,
   `get_metric` and `get_mahalanobis_matrix`.

Algorithms
----------

.. _itml:

:py:class:`ITML `
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Information Theoretic Metric Learning (:py:class:`ITML `)

@@ -273,7 +433,7 @@ is the prior distance metric, set to identity matrix by default,

 itml/

.. _sdml:

:py:class:`SDML `
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Sparse High-Dimensional Metric Learning
(:py:class:`SDML `)

@@ -333,7 +493,7 @@ is the off-diagonal L1 norm.

.. _rca:

:py:class:`RCA `
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Relative Components Analysis (:py:class:`RCA `)

@@ -389,7 +549,7 @@ as the Mahalanobis matrix.

.. _mmc:

:py:class:`MMC `
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Metric Learning with Application for Clustering with Side Information
(:py:class:`MMC `)

@@ -442,22 +602,131 @@ points, while constrains the sum of distances between dissimilar points:
 .. [2] Adapted from Matlab code `here `_.


.. _learning_on_quadruplets:

Learning on quadruplets
=======================

The goal of quadruplets learners is to transform points into a new space in
which, for each quadruplet, the two first points end up closer to each other
than the two last points.

Fitting
-------
Here is an example for fitting on quadruplets (see :ref:`fit_ws` for more
details on the input data format and how to fit, in the general case of
learning on tuples).

>>> from metric_learn import LSML
>>> quadruplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.4, 6.7], [2.1, 0.6]],
>>>                         [[4.5, 2.3], [2.1, 2.3], [0.6, 1.2], [7.3, 3.4]]])
>>> lsml = LSML(random_state=42)
>>> lsml.fit(quadruplets)
LSML(max_iter=1000, preprocessor=None, prior=None, random_state=42, tol=0.001,
   verbose=False)

Or alternatively (using a preprocessor):

>>> X = np.array([[1.2, 3.2],
>>>               [2.3, 5.5],
>>>               [2.4, 6.7],
>>>               [2.1, 0.6],
>>>               [4.5, 2.3],
>>>               [2.1, 2.3],
>>>               [0.6, 1.2],
>>>               [7.3, 3.4]])
>>> quadruplets_indices = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
>>> lsml = LSML(preprocessor=X, random_state=42)
>>> lsml.fit(quadruplets_indices)
LSML(max_iter=1000,
   preprocessor=array([[1.2, 3.2],
       [2.3, 5.5],
       [2.4, 6.7],
       [2.1, 0.6],
       [4.5, 2.3],
       [2.1, 2.3],
       [0.6, 1.2],
       [7.3, 3.4]]),
   prior=None, random_state=42, tol=0.001, verbose=False)


Here, we want to learn a metric that, for each of the two
`quadruplets`, will put the two first points closer together than the two
last points.

.. _quadruplets_predicting:

Predicting
----------

When a quadruplets learner is fitted, it is also able to predict, for an
upcoming quadruplet, whether the two first points are more similar than the
two last points (+1), or not (-1).

>>> quadruplets_test = np.array(
... [[[5.6, 5.3], [2.2, 2.1], [0.4, 0.6], [1.2, 3.4]],
...  [[6.0, 4.2], [4.3, 1.2], [4.5, 0.6], [0.1, 7.8]]])
>>> lsml.predict(quadruplets_test)
array([-1., 1.])

.. _quadruplets_scoring:

Scoring
-------

Quadruplets learners are not only able to predict the ordering of given
quadruplets: they can also return a `decision_function` for a set of
quadruplets. It is basically the "score" whose sign will be taken to find the
prediction for the quadruplet. In fact this "score" is the difference between
the distance between the two last points and the distance between the two
first points of the quadruplet (a higher score means the two last points are
more likely to be more dissimilar than the two first points, i.e. more likely
to yield a +1 prediction, since that is the right ordering).

>>> lsml.decision_function(quadruplets_test)
array([-1.75700306, 4.98982131])

In the above example, for the first quadruplet in `quadruplets_test`, the
two first points are predicted less similar than the two last points (they
are further away in the transformed space).
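To make this convention concrete, the first score can be recomputed by hand
from the learned metric, as described above (a sketch reusing `lsml` and
`quadruplets_test` from the snippets above)::

    dist = lsml.get_metric()
    a, b, c, d = quadruplets_test[0]
    # distance of the last pair minus distance of the first pair:
    # this should match the first decision_function value
    score = dist(c, d) - dist(a, b)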
Unlike pairs learners, quadruplets learners do not take a `y` when fitting,
which means that scikit-learn scoring functions cannot be used directly:

>>> from sklearn.model_selection import cross_val_score
>>> cross_val_score(lsml, quadruplets, scoring='f1_score') # this won't work

(This is actually intentional; for more details
about that, see
`this comment `_
on github.)

However, quadruplets learners do have a default scoring function, which will
basically return the accuracy score on a given test set, i.e. the proportion
of quadruplets that have the right predicted ordering.

>>> lsml.score(quadruplets_test)
0.5

.. note::
   See :ref:`fit_ws` for more details on metric learner functions that are
   not specific to learning on quadruplets, like `transform`, `score_pairs`,
   `get_metric` and `get_mahalanobis_matrix`.



Algorithms
----------

.. _lsml:

:py:class:`LSML `
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Metric Learning from Relative Comparisons by Minimizing Squared Residual
(:py:class:`LSML `)
diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py
index a973633f..b9666494 100644
--- a/metric_learn/covariance.py
+++ b/metric_learn/covariance.py
@@ -20,6 +20,8 @@ class Covariance(MahalanobisMixin, TransformerMixin):
 This is a simple baseline method first introduced in
 On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936

+  Read more in the :ref:`User Guide `.
+
 Attributes
 ----------
 transformer_ : `numpy.ndarray`, shape=(n_features, n_features)

From 8393de13102145177bc4117f0bdf008c78d2ef6c Mon Sep 17 00:00:00 2001
From: William de Vazelhes 
Date: Fri, 14 Jun 2019 14:02:11 +0200
Subject: [PATCH 09/16] Remove  to have the list of methods in the
 description of the class

---
 doc/conf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index a5114d47..e7e6a108 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -29,7 +29,6 @@
 exclude_patterns = ['_build']
 pygments_style = 'sphinx'
 todo_include_todos = True
-numpydoc_show_class_members = False

 # Options for HTML output
 html_theme = 'sphinx_rtd_theme'

From 5fc65845bd0efa7c9ffe57ea9c09430b39053684 Mon Sep 17 00:00:00 2001
From: William de Vazelhes 
Date: Tue, 25 Jun 2019 12:05:31 +0200
Subject: [PATCH 10/16] Fix: Update wronly merged lmnn with None instead of
 'auto' in init

---
 metric_learn/lmnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 20df49ee..a4dccacb 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -133,7 +133,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
 Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul

 """

-  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+  def __init__(self, init=None, k=3, min_iter=50, max_iter=1000,
                learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
                use_pca=True, verbose=False, preprocessor=None,
                n_components=None, num_dims='deprecated', random_state=None):

From 0fb6a0871d495cac3e308d01c2d0f3f8ceee5a12 Mon Sep 17 00:00:00 2001
From: William de Vazelhes 
Date: Tue, 25 Jun 2019 17:26:09 +0200
Subject: [PATCH 11/16] Add some documentation about generating pairs and
 quadruplets

---
 doc/supervised.rst | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/doc/supervised.rst b/doc/supervised.rst
index a59d2d89..5520ce8e 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -368,9 +368,27 @@ Supervised versions of weakly-supervised algorithms
Note that each :ref:`weakly-supervised algorithm `
has a supervised version of the form `*_Supervised` where similarity tuples are
generated from the labels information and passed to the underlying algorithm.
-
-.. todo:: add more details about that (see issue ``_)
+These constraints are sampled randomly under the hood.
+
+For pairs learners (see :ref:`learning_on_pairs`), pairs (tuples of two points
+from the dataset) and labels (`int` indicating whether the two points are
+similar (+1) or dissimilar (-1)) are sampled with the function
+`metric_learn.constraints.positive_negative_pairs`. To sample positive pairs
+(of label +1), this method will look at all the samples with the same label
+and randomly sample a pair among them. To sample negative pairs (of label -1),
+this method will look at all the samples from different classes and randomly
+sample a pair among them. The method will try to build `num_constraints`
+positive pairs and `num_constraints` negative pairs, but sometimes it cannot
+find enough of one of them, so forcing `same_length=True` will truncate both
+to the minimum of the two lengths. A sketch of this sampling is shown after
+this paragraph.
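As a sketch of what happens under the hood (written here with the
`Constraints` helper that appears later in this changeset; the exact call
signature should be checked against its documentation)::

    import numpy as np
    from metric_learn.constraints import Constraints

    y = np.array([0, 0, 1, 1])
    constraints = Constraints(y)
    # (a, b) index positive pairs, (c, d) index negative pairs
    a, b, c, d = constraints.positive_negative_pairs(num_constraints=2,
                                                     same_length=True)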
+
+To use quadruplets learners (see :ref:`learning_on_quadruplets`) in a
+supervised way, positive and negative pairs are sampled as before and then
+concatenated, giving a 3D array of quadruplets where, for each quadruplet,
+the two first points come from the same class and the two last points come
+from different classes (so indeed the two last points should be less similar
+than the two first points).

 .. topic:: Example Code:

From 46bc12feb5467b9d7e450e3ed03fabc257159bde Mon Sep 17 00:00:00 2001
From: William de Vazelhes 
Date: Thu, 27 Jun 2019 13:31:29 +0200
Subject: [PATCH 12/16] Add base module quick description

---
 metric_learn/base_metric.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 547c1217..558d28fd 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -1,3 +1,7 @@
+"""
+Base module.
+""" + from sklearn.base import BaseEstimator from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted From 0100ce2b9382fb98acd97046c06d6cd709712b10 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 27 Jun 2019 15:45:31 +0200 Subject: [PATCH 13/16] Put classes rather than modules --- doc/_templates/module.rst | 15 --------------- doc/metric_learn.rst | 8 +++++--- 2 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 doc/_templates/module.rst diff --git a/doc/_templates/module.rst b/doc/_templates/module.rst deleted file mode 100644 index 26cc83fa..00000000 --- a/doc/_templates/module.rst +++ /dev/null @@ -1,15 +0,0 @@ -:mod:`{{module}}` module -{{ underline }}============== - -.. currentmodule:: {{ module }} - -.. automodule:: metric_learn.base_metric - :members: - :undoc-members: - :show-inheritance: - -.. include:: {{module}}.examples - -.. raw:: html - -
diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 3c36b0ea..930404d0 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -9,10 +9,12 @@ Base Classes .. autosummary:: :toctree: generated/ - :template: module.rst + :template: class.rst - metric_learn.base_metric - metric_learn.constraints + metric_learn.Constraints + metric_learn.base_metric.BaseMetricLearner + metric_learn.base_metric._PairsClassifierMixin + metric_learn.base_metric._QuadrupletsClassifierMixin Supervised Learning Algorithms ------------------------------ From b2ea7c2e011582b9fe6e8226e3d10f3b19897a9d Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 27 Jun 2019 15:47:36 +0200 Subject: [PATCH 14/16] Add docstrings at top of algos --- metric_learn/base_metric.py | 8 ++++++-- metric_learn/constraints.py | 5 +++++ metric_learn/lfda.py | 1 - 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 558d28fd..1e5fa974 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -14,7 +14,8 @@ class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): - """Base class for all metric-learners + """ + Base class for all metric-learners. Parameters ---------- @@ -301,7 +302,8 @@ def get_mahalanobis_matrix(self): class _PairsClassifierMixin(BaseMetricLearner): - """ + """Base class for pairs learners. + Attributes ---------- threshold_ : `float` @@ -573,6 +575,8 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None, class _QuadrupletsClassifierMixin(BaseMetricLearner): + """Base class for quadruplets learners. + """ _tuple_size = 4 # number of points in a tuple, 4 for quadruplets diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index e591830b..069a6564 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -11,6 +11,11 @@ class Constraints(object): + """ + Class to build constraints from labels. + + See more in the :ref:`User Guide ` + """ def __init__(self, partial_labels): '''partial_labels : int arraylike, -1 indicating unknown label''' partial_labels = np.asanyarray(partial_labels, dtype=int) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index 7a0bb80a..6c651b7b 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -16,7 +16,6 @@ class LFDA(MahalanobisMixin, TransformerMixin): ''' Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction - Sugiyama, ICML 2006 LFDA is a linear supervised dimensionality reduction method. It is particularly useful when dealing with multimodality, where one ore more From 70943c3697fff16681a1bf92e5bb0ebe88256ac3 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 27 Jun 2019 15:54:23 +0200 Subject: [PATCH 15/16] Update name of API details to Package Contents --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index 3e4d0ce3..9d303bee 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -25,7 +25,7 @@ Documentation outline .. toctree:: :maxdepth: 2 - Package Overview + Package Contents .. 
toctree:: :maxdepth: 2 From f98a7813cfbaaae567d379480f9cb2cd76c1786b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Thu, 27 Jun 2019 15:54:23 +0200 Subject: [PATCH 16/16] Update name of API details to Package Contents --- doc/index.rst | 2 +- metric_learn/itml.py | 42 ++++++++-------- metric_learn/lmnn.py | 16 +++++++ metric_learn/lsml.py | 34 ++++++------- metric_learn/mmc.py | 112 +++++++++++++++++++++++-------------------- metric_learn/sdml.py | 10 ++-- 6 files changed, 121 insertions(+), 95 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index 3e4d0ce3..9d303bee 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -25,7 +25,7 @@ Documentation outline .. toctree:: :maxdepth: 2 - Package Overview + Package Contents .. toctree:: :maxdepth: 2 diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 51e34667..16fc21db 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -124,42 +124,44 @@ class ITML(_BaseITML, _PairsClassifierMixin): Parameters ---------- - gamma : float, optional - value for slack variables + gamma : float, optional (default=1.) + Value for slack variables - max_iter : int, optional + max_iter : int, optional (default=1000) + Maximum number of iteration of the optimization procedure. - convergence_threshold : float, optional + convergence_threshold : float, optional (default=1e-3) + Convergence tolerance. prior : string or numpy array, optional (default='identity') - The Mahalanobis matrix to use as a prior. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). + The Mahalanobis matrix to use as a prior. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). - 'identity' + 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. - 'random' + 'random' The prior will be a random SPD matrix of shape `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional - if True, prints information while learning + verbose : bool, optional (default=False) + If True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index a4dccacb..600d55c0 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -74,6 +74,22 @@ class LMNN(MahalanobisMixin, TransformerMixin): k : int, optional Number of neighbors to consider, not including self-edges. + min_iter : int, optional (default=50) + Minimum number of iterations of the optimization procedure. + + max_iter : int, optional (default=1000) + Maximum number of iterations of the optimization procedure. 
+ + learn_rate : float, optional (default=1e-7) + Learning rate of the optimization procedure + + tol : float, optional (default=0.001) + Tolerance of the optimization procedure. If the objective value varies + less than `tol`, we consider the algorithm has converged and stop it. + + verbose : bool, optional (default=False) + Whether to print the progress of the optimization procedure. + regularization: float, optional Weighting of pull and push terms, with 0.5 meaning equal weight. diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 48b3b54e..e3b0d323 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -243,31 +243,33 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): Parameters ---------- - tol : float, optional - max_iter : int, optional + tol : float, optional (default=1e-3) + Tolerance for the convergence procedure. + max_iter : int, optional (default=1000) + Number of maximum iterations of the convergence procedure. prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). If `None`, will be set + automatically to 'identity' (this is to raise a warning if + `prior` is not set, and stays to its default value (None), in v0.5.0). + + 'identity' An identity matrix of shape (n_features, n_features). - 'covariance' + 'covariance' The inverse covariance matrix. - 'random' + 'random' The initial Mahalanobis matrix will be a random positive definite (PD) matrix of shape `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. num_labeled : Not used .. deprecated:: 0.5.0 `num_labeled` was deprecated in version 0.5.0 and will diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index ac2199c9..9f02425c 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -344,59 +344,65 @@ class MMC(_BaseMMC, _PairsClassifierMixin): Read more in the :ref:`User Guide `. Parameters - ---------- - max_iter : int, optional - max_proj : int, optional - convergence_threshold : float, optional - init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The (pseudo-)inverse of the covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of - shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. 
- - numpy array - An SPD matrix of shape (n_features, n_features), that will - be used as such to initialize the metric. - - verbose : bool, optional - if True, prints information while learning - - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions. The initialization will then - be the diagonal coefficients of the matrix given as 'init'. - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. + ---------- + max_iter : int, optional (default=100) + Maximum number of iterations of the convergence procedure. + + max_proj : int, optional (default=10000) + Maximum number of projection steps. + + convergence_threshold : float, optional (default=1e-6) + Convergence threshold for the convergence procedure. + + init : None, string or numpy array, optional (default=None) + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). If None, will be set + automatically to 'identity' (this is to raise a warning if + 'init' is not set, and stays to its default value (None), in v0.5.0). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse of the covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + An SPD matrix of shape (n_features, n_features), that will + be used as such to initialize the metric. + + verbose : bool, optional + if True, prints information while learning + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + A0 : Not used. + .. deprecated:: 0.5.0 + `A0` was deprecated in version 0.5.0 and will + be removed in 0.6.0. Use 'init' instead. + diagonal : bool, optional + if True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. + diagonal_c : float, optional + weight of the dissimilarity constraint for diagonal + metric learning + verbose : bool, optional + if True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. 
If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. Attributes ---------- diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 935a4ba3..70e65c86 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -177,7 +177,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin): `A0` was deprecated in version 0.5.0 and will be removed in 0.6.0. Use 'prior' instead. - verbose : bool, optional + verbose : bool, optional (default=False) if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable @@ -263,9 +263,9 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Parameters ---------- - balance_param : float, optional + balance_param : float, optional (default=0.5) trade off between sparsity and M0 prior - sparsity_param : float, optional + sparsity_param : float, optional (default=0.01) trade off between optimizer and sparseness (see graph_lasso) prior : None, string or numpy array, optional (default=None) Prior to set for the metric. Possible options are @@ -300,9 +300,9 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): .. deprecated:: 0.5.0 `num_labeled` was deprecated in version 0.5.0 and will be removed in 0.6.0. - num_constraints : int, optional + num_constraints : int, optional (default=None) number of constraints to generate - verbose : bool, optional + verbose : bool, optional (default=False) if True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like,