diff --git a/.gitignore b/.gitignore
index 449f70ea..8321c7d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ htmlcov/
.cache/
.pytest_cache/
doc/auto_examples/*
+doc/generated/*
\ No newline at end of file
diff --git a/doc/_templates/class.rst b/doc/_templates/class.rst
new file mode 100644
index 00000000..f0c1b5bc
--- /dev/null
+++ b/doc/_templates/class.rst
@@ -0,0 +1,16 @@
+:mod:`{{module}}`.{{objname}}
+{{ underline }}==============
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+ :members:
+ :undoc-members:
+ :inherited-members:
+ :special-members: __init__
+
+.. include:: {{module}}.{{objname}}.examples
+
+.. raw:: html
+
+
diff --git a/doc/conf.py b/doc/conf.py
index a11f8bba..e7e6a108 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import sys
+import os
extensions = [
'sphinx.ext.autodoc',
@@ -28,7 +29,6 @@
exclude_patterns = ['_build']
pygments_style = 'sphinx'
todo_include_todos = True
-numpydoc_show_class_members = False
# Options for HTML output
html_theme = 'sphinx_rtd_theme'
@@ -50,3 +50,16 @@
'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
'scikit-learn': ('https://scikit-learn.org/stable/', None)
}
+
+
+# sphinx-gallery configuration
+sphinx_gallery_conf = {
+ # to generate mini-galleries at the end of each docstring in the API
+ # section: (see https://sphinx-gallery.github.io/configuration.html
+ # #references-to-examples)
+ 'doc_module': 'metric_learn',
+ 'backreferences_dir': os.path.join('generated'),
+}
+
+# generate autosummary even if no references
+autosummary_generate = True
diff --git a/doc/index.rst b/doc/index.rst
index 3e4d0ce3..9d303bee 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -25,7 +25,7 @@ Documentation outline
.. toctree::
:maxdepth: 2
- Package Overview
+ Package Contents
.. toctree::
:maxdepth: 2
diff --git a/doc/metric_learn.base_metric.rst b/doc/metric_learn.base_metric.rst
deleted file mode 100644
index 050a360b..00000000
--- a/doc/metric_learn.base_metric.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-metric_learn.base_metric module
-===============================
-
-.. automodule:: metric_learn.base_metric
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/doc/metric_learn.constraints.rst b/doc/metric_learn.constraints.rst
deleted file mode 100644
index 97d79002..00000000
--- a/doc/metric_learn.constraints.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-metric_learn.constraints module
-===============================
-
-.. automodule:: metric_learn.constraints
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/doc/metric_learn.covariance.rst b/doc/metric_learn.covariance.rst
deleted file mode 100644
index 493878c1..00000000
--- a/doc/metric_learn.covariance.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-Covariance metric (baseline method)
-===================================
-
-.. automodule:: metric_learn.covariance
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import Covariance
- from sklearn.datasets import load_iris
-
- iris = load_iris()['data']
-
- cov = Covariance().fit(iris)
- x = cov.transform(iris)
diff --git a/doc/metric_learn.itml.rst b/doc/metric_learn.itml.rst
deleted file mode 100644
index addb4c76..00000000
--- a/doc/metric_learn.itml.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-Information Theoretic Metric Learning (ITML)
-============================================
-
-.. automodule:: metric_learn.itml
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import ITML_Supervised
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- itml = ITML_Supervised(num_constraints=200)
- itml.fit(X, Y)
-
-References
-----------
-`Information-theoretic Metric Learning `_ Jason V. Davis, et al.
diff --git a/doc/metric_learn.lfda.rst b/doc/metric_learn.lfda.rst
deleted file mode 100644
index 41088a68..00000000
--- a/doc/metric_learn.lfda.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-Local Fisher Discriminant Analysis (LFDA)
-=========================================
-
-.. automodule:: metric_learn.lfda
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- import numpy as np
- from metric_learn import LFDA
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- lfda = LFDA(k=2, dim=2)
- lfda.fit(X, Y)
-
-References
-------------------
-`Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis `_ Masashi Sugiyama.
-
-`Local Fisher Discriminant Analysis on Beer Style Clustering `_ Yuan Tang.
diff --git a/doc/metric_learn.lmnn.rst b/doc/metric_learn.lmnn.rst
deleted file mode 100644
index bc65161e..00000000
--- a/doc/metric_learn.lmnn.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-Large Margin Nearest Neighbor (LMNN)
-====================================
-
-.. automodule:: metric_learn.lmnn
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- import numpy as np
- from metric_learn import LMNN
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- lmnn = LMNN(k=5, learn_rate=1e-6)
- lmnn.fit(X, Y, verbose=False)
-
-If a recent version of the Shogun Python modular (``modshogun``) library
-is available, the LMNN implementation will use the fast C++ version from
-there. Otherwise, the included pure-Python version will be used.
-The two implementations differ slightly, and the C++ version is more complete.
-
-References
-----------
-`Distance Metric Learning for Large Margin Nearest Neighbor Classification `_ Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul
diff --git a/doc/metric_learn.lsml.rst b/doc/metric_learn.lsml.rst
deleted file mode 100644
index 0deae4e6..00000000
--- a/doc/metric_learn.lsml.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-Least Squares Metric Learning (LSML)
-====================================
-
-.. automodule:: metric_learn.lsml
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import LSML_Supervised
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- lsml = LSML_Supervised(num_constraints=200)
- lsml.fit(X, Y)
-
-References
-----------
-
diff --git a/doc/metric_learn.mlkr.rst b/doc/metric_learn.mlkr.rst
deleted file mode 100644
index f71697de..00000000
--- a/doc/metric_learn.mlkr.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-Metric Learning for Kernel Regression (MLKR)
-============================================
-
-.. automodule:: metric_learn.mlkr
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import MLKR
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- mlkr = MLKR()
- mlkr.fit(X, Y)
-
-References
-----------
-`Information-theoretic Metric Learning `_ Jason V. Davis, et al.
diff --git a/doc/metric_learn.mmc.rst b/doc/metric_learn.mmc.rst
deleted file mode 100644
index bb9031ba..00000000
--- a/doc/metric_learn.mmc.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-Mahalanobis Metric Learning for Clustering (MMC)
-================================================
-
-.. automodule:: metric_learn.mmc
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import MMC_Supervised
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- mmc = MMC_Supervised(num_constraints=200)
- mmc.fit(X, Y)
-
-References
-----------
-`Distance metric learning with application to clustering with side-information `_ Xing, Jordan, Russell, Ng.
diff --git a/doc/metric_learn.nca.rst b/doc/metric_learn.nca.rst
deleted file mode 100644
index 00bc4eac..00000000
--- a/doc/metric_learn.nca.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Neighborhood Components Analysis (NCA)
-======================================
-
-.. automodule:: metric_learn.nca
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- import numpy as np
- from metric_learn import NCA
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- nca = NCA(max_iter=1000)
- nca.fit(X, Y)
-
-References
-----------
-
diff --git a/doc/metric_learn.rca.rst b/doc/metric_learn.rca.rst
deleted file mode 100644
index 027d583b..00000000
--- a/doc/metric_learn.rca.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-Relative Components Analysis (RCA)
-==================================
-
-.. automodule:: metric_learn.rca
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import RCA_Supervised
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- rca = RCA_Supervised(num_chunks=30, chunk_size=2)
- rca.fit(X, Y)
-
-References
-------------------
-`Adjustment learning and relevant component analysis `_ Noam Shental, et al.
diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst
index eb606542..930404d0 100644
--- a/doc/metric_learn.rst
+++ b/doc/metric_learn.rst
@@ -4,16 +4,52 @@ metric_learn package
Module Contents
---------------
-.. toctree::
-
- metric_learn.constraints
- metric_learn.base_metric
- metric_learn.itml
- metric_learn.lfda
- metric_learn.lmnn
- metric_learn.lsml
- metric_learn.mlkr
- metric_learn.mmc
- metric_learn.nca
- metric_learn.rca
- metric_learn.sdml
+Base Classes
+------------
+
+.. autosummary::
+ :toctree: generated/
+ :template: class.rst
+
+ metric_learn.Constraints
+ metric_learn.base_metric.BaseMetricLearner
+ metric_learn.base_metric._PairsClassifierMixin
+ metric_learn.base_metric._QuadrupletsClassifierMixin
+
+Supervised Learning Algorithms
+------------------------------
+.. autosummary::
+ :toctree: generated/
+ :template: class.rst
+
+ metric_learn.LFDA
+ metric_learn.LMNN
+ metric_learn.MLKR
+ metric_learn.NCA
+ metric_learn.RCA
+ metric_learn.ITML_Supervised
+ metric_learn.LSML_Supervised
+ metric_learn.MMC_Supervised
+ metric_learn.SDML_Supervised
+ metric_learn.RCA_Supervised
+
+Weakly Supervised Learning Algorithms
+-------------------------------------
+
+.. autosummary::
+ :toctree: generated/
+ :template: class.rst
+
+ metric_learn.ITML
+ metric_learn.LSML
+ metric_learn.MMC
+ metric_learn.SDML
+
+Unsupervised Learning Algorithms
+--------------------------------
+
+.. autosummary::
+ :toctree: generated/
+ :template: class.rst
+
+ metric_learn.Covariance
\ No newline at end of file
diff --git a/doc/metric_learn.sdml.rst b/doc/metric_learn.sdml.rst
deleted file mode 100644
index 3e350a70..00000000
--- a/doc/metric_learn.sdml.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-Sparse Determinant Metric Learning (SDML)
-=========================================
-
-.. automodule:: metric_learn.sdml
- :members:
- :undoc-members:
- :inherited-members:
- :show-inheritance:
- :special-members: __init__
-
-Example Code
-------------
-
-::
-
- from metric_learn import SDML_Supervised
- from sklearn.datasets import load_iris
-
- iris_data = load_iris()
- X = iris_data['data']
- Y = iris_data['target']
-
- sdml = SDML_Supervised(num_constraints=200)
- sdml.fit(X, Y)
-
-References
-------------------
diff --git a/doc/supervised.rst b/doc/supervised.rst
index c438294f..5520ce8e 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -8,46 +8,108 @@ labels `y`, and learn a distance matrix that make points from the same class
other, and points from different classes or with distant target values far away
from each other.
-Scikit-learn compatibility
-==========================
+General API
+===========
-All supervised algorithms are scikit-learn `Estimators`, so they are
-compatible with Pipelining and scikit-learn model selection routines.
+Supervised metric learning algorithms are the easiest metric-learn algorithms
+to use, since they use the same API as ``scikit-learn``.
-Algorithms
-==========
-
-Covariance
+Input data
----------
+In order to train a model, you need two `array-like `_ objects, `X` and `y`. `X`
+should be a 2D array-like of shape `(n_samples, n_features)`, where
+`n_samples` is the number of points of your dataset and `n_features` is the
+number of attributes of each of your points. `y` should be a 1D array-like
+of shape `(n_samples,)`, containing for each point in `X` the class it
+belongs to (or the value to regress for this sample, if you use `MLKR` for
+instance).
-.. todo:: Covariance is unsupervised, so its doc should not be here.
+Here is an example of a dataset of two dogs and one
+cat (the classes are 'dog' and 'cat'), each animal being represented by
+two numbers.
-`Covariance` does not "learn" anything, rather it calculates
-the covariance matrix of the input data. This is a simple baseline method.
+>>> import numpy as np
+>>> X = np.array([[2.3, 3.6], [0.2, 0.5], [6.7, 2.1]])
+>>> y = np.array(['dog', 'cat', 'dog'])
-.. topic:: Example Code:
+.. note::
-::
+ You can also use a preprocessor instead of directly giving the inputs as
+ 2D arrays. See the :ref:`preprocessor_section` section for more details.
- from metric_learn import Covariance
- from sklearn.datasets import load_iris
+Fit, transform, and so on
+-------------------------
+The goal of supervised metric-learning algorithms is to transform
+points into a new space in which the distance between two points from the
+same class will be small, and the distance between two points from different
+classes will be large. To do so, we fit the metric learner (for example
+`NCA`).
- iris = load_iris()['data']
+>>> from metric_learn import NCA
+>>> nca = NCA(random_state=42)
+>>> nca.fit(X, y)
+NCA(init=None, max_iter=100, n_components=None, num_dims='deprecated',
+ preprocessor=None, random_state=42, tol=None, verbose=False)
- cov = Covariance().fit(iris)
- x = cov.transform(iris)
-.. topic:: References:
+Now that the estimator is fitted, you can use it on new data for several
+purposes.
+
+First, you can transform the data in the learned space, using `transform`:
+Here we transform two points in the new embedding space.
+
+>>> X_new = np.array([[9.4, 4.1], [2.1, 4.4]])
+>>> nca.transform(X_new)
+array([[ 5.91884732, 10.25406973],
+ [ 3.1545886 , 6.80350083]])
+
+Also, as explained before, our metric learner has learned a distance between
+points. You can use this distance in two main ways:
+
+- You can either return the distance between pairs of points using the
+ `score_pairs` function:
+
+>>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]])
+array([0.49627072, 3.65287282])
- .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
+- Or you can return a function that will return the distance (in the new
+ space) between two 1D arrays (the coordinates of the points in the original
+ space), similarly to distance functions in `scipy.spatial.distance`.
+
+>>> metric_fun = nca.get_metric()
+>>> metric_fun([3.5, 3.6], [5.6, 2.4])
+0.4962707194621285
+
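+Since this function takes two 1D arrays and returns a scalar, it can also be
+plugged into scikit-learn estimators that accept a callable `metric` (note
+that this is usually slower than transforming the data first). Here is a
+minimal sketch with a nearest-neighbors classifier, reusing the small
+'dog'/'cat' dataset from above:
+
+>>> from sklearn.neighbors import KNeighborsClassifier
+>>> knn = KNeighborsClassifier(n_neighbors=1, metric=metric_fun)
+>>> knn = knn.fit(X, y)  # X, y are the 'dog'/'cat' data defined above
+>>> predictions = knn.predict(X_new)  # predicted classes for the two new points
+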
+.. note::
+
+  If the metric learner that you use learns a Mahalanobis matrix (as is
+  the case for all algorithms currently in metric-learn), you can get the
+  plain learned Mahalanobis matrix using `get_mahalanobis_matrix`.
+
+ >>> nca.get_mahalanobis_matrix()
+ array([[0.43680409, 0.89169412],
+ [0.89169412, 1.9542479 ]])
+
+.. TODO: remove the "like it is the case etc..." if it's not the case anymore
+
+Scikit-learn compatibility
+--------------------------
+
+All supervised algorithms are scikit-learn estimators
+(`sklearn.base.BaseEstimator`) and transformers
+(`sklearn.base.TransformerMixin`), so they are compatible with pipelining and
+scikit-learn model selection routines.
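+
+For instance, a supervised metric learner can be used as a transformer in a
+pipeline and evaluated with scikit-learn model selection tools. Here is a
+minimal sketch (the classifier and the iris dataset are only illustrative
+choices):
+
+>>> from sklearn.datasets import load_iris
+>>> from sklearn.model_selection import cross_val_score
+>>> from sklearn.neighbors import KNeighborsClassifier
+>>> from sklearn.pipeline import make_pipeline
+>>> from metric_learn import NCA
+>>> X_iris, y_iris = load_iris(return_X_y=True)
+>>> pipe = make_pipeline(NCA(random_state=42), KNeighborsClassifier())
+>>> scores = cross_val_score(pipe, X_iris, y_iris, cv=3)  # NCA acts as a transformer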
+
+Algorithms
+==========
.. _lmnn:
-LMNN
------
+:py:class:`LMNN `
+-----------------------------------------
Large Margin Nearest Neighbor Metric Learning
-(:py:class:`LMNN `)
+(:py:class:`LMNN `)
`LMNN` learns a Mahalanobis distance metric in the kNN classification
setting. The learned metric attempts to keep close k-nearest neighbors
@@ -97,10 +159,10 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class,
.. _nca:
-NCA
----
+:py:class:`NCA `
+--------------------------------------
-Neighborhood Components Analysis(:py:class:`NCA `)
+Neighborhood Components Analysis(:py:class:`NCA `)
`NCA` is a distance metric learning algorithm which aims to improve the
accuracy of nearest neighbors classification compared to the standard
@@ -161,10 +223,10 @@ the sum of probability of being correctly classified:
.. _lfda:
-LFDA
-----
+:py:class:`LFDA `
+-----------------------------------------
-Local Fisher Discriminant Analysis(:py:class:`LFDA `)
+Local Fisher Discriminant Analysis(:py:class:`LFDA `)
`LFDA` is a linear supervised dimensionality reduction method. It is
particularly useful when dealing with multi-modality, where one ore more classes
@@ -235,10 +297,10 @@ same class are not imposed to be close.
.. _mlkr:
-MLKR
-----
+:py:class:`MLKR `
+-----------------------------------------
-Metric Learning for Kernel Regression(:py:class:`MLKR `)
+Metric Learning for Kernel Regression(:py:class:`MLKR `)
`MLKR` is an algorithm for supervised metric learning, which learns a
distance function by directly minimizing the leave-one-out regression error.
@@ -298,15 +360,35 @@ calculating a weighted average of all the training samples:
Gerald Tesauro
+.. _supervised_version:
+
Supervised versions of weakly-supervised algorithms
---------------------------------------------------
Note that each :ref:`weakly-supervised algorithm `
has a supervised version of the form `*_Supervised` where similarity tuples are
generated from the labels information and passed to the underlying algorithm.
-
-.. todo:: add more details about that (see issue ``_)
+These constraints are sampled randomly under the hood.
+
+For pairs learners (see :ref:`learning_on_pairs`), pairs (tuples of two points
+from the dataset) and labels (`int` indicating whether the two points are
+similar (+1) or dissimilar (-1)) are sampled with the function
+`metric_learn.constraints.positive_negative_pairs`. To sample positive pairs
+(of label +1), this method will look at all the samples from the same label and
+sample randomly a pair among them. To sample negative pairs (of label -1), this
+method will look at all the samples from a different class and sample randomly
+a pair among them. The method will try to build `num_constraints` positive
+pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+of one of those, so forcing `same_length=True` will return both times the
+minimum of the two lengths.
+
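+As an illustration, here is a rough sketch of this sampling step (assuming the
+`Constraints` helper exposes `positive_negative_pairs` as described above;
+exact signatures may differ):
+
+>>> import numpy as np
+>>> from metric_learn.constraints import Constraints
+>>> labels = np.array([0, 0, 0, 1, 1, 1])
+>>> constraints = Constraints(labels)
+>>> a, b, c, d = constraints.positive_negative_pairs(5, same_length=True)
+>>> # (a[i], b[i]) index similar points, (c[i], d[i]) index dissimilar points
+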
+To use quadruplets learners (see :ref:`learning_on_quadruplets`) in a
+supervised way, positive and negative pairs are sampled as above and then
+concatenated, giving a 3D array of quadruplets in which the two first points
+of each quadruplet come from the same class and the two last points come from
+different classes (so the two last points should indeed be less similar than
+the two first points).
.. topic:: Example Code:
diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst
new file mode 100644
index 00000000..1d5bef43
--- /dev/null
+++ b/doc/unsupervised.rst
@@ -0,0 +1,37 @@
+============================
+Unsupervised Metric Learning
+============================
+
+Unsupervised metric learning algorithms only take as input points `X`. For
+now, the only such algorithm in metric-learn is `Covariance`, which is a
+simple baseline algorithm (see below).
+
+
+Algorithms
+==========
+.. _covariance:
+
+Covariance
+----------
+
+`Covariance` does not "learn" anything, rather it calculates
+the covariance matrix of the input data. This is a simple baseline method.
+It can be used for ZCA whitening of the data (see the Wikipedia page of
+`whitening transformation `_).
+
+.. topic:: Example Code:
+
+::
+
+ from metric_learn import Covariance
+ from sklearn.datasets import load_iris
+
+ iris = load_iris()['data']
+
+ cov = Covariance().fit(iris)
+ x = cov.transform(iris)
+
+.. topic:: References:
+
+ .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
\ No newline at end of file
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 351c4e3b..7e488ac7 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -11,17 +11,28 @@ and dissimilar points. Refer to the documentation of each algorithm for its
particular form of input data.
+General API
+===========
+
Input data
-==========
+----------
In the following paragraph we talk about tuples for sake of generality. These
can be pairs, triplets, quadruplets etc, depending on the particular metric
learning algorithm we use.
Basic form
-----------
-Every weakly supervised algorithm will take as input tuples of points, and if
-needed labels for theses tuples.
+^^^^^^^^^^
+
+Every weakly supervised algorithm will take as input tuples of
+points, and if needed, labels for these tuples. The tuples of points can
+also be called "constraints". They are sets of points that we consider
+together (e.g. two points, three points, etc.). The label is some information
+we have about such a set of points (e.g. "these two points are similar").
+Note that some information can be contained in the ordering of these tuples
+(see for instance the section :ref:`learning_on_quadruplets`). For more
+details about the specifics of each algorithm, refer to the appropriate
+section: either :ref:`learning_on_pairs` or :ref:`learning_on_quadruplets`.
The `tuples` argument is the first argument of every method (like the X
@@ -44,7 +55,7 @@ These are two data structures that can be used to represent tuple in metric
learn:
3D array of tuples
-------------------
+^^^^^^^^^^^^^^^^^^
The most intuitive way to represent tuples is to provide the algorithm with a
3D array-like of tuples of shape ``(n_tuples, t, n_features)``, where
@@ -62,10 +73,10 @@ the number of features of each point.
>>> [[-2.16, +0.11, -0.02],
>>> [+1.58, +0.16, +0.93]],
>>>
->>> [[+1.58, +0.16, +0.93 ], # same as tuples[1, 1, :]
+>>> [[+1.58, +0.16, +0.93], # same as tuples[1, 1, :]
>>> [+0.89, -0.34, +2.41]],
>>>
->>> [[-0.12, -1.21, -0.20 ], # same as tuples[0, 0, :]
+>>> [[-0.12, -1.21, -0.20], # same as tuples[0, 0, :]
>>> [-2.16, +0.11, -0.02]]]) # same as tuples[1, 0, :]
>>> y = np.array([-1, 1, 1, -1])
@@ -77,7 +88,7 @@ the number of features of each point.
2D array of indicators + preprocessor
--------------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Instead of forming each point in each tuple, a more efficient representation
would be to keep the dataset of points ``X`` aside, and just represent tuples
@@ -101,13 +112,7 @@ the feature dimension there, the resulting array is 2D.
In order to fit metric learning algorithms with this type of input, we need to
give the original dataset of points ``X`` to the estimator so that it knows
the points the indices refer to. We do this when initializing the estimator,
-through the argument `preprocessor`.
-
-.. topic:: Example:
-
->>> from metric_learn import MMC
->>> mmc = MMC(preprocessor=X)
->>> mmc.fit(pairs_indice, y)
+through the argument `preprocessor` (see :ref:`fit_ws` below).
.. note::
@@ -118,17 +123,85 @@ through the argument `preprocessor`.
paths in the filesystem, name of records in a database etc...) See section
:ref:`preprocessor_section` for more details on how to use the preprocessor.
-.. _sklearn_compat_ws:
+.. _fit_ws:
+
+Fit, transform, and so on
+-------------------------
+
+The goal of weakly-supervised metric-learning algorithms is to transform
+points into a new space in which the tuple-wise constraints between points
+are respected.
+
+>>> from metric_learn import MMC
+>>> mmc = MMC(random_state=42)
+>>> mmc.fit(tuples, y)
+MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+ diagonal_c=1.0, init=None, max_iter=100, max_proj=10000,
+ preprocessor=None, random_state=42, verbose=False)
+
+Or alternatively (using a preprocessor):
+
+>>> from metric_learn import MMC
+>>> mmc = MMC(preprocessor=X, random_state=42)
+>>> mmc.fit(pairs_indice, y)
+
+
+Now that the estimator is fitted, you can use it on new data for several
+purposes.
+
+First, you can transform the data in the learned space, using `transform`:
+Here we transform two points in the new embedding space.
+
+>>> X_new = np.array([[9.4, 4.1, 4.2], [2.1, 4.4, 2.3]])
+>>> mmc.transform(X_new)
+array([[-3.24667162e+01, 4.62622348e-07, 3.88325421e-08],
+ [-3.61531114e+01, 4.86778289e-07, 2.12654397e-08]])
+
+Also, as explained before, our metric learner has learned a distance between
+points. You can use this distance in two main ways:
+
+- You can either return the distance between pairs of points using the
+ `score_pairs` function:
+
+>>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]],
+... [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]])
+array([7.27607365, 0.88853014])
+
+- Or you can return a function that will return the distance
+ (in the new space) between two 1D arrays (the coordinates of the points in
+ the original space), similarly to distance functions in
+  `scipy.spatial.distance`. To do that, use the `get_metric` method.
+
+>>> metric_fun = mmc.get_metric()
+>>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7])
+7.276073646278203
+
+.. note::
+
+  If the metric learner that you use learns a Mahalanobis matrix (as is
+  the case for all algorithms currently in metric-learn), you can get the
+  plain Mahalanobis matrix using `get_mahalanobis_matrix`.
+
+>>> mmc.get_mahalanobis_matrix()
+array([[ 0.58603894, -5.69883982, -1.66614919],
+ [-5.69883982, 55.41743549, 16.20219519],
+ [-1.66614919, 16.20219519, 4.73697721]])
+
+.. TODO: remove the "like it is the case etc..." if it's not the case anymore
+
+.. _sklearn_compat_ws:
+
Scikit-learn compatibility
-==========================
+--------------------------
Weakly supervised estimators are compatible with scikit-learn routines for
model selection (grid-search, cross-validation etc). See the scoring section
-for more details on the scoring used in the case of Weakly Supervised
-Metric Learning.
+of the appropriate algorithm (:ref:`pairs learners `
+or :ref:`quadruplets learners `)
+for more details on the scoring used in the case of Weakly Supervised Metric
+Learning.
-.. topic:: Example
+Example:
>>> from metric_learn import MMC
>>> from sklearn.datasets import load_iris
@@ -141,13 +214,22 @@ Metric Learning.
>>> mmc = MMC(preprocessor=X)
>>> cross_val_score(mmc, pairs_indices, y)
-Scoring
-=======
+Prediction and scoring
+----------------------
+
+Weakly supervised learners are also able, after being fitted, to predict, for
+a given tuple, its label (for pairs) or its ordering (for quadruplets). See
+the appropriate section for more details, either :ref:`this
+one ` for pairs, or :ref:`this one
+` for quadruplets.
-Some default scoring are implemented in metric-learn, depending on the kind of
-tuples you're working with (pairs, triplets...). See the docstring of the
-`score` method of the estimator you use.
+They also implement a default scoring method, `score`, that can be
+used to evaluate the performance of a metric-learner on a test dataset. See
+the appropriate section for more details, either :ref:`this
+one ` for pairs, or :ref:`this one `
+for quadruplets.
+.. _learning_on_pairs:
Learning on pairs
=================
@@ -158,15 +240,46 @@ corresponding target containing ``n_samples`` values being either +1 or -1.
These values indicate whether the given pairs are similar points or
dissimilar points.
+Fitting
+-------
+Here is an example for fitting on pairs (see :ref:`fit_ws` for more details on
+the input data format and how to fit, in the general case of learning on
+tuples).
+
+>>> from metric_learn import MMC
+>>> pairs = np.array([[[1.2, 3.2], [2.3, 5.5]],
+>>> [[4.5, 2.3], [2.1, 2.3]]])
+>>> y_pairs = np.array([1, -1])
+>>> mmc = MMC(random_state=42)
+>>> mmc.fit(pairs, y_pairs)
+MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+ diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None,
+ random_state=42, verbose=False)
+
+Here, we learned a metric that puts the two first points closer
+together in the transformed space, and the two last points further away from
+each other.
+
+.. _pairs_predicting:
+
+Predicting
+----------
+
+When a pairs learner is fitted, it is also able to predict, for an
+upcoming pair, whether it is a pair of similar or dissimilar points.
+
+>>> mmc.predict([[[0.6, 1.6], [1.15, 2.75]],
+... [[3.2, 1.1], [5.4, 6.1]]])
+array([1, -1])
.. _calibration:
Thresholding
------------
In order to predict whether a new pair represents similar or dissimilar
-samples, we need to set a distance threshold, so that points closer (in the
-learned space) than this threshold are predicted as similar, and points further
-away are predicted as dissimilar. Several methods are possible for this
+samples, we in fact need to set a distance threshold, so that points closer (in
+the learned space) than this threshold are predicted as similar, and points
+further away are predicted as dissimilar. Several methods are possible for this
thresholding.
- **At fit time**: The threshold is set with `calibrate_threshold` (see
@@ -177,26 +290,73 @@ thresholding.
overfitting. If you want to avoid that, calibrate the threshold after
fitting, on a validation set.
+ >>> mmc.fit(pairs, y) # will fit the threshold automatically after fitting
+
- **Manual**: calling `set_threshold` will set the threshold to a
particular value.
+ >>> mmc.set_threshold(0.4)
+
- **Calibration**: calling `calibrate_threshold` will calibrate the
threshold to achieve a particular score on a validation set, the score
being among the classical scores for classification (accuracy, f1 score...).
+ >>> mmc.calibrate_threshold(pairs, y)
See also: `sklearn.calibration`.
+.. _pairs_scoring:
+
+Scoring
+-------
+
+Not only can pairs learners predict the label of given pairs, they can also
+return a `decision_function` for a set of pairs. It is basically the "score"
+that will be thresholded to find the prediction for the pair. This
+"score" is the opposite of the distance in the new space (a higher score means
+points are similar, and a lower score means they are dissimilar).
+
+>>> mmc.decision_function([[[0.6, 1.6], [1.15, 2.75]],
+... [[3.2, 1.1], [5.4, 6.1]]])
+array([-0.12811124, -0.74750256])
+
+This makes it possible to compute all kinds of estimator scores usually used
+in classic classification tasks, like `sklearn.metrics.accuracy_score` for
+instance, which can be used inside cross-validation routines:
+
+>>> from sklearn.model_selection import cross_val_score
+>>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]],
+... [[3.2, 1.1], [5.4, 6.1]],
+... [[7.7, 5.6], [1.23, 8.4]]])
+>>> y_test = np.array([-1., 1., -1.])
+>>> cross_val_score(mmc, pairs_test, y_test, scoring='accuracy')
+array([1., 0., 1.])
+
+Pairs learners also have a default score, which basically
+returns the `sklearn.metrics.roc_auc_score` (and is therefore not dependent on
+the threshold).
+
+>>> pairs_test = np.array([[[0.6, 1.6], [1.15, 2.75]],
+... [[3.2, 1.1], [5.4, 6.1]],
+... [[7.7, 5.6], [1.23, 8.4]]])
+>>> y_test = np.array([-1., 1., -1.])
+>>> mmc.score(pairs_test, y_test)
+0.5
+
+.. note::
+ See :ref:`fit_ws` for more details on metric learners functions that are
+ not specific to learning on pairs, like `transform`, `score_pairs`,
+ `get_metric` and `get_mahalanobis_matrix`.
Algorithms
-==========
+----------
.. _itml:
-ITML
-----
+:py:class:`ITML `
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Information Theoretic Metric Learning(:py:class:`ITML `)
+Information Theoretic Metric Learning(:py:class:`ITML `)
`ITML` minimizes the (differential) relative entropy, aka Kullback–Leibler
divergence, between two multivariate Gaussians subject to constraints on the
@@ -270,99 +430,13 @@ is the prior distance metric, set to identity matrix by default,
itml/
-.. _lsml:
-
-LSML
-----
-
-Metric Learning from Relative Comparisons by Minimizing Squared Residual
-(:py:class:`LSML `)
-
-`LSML` proposes a simple, yet effective, algorithm that minimizes a convex
-objective function corresponding to the sum of squared residuals of
-constraints. This algorithm uses the constraints in the form of the
-relative distance comparisons, such method is especially useful where
-pairwise constraints are not natural to obtain, thus pairwise constraints
-based algorithms become infeasible to be deployed. Furthermore, its sparsity
-extension leads to more stable estimation when the dimension is high and
-only a small amount of constraints is given.
-
-The loss function of each constraint
-:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is
-denoted as:
-
-.. math::
-
- H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b)
- - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))
-
-where :math:`H(\cdot)` is the squared Hinge loss function defined as:
-
-.. math::
-
- H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\
- \,\,x^2 \qquad x>0\end{aligned}\right.\\
-
-The summed loss function :math:`L(C)` is the simple sum over all constraints
-:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d)
-: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The
-original paper suggested here should be a weighted sum since the confidence
-or probability of each constraint might differ. However, for the sake of
-simplicity and assumption of no extra knowledge provided, we just deploy
-the simple sum here as well as what the authors did in the experiments.
-
-The distance metric learning problem becomes minimizing the summed loss
-function of all constraints plus a regularization term w.r.t. the prior
-knowledge:
-
-.. math::
-
- \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a,
- \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}(
- \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_c))\\
-
-where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity
-by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence:
-
-.. math::
-
- D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet}
- (\mathbf{M})
-
-.. topic:: Example Code:
-
-::
-
- from metric_learn import LSML
-
- quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
- [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
- [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
- [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]
-
- # we want to make closer points where the first feature is close, and
- # further if the second feature is close
-
- lsml = LSML()
- lsml.fit(quadruplets)
-
-.. topic:: References:
-
- .. [1] Liu et al.
- "Metric Learning from Relative Comparisons by Minimizing Squared
- Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf
-
- .. [2] Adapted from https://gist.github.com/kcarnold/5439917
-
.. _sdml:
-=======
-
-SDML
-----
+:py:class:`SDML `
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Sparse High-Dimensional Metric Learning
-(:py:class:`SDML `)
+(:py:class:`SDML `)
`SDML` is an efficient sparse metric learning in high-dimensional space via
double regularization: an L1-penalization on the off-diagonal elements of the
@@ -418,10 +492,10 @@ is the off-diagonal L1 norm.
.. _rca:
-RCA
----
+:py:class:`RCA `
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Relative Components Analysis (:py:class:`RCA `)
+Relative Components Analysis (:py:class:`RCA `)
`RCA` learns a full rank Mahalanobis distance metric based on a weighted sum of
in-chunklets covariance matrices. It applies a global linear transformation to
@@ -474,11 +548,11 @@ as the Mahalanobis matrix.
.. _mmc:
-MMC
----
+:py:class:`MMC `
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Metric Learning with Application for Clustering with Side Information
-(:py:class:`MMC `)
+(:py:class:`MMC `)
`MMC` minimizes the sum of squared distances between similar points, while
enforcing the sum of distances between dissimilar ones to be greater than one.
@@ -528,23 +602,185 @@ points, while constrains the sum of distances between dissimilar points:
.. [2] Adapted from Matlab code `here `_.
+
+.. _learning_on_quadruplets:
+
Learning on quadruplets
=======================
-A type of information even weaker than pairs is information about relative
-comparisons between pairs. The user should provide the algorithm with a
-quadruplet of points, where the two first points are closer than the two
-last points. No target vector (``y``) is needed, since the supervision is
-already in the order that points are given in the quadruplet.
+
+
+The goal of weakly-supervised metric-learning algorithms is to transform
+points into a new space in which the tuple-wise constraints between points
+are respected.
+
+Fitting
+-------
+Here is an example for fitting on quadruplets (see :ref:`fit_ws` for more
+details on the input data format and how to fit, in the general case of
+learning on tuples).
+
+>>> from metric_learn import LSML
+>>> quadruplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.4, 6.7], [2.1, 0.6]],
+>>> [[4.5, 2.3], [2.1, 2.3], [0.6, 1.2], [7.3, 3.4]]])
+>>> lsml = LSML(random_state=42)
+>>> lsml.fit(quadruplets)
+LSML(max_iter=1000, preprocessor=None, prior=None, random_state=42, tol=0.001,
+ verbose=False)
+
+Or alternatively (using a preprocessor):
+
+>>> X = np.array([[1.2, 3.2],
+>>> [2.3, 5.5],
+>>> [2.4, 6.7],
+>>> [2.1, 0.6],
+>>> [4.5, 2.3],
+>>> [2.1, 2.3],
+>>> [0.6, 1.2],
+>>> [7.3, 3.4]])
+>>> quadruplets_indices = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
+>>> lsml = LSML(preprocessor=X, random_state=42)
+>>> lsml.fit(quadruplets_indices)
+LSML(max_iter=1000,
+ preprocessor=array([[1.2, 3.2],
+ [2.3, 5.5],
+ [2.4, 6.7],
+ [2.1, 0.6],
+ [4.5, 2.3],
+ [2.1, 2.3],
+ [0.6, 1.2],
+ [7.3, 3.4]]),
+ prior=None, random_state=42, tol=0.001, verbose=False)
+
+
+Here, we want to learn a metric that, for each of the two
+`quadruplets`, will put the two first points closer to each other than the
+two last points are.
+
+.. _quadruplets_predicting:
+
+Predicting
+----------
+
+When a quadruplets learner is fitted, it is also able to predict, for an
+upcoming quadruplet, whether the two first points are more similar than the
+two last points (+1), or not (-1).
+
+>>> quadruplets_test = np.array(
+... [[[5.6, 5.3], [2.2, 2.1], [0.4, 0.6], [1.2, 3.4]],
+... [[6.0, 4.2], [4.3, 1.2], [4.5, 0.6], [0.1, 7.8]]])
+>>> lsml.predict(quadruplets_test)
+array([-1., 1.])
+
+.. _quadruplets_scoring:
+
+Scoring
+-------
+
+Not only can quadruplets learners predict the ordering of given quadruplets,
+they can also return a `decision_function` for a set of quadruplets. It is
+basically the "score" whose sign will be taken to find the prediction for the
+quadruplet. This "score" is the difference between the distance between the
+two last points and the distance between the two first points of the
+quadruplet (a higher score means the two last points are more likely to be
+more dissimilar than the two first points, i.e. more likely to give a +1
+prediction, since that is the right ordering).
+
+>>> lsml.decision_function(quadruplets_test)
+array([-1.75700306, 4.98982131])
+
+In the above example, for the first quadruplet in `quadruplets_test`, the
+two first points are predicted to be less similar than the two last points
+(they are further away in the transformed space).
+
+Unlike pairs learners, quadruplets learners do not take a `y` when fitting,
+which means that scikit-learn scoring functions cannot be used with them,
+like:
+
+>>> from sklearn.model_selection import cross_val_score
+>>> cross_val_score(lsml, quadruplets, scoring='f1_score') # this won't work
+
+(This is actually intentional, for more details
+about that, see
+`this comment `_
+on github.)
+
+However, quadruplets learners do have a default scoring function, which will
+basically return the accuracy score on a given test set, i.e. the proportion
+of quadruplets that have the right predicted ordering.
+
+>>> lsml.score(quadruplets_test)
+0.5
+
+.. note::
+ See :ref:`fit_ws` for more details on metric learners functions that are
+ not specific to learning on pairs, like `transform`, `score_pairs`,
+ `get_metric` and `get_mahalanobis_matrix`.
+
+
+
Algorithms
-==========
+----------
+
+.. _lsml:
-LSML
-----
+:py:class:`LSML `
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared
-Residual
+Metric Learning from Relative Comparisons by Minimizing Squared Residual
+(:py:class:`LSML `)
+
+`LSML` proposes a simple, yet effective, algorithm that minimizes a convex
+objective function corresponding to the sum of squared residuals of
+constraints. This algorithm uses constraints in the form of relative
+distance comparisons, which makes it especially useful when pairwise
+constraints are not natural to obtain, and pairwise-constraint-based
+algorithms therefore become infeasible to deploy. Furthermore, its sparsity
+extension leads to more stable estimation when the dimension is high and
+only a small number of constraints is given.
+
+The loss function of each constraint
+:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is
+denoted as:
+
+.. math::
+
+ H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b)
+ - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))
+
+where :math:`H(\cdot)` is the squared Hinge loss function defined as:
+
+.. math::
+
+ H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\
+ \,\,x^2 \qquad x>0\end{aligned}\right.\\
+
+The summed loss function :math:`L(C)` is the simple sum over all constraints
+:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d)
+: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The
+original paper suggests that this should be a weighted sum, since the
+confidence or probability of each constraint might differ. However, for the
+sake of simplicity and assuming no extra knowledge is provided, we just use
+the simple sum here, as the authors did in their experiments.
+
+The distance metric learning problem becomes minimizing the summed loss
+function of all constraints plus a regularization term w.r.t. the prior
+knowledge:
+
+.. math::
+
+  \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a,
+  \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}(
+  \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d))\\
+
+where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity
+by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence:
+
+.. math::
+
+ D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet}
+ (\mathbf{M})
.. topic:: Example Code:
@@ -570,3 +806,5 @@ Residual
Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf
.. [2] Adapted from https://gist.github.com/kcarnold/5439917
+
+
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index fd6cff20..b46d1adc 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -130,7 +130,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`LMNN
-# `
+# `
######################################################################
@@ -139,7 +139,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6, init='random')
# fit the data!
lmnn.fit(X, y)
@@ -181,7 +181,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`ITML
-# `
+# `
itml = metric_learn.ITML_Supervised()
X_itml = itml.fit_transform(X, y)
@@ -200,12 +200,12 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`MMC
-# `
+# `
-itml = metric_learn.ITML_Supervised()
-X_itml = itml.fit_transform(X, y)
+mmc = metric_learn.MMC_Supervised()
+X_mmc = mmc.fit_transform(X, y)
-plot_tsne(X_itml, y)
+plot_tsne(X_mmc, y)
######################################################################
# Sparse Determinant Metric Learning
@@ -219,9 +219,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`SDML
-# `
+# `
-sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015)
+sdml = metric_learn.SDML_Supervised(sparsity_param=0.1, balance_param=0.0015,
+ prior='covariance')
X_sdml = sdml.fit_transform(X, y)
plot_tsne(X_sdml, y)
@@ -238,9 +239,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`LSML
-# `
+# `
-lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000)
+lsml = metric_learn.LSML_Supervised(tol=0.0001, max_iter=10000,
+ prior='covariance')
X_lsml = lsml.fit_transform(X, y)
plot_tsne(X_lsml, y)
@@ -265,7 +267,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`NCA
-# `
+# `
nca = metric_learn.NCA(max_iter=1000)
X_nca = nca.fit_transform(X, y)
@@ -285,7 +287,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`LFDA
-# `
+# `
lfda = metric_learn.LFDA(k=2, num_dims=2)
X_lfda = lfda.fit_transform(X, y)
@@ -306,7 +308,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`RCA
-# `
+# `
rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)
X_rca = rca.fit_transform(X, y)
@@ -326,7 +328,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#
# - See more in the :ref:`User Guide `
# - See more in the documentation of the class :py:class:`MLKR
-# `
+# `
#
# To illustrate MLKR, let's use the dataset
# `sklearn.datasets.make_regression` the same way as we did with the
@@ -445,8 +447,8 @@ def create_constraints(labels):
######################################################################
# Using our constraints, let's now train ITML again. Note that we are no
# longer calling the supervised class :py:class:`ITML_Supervised
-# ` but the more generic
-# (weakly-supervised) :py:class:`ITML `, which
+# ` but the more generic
+# (weakly-supervised) :py:class:`ITML `, which
# takes the dataset `X` through the `preprocessor` argument (see
# :ref:`this section ` of the documentation to learn
# about more advanced uses of `preprocessor`) and the pair information `pairs`
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index aa7d66dd..1e5fa974 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -1,3 +1,7 @@
+"""
+Base module.
+"""
+
from sklearn.base import BaseEstimator
from sklearn.utils.extmath import stable_cumsum
from sklearn.utils.validation import _is_arraylike, check_is_fitted
@@ -10,16 +14,17 @@
class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)):
+ """
+ Base class for all metric-learners.
- def __init__(self, preprocessor=None):
- """
+ Parameters
+ ----------
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be gotten like this: X[indices].
+ """
- Parameters
- ----------
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be gotten like this: X[indices].
- """
+ def __init__(self, preprocessor=None):
self.preprocessor = preprocessor
@abstractmethod
@@ -277,6 +282,8 @@ def metric_fun(u, v, squared=False):
get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__
def metric(self):
+ """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix`
+ instead"""
# TODO: remove this method in version 0.6.0
warnings.warn(("`metric` is deprecated since version 0.5.0 and will be "
"removed in 0.6.0. Use `get_mahalanobis_matrix` instead."),
@@ -295,7 +302,8 @@ def get_mahalanobis_matrix(self):
class _PairsClassifierMixin(BaseMetricLearner):
- """
+ """Base class for pairs learners.
+
Attributes
----------
threshold_ : `float`
@@ -567,6 +575,8 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None,
class _QuadrupletsClassifierMixin(BaseMetricLearner):
+ """Base class for quadruplets learners.
+ """
_tuple_size = 4 # number of points in a tuple, 4 for quadruplets
@@ -578,7 +588,7 @@ def predict(self, quadruplets):
Parameters
----------
- quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or
+ quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \
(n_quadruplets, 4)
3D Array of quadruplets to predict, with each row corresponding to four
points, or 2D array of indices of quadruplets if the metric learner
@@ -607,7 +617,7 @@ def decision_function(self, quadruplets):
Parameters
----------
- quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or
+ quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \
(n_quadruplets, 4)
3D Array of quadruplets to predict, with each row corresponding to four
points, or 2D array of indices of quadruplets if the metric learner
@@ -630,7 +640,7 @@ def score(self, quadruplets):
Parameters
----------
- quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or
+ quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or \
(n_quadruplets, 4)
3D Array of quadruplets to score, with each row corresponding to four
points, or 2D array of indices of quadruplets if the metric learner
diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index e591830b..069a6564 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -11,6 +11,11 @@
class Constraints(object):
+ """
+ Class to build constraints from labels.
+
+ See more in the :ref:`User Guide `
+ """
def __init__(self, partial_labels):
'''partial_labels : int arraylike, -1 indicating unknown label'''
partial_labels = np.asanyarray(partial_labels, dtype=int)
diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py
index 19dad5d8..b9666494 100644
--- a/metric_learn/covariance.py
+++ b/metric_learn/covariance.py
@@ -1,11 +1,5 @@
"""
Covariance metric (baseline method)
-
-This method does not "learn" anything, rather it calculates
-the covariance matrix of the input data.
-
-This is a simple baseline method first introduced in
-On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
"""
from __future__ import absolute_import
@@ -20,11 +14,28 @@
class Covariance(MahalanobisMixin, TransformerMixin):
"""Covariance metric (baseline method)
+ This method does not "learn" anything, rather it calculates
+ the covariance matrix of the input data.
+
+ This is a simple baseline method first introduced in
+ On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
+
+ Read more in the :ref:`User Guide `.
+
Attributes
----------
transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
+
+ Examples
+ --------
+ >>> from metric_learn import Covariance
+ >>> from sklearn.datasets import load_iris
+ >>> iris = load_iris()['data']
+ >>> cov = Covariance().fit(iris)
+ >>> x = cov.transform(iris)
+
"""
def __init__(self, preprocessor=None):
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 21303c18..16fc21db 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -1,17 +1,5 @@
-r"""
-Information Theoretic Metric Learning(ITML)
-
-`ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler
-divergence, between two multivariate Gaussians subject to constraints on the
-associated Mahalanobis distance, which can be formulated into a Bregman
-optimization problem by minimizing the LogDet divergence subject to
-linear constraints. This algorithm can handle a wide variety of constraints
-and can optionally incorporate a prior on the distance function. Unlike some
-other methods, `ITML` does not rely on an eigenvalue computation or
-semi-definite programming.
-
-Read more in the :ref:`User Guide `.
-
+"""
+Information Theoretic Metric Learning (ITML)
"""
from __future__ import print_function, absolute_import
@@ -34,55 +22,6 @@ class _BaseITML(MahalanobisMixin):
def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
prior='identity', A0='deprecated', verbose=False,
preprocessor=None, random_state=None):
- """Initialize ITML.
-
- Parameters
- ----------
- gamma : float, optional
- value for slack variables
-
- max_iter : int, optional
-
- convergence_threshold : float, optional
-
- prior : string or numpy array, optional (default='identity')
- The Mahalanobis matrix to use as a prior. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of shape
- (n_features, n_features). For ITML, the prior should be strictly
- positive definite (PD).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The inverse covariance matrix.
-
- 'random'
- The prior will be a random SPD matrix of shape
- `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A positive definite (PD) matrix of shape
- (n_features, n_features), that will be used as such to set the
- prior.
-
- A0 : Not used
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'prior' instead.
-
- verbose : bool, optional
- if True, prints information while learning
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
-
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``prior='random'``, ``random_state`` is used to set the prior.
- """
self.gamma = gamma
self.max_iter = max_iter
self.convergence_threshold = convergence_threshold
@@ -172,6 +111,66 @@ def _fit(self, pairs, y, bounds=None):
class ITML(_BaseITML, _PairsClassifierMixin):
"""Information Theoretic Metric Learning (ITML)
+ `ITML` minimizes the (differential) relative entropy, aka Kullback-Leibler
+ divergence, between two multivariate Gaussians subject to constraints on the
+ associated Mahalanobis distance, which can be formulated into a Bregman
+ optimization problem by minimizing the LogDet divergence subject to
+ linear constraints. This algorithm can handle a wide variety of constraints
+ and can optionally incorporate a prior on the distance function. Unlike some
+ other methods, `ITML` does not rely on an eigenvalue computation or
+ semi-definite programming.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ gamma : float, optional (default=1.)
+ Value for slack variables
+
+ max_iter : int, optional (default=1000)
+ Maximum number of iteration of the optimization procedure.
+
+ convergence_threshold : float, optional (default=1e-3)
+ Convergence tolerance.
+
+ prior : string or numpy array, optional (default='identity')
+ The Mahalanobis matrix to use as a prior. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of shape
+ (n_features, n_features). For ITML, the prior should be strictly
+ positive definite (PD).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The inverse covariance matrix.
+
+ 'random'
+ The prior will be a random SPD matrix of shape
+ `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A positive definite (PD) matrix of shape
+ (n_features, n_features), that will be used as such to set the
+ prior.
+
+ A0 : Not used
+ .. deprecated:: 0.5.0
+ `A0` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'prior' instead.
+
+ verbose : bool, optional (default=False)
+ If True, prints information while learning
+
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``prior='random'``, ``random_state`` is used to set the prior.
+
Attributes
----------
bounds_ : `numpy.ndarray`, shape=(2,)
@@ -194,6 +193,22 @@ class ITML(_BaseITML, _PairsClassifierMixin):
If the distance metric between two points is lower than this threshold,
points will be classified as similar, otherwise they will be
classified as dissimilar.
+
+ Examples
+ --------
+ >>> from metric_learn import ITML_Supervised
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> itml = ITML_Supervised(num_constraints=200)
+ >>> itml.fit(X, Y)
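+
+  `ITML` itself is fit in the weakly-supervised setting, directly on pairs
+  of points labeled as similar (+1) or dissimilar (-1), as described in the
+  `fit` method below. A minimal sketch, with arbitrary toy coordinates:
+
+  >>> from metric_learn import ITML
+  >>> pairs = [[[1.2, 7.5], [1.3, 1.5]],
+  ...          [[6.4, 2.6], [6.2, 9.7]],
+  ...          [[1.3, 4.5], [3.2, 4.6]],
+  ...          [[6.2, 5.5], [5.4, 5.4]]]
+  >>> y = [1, 1, -1, -1]
+  >>> itml = ITML()
+  >>> itml.fit(pairs, y)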
+
+ References
+ ----------
+ .. [1] `Information-theoretic Metric Learning
+ `_ Jason V. Davis, et al.
"""
def fit(self, pairs, y, bounds=None, calibration_params=None):
@@ -204,7 +219,7 @@ def fit(self, pairs, y, bounds=None, calibration_params=None):
Parameters
----------
- pairs: array-like, shape=(n_constraints, 2, n_features) or
+ pairs: array-like, shape=(n_constraints, 2, n_features) or \
(n_constraints, 2)
3D Array of pairs with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
@@ -240,6 +255,64 @@ def fit(self, pairs, y, bounds=None, calibration_params=None):
class ITML_Supervised(_BaseITML, TransformerMixin):
"""Supervised version of Information Theoretic Metric Learning (ITML)
+  `ITML_Supervised` creates pairs of similar samples by taking samples from
+  the same class, and pairs of dissimilar samples by taking samples from
+  different classes. It then passes these pairs to `ITML` for training.
+
+ Parameters
+ ----------
+  gamma : float, optional (default=1.)
+    Value for slack variables
+  max_iter : int, optional (default=1000)
+    Maximum number of iterations of the optimization procedure.
+  convergence_threshold : float, optional (default=1e-3)
+    Convergence tolerance.
+  num_labeled : Not used
+    .. deprecated:: 0.5.0
+      `num_labeled` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0.
+  num_constraints : int, optional
+    Number of constraints to generate.
+ bounds : Not used
+ .. deprecated:: 0.5.0
+ `bounds` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Set `bounds` at fit time instead :
+ `itml_supervised.fit(X, y, bounds=...)`
+
+ prior : string or numpy array, optional (default='identity')
+ Initialization of the Mahalanobis matrix. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of shape
+ (n_features, n_features). For ITML, the prior should be strictly
+ positive definite (PD).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The inverse covariance matrix.
+
+ 'random'
+ The prior will be a random SPD matrix of shape
+ `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A positive definite (PD) matrix of shape
+ (n_features, n_features), that will be used as such to set the
+ prior.
+
+ A0 : Not used
+ .. deprecated:: 0.5.0
+ `A0` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'prior' instead.
+ verbose : bool, optional
+ if True, prints information while learning
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``prior='random'``, ``random_state`` is used to set the prior.
+
+
Attributes
----------
bounds_ : `numpy.ndarray`, shape=(2,)
@@ -257,71 +330,18 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
+
+ See Also
+ --------
+ metric_learn.ITML : The original weakly-supervised algorithm
+ :ref:`supervised_version` : The section of the project documentation
+ that describes the supervised version of weakly supervised estimators.
"""
def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
num_labeled='deprecated', num_constraints=None,
bounds='deprecated', prior='identity', A0='deprecated',
verbose=False, preprocessor=None, random_state=None):
- """Initialize the supervised version of `ITML`.
-
- `ITML_Supervised` creates pairs of similar sample by taking same class
- samples, and pairs of dissimilar samples by taking different class
- samples. It then passes these pairs to `ITML` for training.
-
- Parameters
- ----------
- gamma : float, optional
- value for slack variables
- max_iter : int, optional
- convergence_threshold : float, optional
- num_labeled : Not used
- .. deprecated:: 0.5.0
- `num_labeled` was deprecated in version 0.5.0 and will
- be removed in 0.6.0.
- num_constraints: int, optional
- number of constraints to generate
- bounds : Not used
- .. deprecated:: 0.5.0
- `bounds` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Set `bounds` at fit time instead :
- `itml_supervised.fit(X, y, bounds=...)`
-
- prior : string or numpy array, optional (default='identity')
- Initialization of the Mahalanobis matrix. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of shape
- (n_features, n_features). For ITML, the prior should be strictly
- positive definite (PD).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The inverse covariance matrix.
-
- 'random'
- The prior will be a random SPD matrix of shape
- `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A positive definite (PD) matrix of shape
- (n_features, n_features), that will be used as such to set the
- prior.
-
- A0 : Not used
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'prior' instead.
- verbose : bool, optional
- if True, prints information while learning
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``prior='random'``, ``random_state`` is used to set the prior.
- """
_BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
convergence_threshold=convergence_threshold,
A0=A0, prior=prior, verbose=verbose,
diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py
index 1851a734..6c651b7b 100644
--- a/metric_learn/lfda.py
+++ b/metric_learn/lfda.py
@@ -1,13 +1,5 @@
-r"""
-Local Fisher Discriminant Analysis(LFDA)
-
-LFDA is a linear supervised dimensionality reduction method. It is
-particularly useful when dealing with multimodality, where one ore more classes
-consist of separate clusters in input space. The core optimization problem of
-LFDA is solved as a generalized eigenvalue problem.
-
-Read more in the :ref:`User Guide `.
-
+"""
+Local Fisher Discriminant Analysis (LFDA)
"""
from __future__ import division, absolute_import
import numpy as np
@@ -24,44 +16,69 @@
class LFDA(MahalanobisMixin, TransformerMixin):
'''
Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction
- Sugiyama, ICML 2006
- Attributes
+ LFDA is a linear supervised dimensionality reduction method. It is
+  particularly useful when dealing with multimodality, where one or more
+ classes consist of separate clusters in input space. The core optimization
+ problem of LFDA is solved as a generalized eigenvalue problem.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
----------
- transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
- The learned linear transformation ``L``.
- '''
+ n_components : int or None, optional (default=None)
+ Dimensionality of reduced space (if None, defaults to dimension of X).
- def __init__(self, n_components=None, num_dims='deprecated',
- k=None, embedding_type='weighted', preprocessor=None):
- '''
- Initialize LFDA.
+ num_dims : Not used
- Parameters
- ----------
- n_components : int or None, optional (default=None)
- Dimensionality of reduced space (if None, defaults to dimension of X).
+ .. deprecated:: 0.5.0
+ `num_dims` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use `n_components` instead.
- num_dims : Not used
+ k : int, optional
+ Number of nearest neighbors used in local scaling method.
+ Defaults to min(7, n_components - 1).
- .. deprecated:: 0.5.0
- `num_dims` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use `n_components` instead.
+ embedding_type : str, optional
+ Type of metric in the embedding space (default: 'weighted')
+ 'weighted' - weighted eigenvectors
+ 'orthonormalized' - orthonormalized
+ 'plain' - raw eigenvectors
- k : int, optional
- Number of nearest neighbors used in local scaling method.
- Defaults to min(7, n_components - 1).
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+
+ Attributes
+ ----------
+ transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
+ The learned linear transformation ``L``.
- embedding_type : str, optional
- Type of metric in the embedding space (default: 'weighted')
- 'weighted' - weighted eigenvectors
- 'orthonormalized' - orthonormalized
- 'plain' - raw eigenvectors
+ Examples
+ --------
+
+ >>> import numpy as np
+ >>> from metric_learn import LFDA
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+  >>> lfda = LFDA(k=2, n_components=2)
+ >>> lfda.fit(X, Y)
+
+ References
+  ----------
+ .. [1] `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher
+ Discriminant Analysis `_
+ Masashi Sugiyama.
+
+ .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering
+ `_ Yuan Tang.
+ '''
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- '''
+ def __init__(self, n_components=None, num_dims='deprecated',
+ k=None, embedding_type='weighted', preprocessor=None):
if embedding_type not in ('weighted', 'orthonormalized', 'plain'):
raise ValueError('Invalid embedding_type: %r' % embedding_type)
self.n_components = n_components
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 20eeea3b..600d55c0 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -1,16 +1,7 @@
-r"""
-Large Margin Nearest Neighbor Metric learning(LMNN)
-
-LMNN learns a Mahalanobis distance metric in the kNN classification
-setting. The learned metric attempts to keep close k-nearest neighbors
-from the same class, while keeping examples from different classes
-separated by a large margin. This algorithm makes no assumptions about
-the distribution of the data.
-
-Read more in the :ref:`User Guide `.
-
"""
-#TODO: periodic recalculation of impostors, PCA initialization
+Large Margin Nearest Neighbor Metric learning (LMNN)
+"""
+# TODO: periodic recalculation of impostors, PCA initialization
from __future__ import print_function, absolute_import
import numpy as np
@@ -26,81 +17,142 @@
class LMNN(MahalanobisMixin, TransformerMixin):
+ """Large Margin Nearest Neighbor (LMNN)
+
+ LMNN learns a Mahalanobis distance metric in the kNN classification
+ setting. The learned metric attempts to keep close k-nearest neighbors
+ from the same class, while keeping examples from different classes
+ separated by a large margin. This algorithm makes no assumptions about
+ the distribution of the data.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ init : None, string or numpy array, optional (default=None)
+ Initialization of the linear transformation. Possible options are
+ 'auto', 'pca', 'identity', 'random', and a numpy array of shape
+ (n_features_a, n_features_b). If None, will be set automatically to
+ 'auto' (this option is to raise a warning if 'init' is not set,
+ and stays to its default value None, in v0.5.0).
+
+ 'auto'
+ Depending on ``n_components``, the most reasonable initialization
+ will be chosen. If ``n_components <= n_classes`` we use 'lda', as
+ it uses labels information. If not, but
+ ``n_components < min(n_features, n_samples)``, we use 'pca', as
+ it projects data in meaningful directions (those of higher
+ variance). Otherwise, we just use 'identity'.
+
+ 'pca'
+ ``n_components`` principal components of the inputs passed
+ to :meth:`fit` will be used to initialize the transformation.
+ (See `sklearn.decomposition.PCA`)
+
+ 'lda'
+ ``min(n_components, n_classes)`` most discriminative
+ components of the inputs passed to :meth:`fit` will be used to
+ initialize the transformation. (If ``n_components > n_classes``,
+ the rest of the components will be zero.) (See
+ `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
+
+ 'identity'
+ If ``n_components`` is strictly smaller than the
+ dimensionality of the inputs passed to :meth:`fit`, the identity
+ matrix will be truncated to the first ``n_components`` rows.
+
+ 'random'
+ The initial transformation will be a random array of shape
+ `(n_components, n_features)`. Each value is sampled from the
+ standard normal distribution.
+
+ numpy array
+ n_features_b must match the dimensionality of the inputs passed to
+ :meth:`fit` and n_features_a must be less than or equal to that.
+ If ``n_components`` is not None, n_features_a must match it.
+
+ k : int, optional
+ Number of neighbors to consider, not including self-edges.
+
+ min_iter : int, optional (default=50)
+ Minimum number of iterations of the optimization procedure.
+
+ max_iter : int, optional (default=1000)
+ Maximum number of iterations of the optimization procedure.
+
+ learn_rate : float, optional (default=1e-7)
+ Learning rate of the optimization procedure
+
+  convergence_tol : float, optional (default=0.001)
+    Tolerance of the optimization procedure. If the objective value varies
+    less than `convergence_tol`, we consider the algorithm has converged
+    and stop it.
+
+  verbose : bool, optional (default=False)
+    Whether to print the progress of the optimization procedure.
+
+  regularization : float, optional (default=0.5)
+    Weighting of pull and push terms, with 0.5 meaning equal weight.
+
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+
+ n_components : int or None, optional (default=None)
+ Dimensionality of reduced space (if None, defaults to dimension of X).
+
+ num_dims : Not used
+
+ .. deprecated:: 0.5.0
+ `num_dims` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use `n_components` instead.
+
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``init='random'``, ``random_state`` is used to initialize the random
+ transformation. If ``init='pca'``, ``random_state`` is passed as an
+ argument to PCA when initializing the transformation.
+
+ Attributes
+ ----------
+ n_iter_ : `int`
+ The number of iterations the solver has run.
+
+ transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
+ The learned linear transformation ``L``.
+
+ Examples
+ --------
+
+ >>> import numpy as np
+ >>> from metric_learn import LMNN
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> lmnn = LMNN(k=5, learn_rate=1e-6)
+  >>> lmnn.fit(X, Y)
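+
+  Once fit, the learned transformation (stored in ``transformer_``) can be
+  applied to embed data; a minimal usage sketch:
+
+  >>> X_embedded = lmnn.transform(X)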
+
+ Notes
+ -----
+
+ If a recent version of the Shogun Python modular (``modshogun``) library
+ is available, the LMNN implementation will use the fast C++ version from
+ there. Otherwise, the included pure-Python version will be used.
+ The two implementations differ slightly, and the C++ version is more
+ complete.
+
+ References
+ ----------
+ .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor
+ Classification `_
+ Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul
+ """
+
def __init__(self, init=None, k=3, min_iter=50, max_iter=1000,
learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
use_pca=True, verbose=False, preprocessor=None,
n_components=None, num_dims='deprecated', random_state=None):
- """Initialize the LMNN object.
-
- Parameters
- ----------
- init : None, string or numpy array, optional (default=None)
- Initialization of the linear transformation. Possible options are
- 'auto', 'pca', 'identity', 'random', and a numpy array of shape
- (n_features_a, n_features_b). If None, will be set automatically to
- 'auto' (this option is to raise a warning if 'init' is not set,
- and stays to its default value None, in v0.5.0).
-
- 'auto'
- Depending on ``n_components``, the most reasonable initialization
- will be chosen. If ``n_components <= n_classes`` we use 'lda', as
- it uses labels information. If not, but
- ``n_components < min(n_features, n_samples)``, we use 'pca', as
- it projects data in meaningful directions (those of higher
- variance). Otherwise, we just use 'identity'.
-
- 'pca'
- ``n_components`` principal components of the inputs passed
- to :meth:`fit` will be used to initialize the transformation.
- (See `sklearn.decomposition.PCA`)
-
- 'lda'
- ``min(n_components, n_classes)`` most discriminative
- components of the inputs passed to :meth:`fit` will be used to
- initialize the transformation. (If ``n_components > n_classes``,
- the rest of the components will be zero.) (See
- `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
-
- 'identity'
- If ``n_components`` is strictly smaller than the
- dimensionality of the inputs passed to :meth:`fit`, the identity
- matrix will be truncated to the first ``n_components`` rows.
-
- 'random'
- The initial transformation will be a random array of shape
- `(n_components, n_features)`. Each value is sampled from the
- standard normal distribution.
-
- numpy array
- n_features_b must match the dimensionality of the inputs passed to
- :meth:`fit` and n_features_a must be less than or equal to that.
- If ``n_components`` is not None, n_features_a must match it.
-
- k : int, optional
- Number of neighbors to consider, not including self-edges.
-
- regularization: float, optional
- Weighting of pull and push terms, with 0.5 meaning equal weight.
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
-
- n_components : int or None, optional (default=None)
- Dimensionality of reduced space (if None, defaults to dimension of X).
-
- num_dims : Not used
-
- .. deprecated:: 0.5.0
- `num_dims` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use `n_components` instead.
-
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to initialize the random
- transformation. If ``init='pca'``, ``random_state`` is passed as an
- argument to PCA when initializing the transformation.
- """
self.init = init
self.k = k
self.min_iter = min_iter
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index f59392c1..e3b0d323 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -1,17 +1,5 @@
-r"""
-Metric Learning from Relative Comparisons by Minimizing Squared Residual(LSML)
-
-`LSML` proposes a simple, yet effective, algorithm that minimizes a convex
-objective function corresponding to the sum of squared residuals of
-constraints. This algorithm uses the constraints in the form of the
-relative distance comparisons, such method is especially useful where
-pairwise constraints are not natural to obtain, thus pairwise constraints
-based algorithms become infeasible to be deployed. Furthermore, its sparsity
-extension leads to more stable estimation when the dimension is high and
-only a small amount of constraints is given.
-
-Read more in the :ref:`User Guide `.
-
+"""
+Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML)
"""
from __future__ import print_function, absolute_import, division
@@ -33,46 +21,6 @@ class _BaseLSML(MahalanobisMixin):
def __init__(self, tol=1e-3, max_iter=1000, prior=None,
verbose=False, preprocessor=None, random_state=None):
- """Initialize LSML.
-
- Parameters
- ----------
- prior : None, string or numpy array, optional (default=None)
- Prior to set for the metric. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of
- shape (n_features, n_features). For LSML, the prior should be strictly
- positive definite (PD). If `None`, will be set
- automatically to 'identity' (this is to raise a warning if
- `prior` is not set, and stays to its default value (None), in v0.5.0).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The inverse covariance matrix.
-
- 'random'
- The initial Mahalanobis matrix will be a random positive definite
- (PD) matrix of shape `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A positive definite (PD) matrix of shape
- (n_features, n_features), that will be used as such to set the
- prior.
-
- tol : float, optional
- max_iter : int, optional
- verbose : bool, optional
- if True, prints information while learning
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to set the random
- prior.
- """
self.prior = prior
self.tol = tol
self.max_iter = max_iter
@@ -178,6 +126,55 @@ def _gradient(self, metric, vab, vcd, prior_inv):
class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
"""Least Squared-residual Metric Learning (LSML)
+  `LSML` proposes a simple, yet effective, algorithm that minimizes a convex
+  objective function corresponding to the sum of squared residuals of
+  constraints. It uses constraints in the form of relative distance
+  comparisons, which is especially useful when pairwise constraints are not
+  natural to obtain, so that algorithms based on pairwise constraints become
+  infeasible to deploy. Furthermore, its sparsity extension leads to more
+  stable estimation when the dimension is high and only a small number of
+  constraints is given.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ prior : None, string or numpy array, optional (default=None)
+ Prior to set for the metric. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of
+ shape (n_features, n_features). For LSML, the prior should be strictly
+ positive definite (PD). If `None`, will be set
+ automatically to 'identity' (this is to raise a warning if
+ `prior` is not set, and stays to its default value (None), in v0.5.0).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The inverse covariance matrix.
+
+ 'random'
+ The initial Mahalanobis matrix will be a random positive definite
+ (PD) matrix of shape `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A positive definite (PD) matrix of shape
+ (n_features, n_features), that will be used as such to set the
+ prior.
+
+  tol : float, optional (default=1e-3)
+    Tolerance for the convergence procedure.
+  max_iter : int, optional (default=1000)
+    Number of maximum iterations of the convergence procedure.
+  verbose : bool, optional (default=False)
+    If True, prints information while learning.
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+    ``prior='random'``, ``random_state`` is used to set the random
+ prior.
+
Attributes
----------
n_iter_ : `int`
@@ -186,6 +183,31 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
+
+ Examples
+ --------
+ >>> from metric_learn import LSML_Supervised
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> lsml = LSML_Supervised(num_constraints=200)
+ >>> lsml.fit(X, Y)
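+
+  `LSML` itself is fit on quadruplets of points, where the first two points
+  of each quadruplet should be closer to each other than the last two, as
+  described in the `fit` method below. A minimal sketch, with arbitrary toy
+  coordinates:
+
+  >>> from metric_learn import LSML
+  >>> quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
+  ...                [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
+  ...                [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]]]
+  >>> lsml = LSML()
+  >>> lsml.fit(quadruplets)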
+
+ References
+ ----------
+ .. [1] Liu et al. `Metric Learning from Relative Comparisons by Minimizing
+ Squared Residual
+ `_. ICDM 2012.
+
+ .. [2] Adapted from https://gist.github.com/kcarnold/5439917
+
+ See Also
+ --------
+ metric_learn.LSML : The original weakly-supervised algorithm
+
+ :ref:`supervised_version` : The section of the project documentation
+ that describes the supervised version of weakly supervised estimators.
"""
def fit(self, quadruplets, weights=None):
@@ -193,7 +215,7 @@ def fit(self, quadruplets, weights=None):
Parameters
----------
- quadruplets : array-like, shape=(n_constraints, 4, n_features) or
+ quadruplets : array-like, shape=(n_constraints, 4, n_features) or \
(n_constraints, 4)
3D array-like of quadruplets of points or 2D array of quadruplets of
indicators. In order to supervise the algorithm in the right way, we
@@ -214,6 +236,58 @@ def fit(self, quadruplets, weights=None):
class LSML_Supervised(_BaseLSML, TransformerMixin):
"""Supervised version of Least Squared-residual Metric Learning (LSML)
+ `LSML_Supervised` creates quadruplets from labeled samples by taking two
+ samples from the same class, and two samples from different classes.
+  This way it builds quadruplets where the first two points must be more
+  similar than the last two points.
+
+ Parameters
+ ----------
+ tol : float, optional (default=1e-3)
+ Tolerance for the convergence procedure.
+ max_iter : int, optional (default=1000)
+ Number of maximum iterations of the convergence procedure.
+ prior : None, string or numpy array, optional (default=None)
+ Prior to set for the metric. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of
+ shape (n_features, n_features). For LSML, the prior should be strictly
+ positive definite (PD). If `None`, will be set
+ automatically to 'identity' (this is to raise a warning if
+ `prior` is not set, and stays to its default value (None), in v0.5.0).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The inverse covariance matrix.
+
+ 'random'
+ The initial Mahalanobis matrix will be a random positive definite
+ (PD) matrix of shape `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A positive definite (PD) matrix of shape
+ (n_features, n_features), that will be used as such to set the
+ prior.
+ num_labeled : Not used
+ .. deprecated:: 0.5.0
+ `num_labeled` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0.
+ num_constraints: int, optional
+ number of constraints to generate
+ weights : (m,) array of floats, optional
+ scale factor for each constraint
+ verbose : bool, optional
+ if True, prints information while learning
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+    ``prior='random'``, ``random_state`` is used to set the random
+ prior.
+
Attributes
----------
n_iter_ : `int`
@@ -227,58 +301,6 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
def __init__(self, tol=1e-3, max_iter=1000, prior=None,
num_labeled='deprecated', num_constraints=None, weights=None,
verbose=False, preprocessor=None, random_state=None):
- """Initialize the supervised version of `LSML`.
-
- `LSML_Supervised` creates quadruplets from labeled samples by taking two
- samples from the same class, and two samples from different classes.
- This way it builds quadruplets where the two first points must be more
- similar than the two last points.
-
- Parameters
- ----------
- tol : float, optional
- max_iter : int, optional
- prior : None, string or numpy array, optional (default=None)
- Prior to set for the metric. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of
- shape (n_features, n_features). For LSML, the prior should be strictly
- positive definite (PD). If `None`, will be set
- automatically to 'identity' (this is to raise a warning if
- `prior` is not set, and stays to its default value (None), in v0.5.0).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The inverse covariance matrix.
-
- 'random'
- The initial Mahalanobis matrix will be a random positive definite
- (PD) matrix of shape `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A positive definite (PD) matrix of shape
- (n_features, n_features), that will be used as such to set the
- prior.
- num_labeled : Not used
- .. deprecated:: 0.5.0
- `num_labeled` was deprecated in version 0.5.0 and will
- be removed in 0.6.0.
- num_constraints: int, optional
- number of constraints to generate
- weights : (m,) array of floats, optional
- scale factor for each constraint
- verbose : bool, optional
- if True, prints information while learning
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to set the random
- prior.
- """
_BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
verbose=verbose, preprocessor=preprocessor,
random_state=random_state)
diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
index c625b67c..ea8748be 100644
--- a/metric_learn/mlkr.py
+++ b/metric_learn/mlkr.py
@@ -1,13 +1,5 @@
-r"""
-Metric Learning for Kernel Regression(MLKR)
-
-MLKR is an algorithm for supervised metric learning, which learns a
-distance function by directly minimizing the leave-one-out regression error.
-This algorithm can also be viewed as a supervised variation of PCA and can be
-used for dimensionality reduction and high dimensional data visualization.
-
-Read more in the :ref:`User Guide `.
-
+"""
+Metric Learning for Kernel Regression (MLKR)
"""
from __future__ import division, print_function
import time
@@ -31,6 +23,81 @@
class MLKR(MahalanobisMixin, TransformerMixin):
"""Metric Learning for Kernel Regression (MLKR)
+ MLKR is an algorithm for supervised metric learning, which learns a
+ distance function by directly minimizing the leave-one-out regression error.
+ This algorithm can also be viewed as a supervised variation of PCA and can be
+ used for dimensionality reduction and high dimensional data visualization.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ n_components : int or None, optional (default=None)
+ Dimensionality of reduced space (if None, defaults to dimension of X).
+
+ num_dims : Not used
+
+ .. deprecated:: 0.5.0
+ `num_dims` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use `n_components` instead.
+
+ init : None, string or numpy array, optional (default=None)
+ Initialization of the linear transformation. Possible options are
+ 'auto', 'pca', 'identity', 'random', and a numpy array of shape
+ (n_features_a, n_features_b). If None, will be set automatically to
+ 'auto' (this option is to raise a warning if 'init' is not set,
+ and stays to its default value None, in v0.5.0).
+
+ 'auto'
+ Depending on ``n_components``, the most reasonable initialization
+ will be chosen. If ``n_components < min(n_features, n_samples)``,
+ we use 'pca', as it projects data in meaningful directions (those
+ of higher variance). Otherwise, we just use 'identity'.
+
+ 'pca'
+ ``n_components`` principal components of the inputs passed
+ to :meth:`fit` will be used to initialize the transformation.
+ (See `sklearn.decomposition.PCA`)
+
+ 'identity'
+ If ``n_components`` is strictly smaller than the
+ dimensionality of the inputs passed to :meth:`fit`, the identity
+ matrix will be truncated to the first ``n_components`` rows.
+
+ 'random'
+ The initial transformation will be a random array of shape
+ `(n_components, n_features)`. Each value is sampled from the
+ standard normal distribution.
+
+ numpy array
+ n_features_b must match the dimensionality of the inputs passed to
+ :meth:`fit` and n_features_a must be less than or equal to that.
+ If ``n_components`` is not None, n_features_a must match it.
+
+ A0: Not used.
+ .. deprecated:: 0.5.0
+ `A0` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'init' instead.
+
+  tol : float, optional (default=None)
+    Convergence tolerance for the optimization.
+
+  max_iter : int, optional (default=1000)
+    Cap on number of conjugate gradient iterations.
+
+ verbose : bool, optional (default=False)
+ Whether to print progress messages or not.
+
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``init='random'``, ``random_state`` is used to initialize the random
+ transformation. If ``init='pca'``, ``random_state`` is passed as an
+ argument to PCA when initializing the transformation.
+
Attributes
----------
n_iter_ : `int`
@@ -38,82 +105,28 @@ class MLKR(MahalanobisMixin, TransformerMixin):
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The learned linear transformation ``L``.
+
+ Examples
+ --------
+
+ >>> from metric_learn import MLKR
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> mlkr = MLKR()
+ >>> mlkr.fit(X, Y)
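+
+  Since MLKR can also be used for dimensionality reduction (see the
+  ``n_components`` parameter above), a minimal sketch projecting the same
+  data to two dimensions:
+
+  >>> mlkr_2d = MLKR(n_components=2)
+  >>> X_2d = mlkr_2d.fit_transform(X, Y)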
+
+ References
+ ----------
+  .. [1] `Metric Learning for Kernel Regression
+    `_ Kilian Q. Weinberger, Gerald Tesauro.
"""
def __init__(self, n_components=None, num_dims='deprecated', init=None,
A0='deprecated', tol=None, max_iter=1000, verbose=False,
preprocessor=None, random_state=None):
- """
- Initialize MLKR.
-
- Parameters
- ----------
- n_components : int or None, optional (default=None)
- Dimensionality of reduced space (if None, defaults to dimension of X).
-
- num_dims : Not used
-
- .. deprecated:: 0.5.0
- `num_dims` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use `n_components` instead.
-
- init : None, string or numpy array, optional (default=None)
- Initialization of the linear transformation. Possible options are
- 'auto', 'pca', 'identity', 'random', and a numpy array of shape
- (n_features_a, n_features_b). If None, will be set automatically to
- 'auto' (this option is to raise a warning if 'init' is not set,
- and stays to its default value None, in v0.5.0).
-
- 'auto'
- Depending on ``n_components``, the most reasonable initialization
- will be chosen. If ``n_components < min(n_features, n_samples)``,
- we use 'pca', as it projects data in meaningful directions (those
- of higher variance). Otherwise, we just use 'identity'.
-
- 'pca'
- ``n_components`` principal components of the inputs passed
- to :meth:`fit` will be used to initialize the transformation.
- (See `sklearn.decomposition.PCA`)
-
- 'identity'
- If ``n_components`` is strictly smaller than the
- dimensionality of the inputs passed to :meth:`fit`, the identity
- matrix will be truncated to the first ``n_components`` rows.
-
- 'random'
- The initial transformation will be a random array of shape
- `(n_components, n_features)`. Each value is sampled from the
- standard normal distribution.
-
- numpy array
- n_features_b must match the dimensionality of the inputs passed to
- :meth:`fit` and n_features_a must be less than or equal to that.
- If ``num_dims`` is not None, n_features_a must match it.
-
- A0: Not used.
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'init' instead.
-
- tol: float, optional (default=None)
- Convergence tolerance for the optimization.
-
- max_iter: int, optional
- Cap on number of conjugate gradient iterations.
-
- verbose : bool, optional (default=False)
- Whether to print progress messages or not.
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
-
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to initialize the random
- transformation. If ``init='pca'``, ``random_state`` is passed as an
- argument to PCA when initializing the transformation.
- """
self.n_components = n_components
self.num_dims = num_dims
self.init = init
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index b3e6c203..9f02425c 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -1,21 +1,4 @@
-r"""
-Metric Learning with Application for Clustering with Side Information(MMC)
-
-MMC minimizes the sum of squared distances between similar points, while
-enforcing the sum of distances between dissimilar ones to be greater than one.
-This leads to a convex and, thus, local-minima-free optimization problem that
-can be solved efficiently.
-However, the algorithm involves the computation of eigenvalues, which is the
-main speed-bottleneck. Since it has initially been designed for clustering
-applications, one of the implicit assumptions of MMC is that all classes form
-a compact set, i.e., follow a unimodal distribution, which restricts the
-possible use-cases of this method. However, it is one of the earliest and a
-still often cited technique.
-
-Read more in the :ref:`User Guide `.
-
-"""
-
+"""Mahalanobis Metric for Clustering (MMC)"""
from __future__ import print_function, absolute_import, division
import warnings
import numpy as np
@@ -30,7 +13,6 @@
class _BaseMMC(MahalanobisMixin):
- """Mahalanobis Metric for Clustering (MMC)"""
_tuple_size = 2 # constraints are pairs
@@ -38,61 +20,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
init=None, A0='deprecated', diagonal=False,
diagonal_c=1.0, verbose=False, preprocessor=None,
random_state=None):
- """Initialize MMC.
- Parameters
- ----------
- max_iter : int, optional
- max_proj : int, optional
- convergence_threshold : float, optional
- init : None, string or numpy array, optional (default=None)
- Initialization of the Mahalanobis matrix. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of
- shape (n_features, n_features). If None, will be set
- automatically to 'identity' (this is to raise a warning if
- 'init' is not set, and stays to its default value (None), in v0.5.0).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The (pseudo-)inverse of the covariance matrix.
-
- 'random'
- The initial Mahalanobis matrix will be a random SPD matrix of shape
- `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- An SPD matrix of shape (n_features, n_features), that will
- be used as such to initialize the metric.
-
- verbose : bool, optional
- if True, prints information while learning
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be gotten like this: X[indices].
- A0 : Not used.
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'init' instead.
- diagonal : bool, optional
- if True, a diagonal metric will be learned,
- i.e., a simple scaling of dimensions. The initialization will then
- be the diagonal coefficients of the matrix given as 'init'.
- diagonal_c : float, optional
- weight of the dissimilarity constraint for diagonal
- metric learning
- verbose : bool, optional
- if True, prints information while learning
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be gotten like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to initialize the random
- transformation.
- """
self.max_iter = max_iter
self.max_proj = max_proj
self.convergence_threshold = convergence_threshold
@@ -403,6 +330,80 @@ def _D_constraint(self, neg_pairs, w):
class MMC(_BaseMMC, _PairsClassifierMixin):
"""Mahalanobis Metric for Clustering (MMC)
+ MMC minimizes the sum of squared distances between similar points, while
+ enforcing the sum of distances between dissimilar ones to be greater than
+ one. This leads to a convex and, thus, local-minima-free optimization
+ problem that can be solved efficiently.
+ However, the algorithm involves the computation of eigenvalues, which is the
+ main speed-bottleneck. Since it has initially been designed for clustering
+ applications, one of the implicit assumptions of MMC is that all classes form
+ a compact set, i.e., follow a unimodal distribution, which restricts the
+  possible use-cases of this method. However, it is one of the earliest
+  metric learning methods and is still often cited.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ max_iter : int, optional (default=100)
+ Maximum number of iterations of the convergence procedure.
+
+ max_proj : int, optional (default=10000)
+ Maximum number of projection steps.
+
+  convergence_threshold : float, optional (default=1e-3)
+ Convergence threshold for the convergence procedure.
+
+ init : None, string or numpy array, optional (default=None)
+ Initialization of the Mahalanobis matrix. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of
+ shape (n_features, n_features). If None, will be set
+ automatically to 'identity' (this is to raise a warning if
+ 'init' is not set, and stays to its default value (None), in v0.5.0).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The (pseudo-)inverse of the covariance matrix.
+
+ 'random'
+      The initial Mahalanobis matrix will be a random SPD matrix of shape
+      `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ An SPD matrix of shape (n_features, n_features), that will
+ be used as such to initialize the metric.
+
+ A0 : Not used.
+ .. deprecated:: 0.5.0
+ `A0` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'init' instead.
+ diagonal : bool, optional
+ if True, a diagonal metric will be learned,
+ i.e., a simple scaling of dimensions. The initialization will then
+ be the diagonal coefficients of the matrix given as 'init'.
+ diagonal_c : float, optional
+ weight of the dissimilarity constraint for diagonal
+ metric learning
+ verbose : bool, optional
+ if True, prints information while learning
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be gotten like this: X[indices].
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``init='random'``, ``random_state`` is used to initialize the random
+ transformation.
+
Attributes
----------
n_iter_ : `int`
@@ -416,6 +417,29 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
If the distance metric between two points is lower than this threshold,
points will be classified as similar, otherwise they will be
classified as dissimilar.
+
+ Examples
+ --------
+ >>> from metric_learn import MMC_Supervised
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> mmc = MMC_Supervised(num_constraints=200)
+ >>> mmc.fit(X, Y)
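+
+  `MMC` itself is fit on pairs of points labeled as similar (+1) or
+  dissimilar (-1), as described in the `fit` method below. A minimal
+  sketch, with arbitrary toy coordinates:
+
+  >>> from metric_learn import MMC
+  >>> pairs = [[[1.2, 7.5], [1.3, 1.5]],
+  ...          [[6.4, 2.6], [6.2, 9.7]],
+  ...          [[1.3, 4.5], [3.2, 4.6]],
+  ...          [[6.2, 5.5], [5.4, 5.4]]]
+  >>> y = [1, 1, -1, -1]
+  >>> mmc = MMC()
+  >>> mmc.fit(pairs, y)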
+
+ References
+ ----------
+ .. [1] `Distance metric learning with application to clustering with
+ side-information `_
+ Xing, Jordan, Russell, Ng.
+
+ See Also
+ --------
+ metric_learn.MMC : The original weakly-supervised algorithm
+ :ref:`supervised_version` : The section of the project documentation
+ that describes the supervised version of weakly supervised estimators.
"""
def fit(self, pairs, y, calibration_params=None):
@@ -426,7 +450,7 @@ def fit(self, pairs, y, calibration_params=None):
Parameters
----------
- pairs : array-like, shape=(n_constraints, 2, n_features) or
+ pairs : array-like, shape=(n_constraints, 2, n_features) or \
(n_constraints, 2)
3D Array of pairs with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
@@ -453,6 +477,73 @@ def fit(self, pairs, y, calibration_params=None):
class MMC_Supervised(_BaseMMC, TransformerMixin):
"""Supervised version of Mahalanobis Metric for Clustering (MMC)
+  `MMC_Supervised` creates pairs of similar samples by taking samples from
+  the same class, and pairs of dissimilar samples by taking samples from
+  different classes. It then passes these pairs to `MMC` for training.
+
+ Parameters
+ ----------
+  max_iter : int, optional (default=100)
+    Maximum number of iterations of the convergence procedure.
+  max_proj : int, optional (default=10000)
+    Maximum number of projection steps.
+  convergence_threshold : float, optional (default=1e-6)
+    Convergence threshold for the convergence procedure.
+ num_labeled : Not used
+ .. deprecated:: 0.5.0
+ `num_labeled` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0.
+ num_constraints: int, optional
+ number of constraints to generate
+ init : None, string or numpy array, optional (default=None)
+ Initialization of the Mahalanobis matrix. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of
+ shape (n_features, n_features). If None, will be set
+ automatically to 'identity' (this is to raise a warning if
+ 'init' is not set, and stays to its default value (None), in v0.5.0).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The (pseudo-)inverse of the covariance matrix.
+
+ 'random'
+ The initial Mahalanobis matrix will be a random SPD matrix of
+ shape `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A numpy array of shape (n_features, n_features), that will
+ be used as such to initialize the metric.
+
+ A0 : Not used.
+ .. deprecated:: 0.5.0
+ `A0` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'init' instead.
+ diagonal : bool, optional
+ if True, a diagonal metric will be learned,
+ i.e., a simple scaling of dimensions
+ diagonal_c : float, optional
+ weight of the dissimilarity constraint for diagonal
+ metric learning
+ verbose : bool, optional
+ if True, prints information while learning
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``init='random'``, ``random_state`` is used to initialize the random
+ Mahalanobis matrix.
+
Attributes
----------
n_iter_ : `int`
@@ -467,71 +558,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
num_labeled='deprecated', num_constraints=None, init=None,
A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False,
preprocessor=None, random_state=None):
- """Initialize the supervised version of `MMC`.
-
- `MMC_Supervised` creates pairs of similar sample by taking same class
- samples, and pairs of dissimilar samples by taking different class
- samples. It then passes these pairs to `MMC` for training.
-
- Parameters
- ----------
- max_iter : int, optional
- max_proj : int, optional
- convergence_threshold : float, optional
- num_labeled : Not used
- .. deprecated:: 0.5.0
- `num_labeled` was deprecated in version 0.5.0 and will
- be removed in 0.6.0.
- num_constraints: int, optional
- number of constraints to generate
- init : None, string or numpy array, optional (default=None)
- Initialization of the Mahalanobis matrix. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of
- shape (n_features, n_features). If None, will be set
- automatically to 'identity' (this is to raise a warning if
- 'init' is not set, and stays to its default value (None), in v0.5.0).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The (pseudo-)inverse of the covariance matrix.
-
- 'random'
- The initial Mahalanobis matrix will be a random SPD matrix of
- shape `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A numpy array of shape (n_features, n_features), that will
- be used as such to initialize the metric.
-
- verbose : bool, optional
- if True, prints information while learning
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be gotten like this: X[indices].
- A0 : Not used.
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'init' instead.
- diagonal : bool, optional
- if True, a diagonal metric will be learned,
- i.e., a simple scaling of dimensions
- diagonal_c : float, optional
- weight of the dissimilarity constraint for diagonal
- metric learning
- verbose : bool, optional
- if True, prints information while learning
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to initialize the random
- Mahalanobis matrix.
- """
_BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj,
convergence_threshold=convergence_threshold,
init=init, A0=A0, diagonal=diagonal,
diff --git a/metric_learn/nca.py b/metric_learn/nca.py
index 2b541a64..dcfdac8a 100644
--- a/metric_learn/nca.py
+++ b/metric_learn/nca.py
@@ -1,15 +1,5 @@
-r"""
-Neighborhood Components Analysis(NCA)
-
-NCA is a distance metric learning algorithm which aims to improve the
-accuracy of nearest neighbors classification compared to the standard
-Euclidean distance. The algorithm directly maximizes a stochastic variant
-of the leave-one-out k-nearest neighbors(KNN) score on the training set.
-It can also learn a low-dimensional linear transformation of data that can
-be used for data visualization and fast classification.
-
-Read more in the :ref:`User Guide `.
-
+"""
+Neighborhood Components Analysis (NCA)
"""
from __future__ import absolute_import
@@ -32,6 +22,95 @@
class NCA(MahalanobisMixin, TransformerMixin):
"""Neighborhood Components Analysis (NCA)
+ NCA is a distance metric learning algorithm which aims to improve the
+ accuracy of nearest neighbors classification compared to the standard
+ Euclidean distance. The algorithm directly maximizes a stochastic variant
+  of the leave-one-out k-nearest neighbors (KNN) score on the training set.
+ It can also learn a low-dimensional linear transformation of data that can
+ be used for data visualization and fast classification.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ init : None, string or numpy array, optional (default=None)
+ Initialization of the linear transformation. Possible options are
+ 'auto', 'pca', 'identity', 'random', and a numpy array of shape
+ (n_features_a, n_features_b). If None, will be set automatically to
+ 'auto' (this option is to raise a warning if 'init' is not set,
+ and stays to its default value None, in v0.5.0).
+
+ 'auto'
+ Depending on ``n_components``, the most reasonable initialization
+ will be chosen. If ``n_components <= n_classes`` we use 'lda', as
+ it uses labels information. If not, but
+ ``n_components < min(n_features, n_samples)``, we use 'pca', as
+ it projects data in meaningful directions (those of higher
+ variance). Otherwise, we just use 'identity'.
+
+ 'pca'
+ ``n_components`` principal components of the inputs passed
+ to :meth:`fit` will be used to initialize the transformation.
+ (See `sklearn.decomposition.PCA`)
+
+ 'lda'
+ ``min(n_components, n_classes)`` most discriminative
+ components of the inputs passed to :meth:`fit` will be used to
+ initialize the transformation. (If ``n_components > n_classes``,
+ the rest of the components will be zero.) (See
+ `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
+
+ 'identity'
+ If ``n_components`` is strictly smaller than the
+ dimensionality of the inputs passed to :meth:`fit`, the identity
+ matrix will be truncated to the first ``n_components`` rows.
+
+ 'random'
+ The initial transformation will be a random array of shape
+ `(n_components, n_features)`. Each value is sampled from the
+ standard normal distribution.
+
+ numpy array
+ n_features_b must match the dimensionality of the inputs passed to
+ :meth:`fit` and n_features_a must be less than or equal to that.
+ If ``n_components`` is not None, n_features_a must match it.
+
+ n_components : int or None, optional (default=None)
+ Dimensionality of reduced space (if None, defaults to dimension of X).
+
+ num_dims : Not used
+
+ .. deprecated:: 0.5.0
+ `num_dims` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use `n_components` instead.
+
+ max_iter : int, optional (default=100)
+ Maximum number of iterations done by the optimization algorithm.
+
+ tol : float, optional (default=None)
+ Convergence tolerance for the optimization.
+
+ verbose : bool, optional (default=False)
+ Whether to print progress messages or not.
+
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``init='random'``, ``random_state`` is used to initialize the random
+ transformation. If ``init='pca'``, ``random_state`` is passed as an
+ argument to PCA when initializing the transformation.
+
+ Examples
+ --------
+
+ >>> import numpy as np
+ >>> from metric_learn import NCA
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> nca = NCA(max_iter=1000)
+ >>> nca.fit(X, Y)
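+
+  Because `NCA` is a transformer, it can also be chained with a k-nearest
+  neighbors classifier in a scikit-learn pipeline; a minimal sketch,
+  assuming scikit-learn's `make_pipeline` and `KNeighborsClassifier`:
+
+  >>> from sklearn.pipeline import make_pipeline
+  >>> from sklearn.neighbors import KNeighborsClassifier
+  >>> nca_knn = make_pipeline(NCA(max_iter=1000),
+  ...                         KNeighborsClassifier(n_neighbors=3))
+  >>> nca_knn.fit(X, Y)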
+
Attributes
----------
n_iter_ : `int`
@@ -39,81 +118,21 @@ class NCA(MahalanobisMixin, TransformerMixin):
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The learned linear transformation ``L``.
+
+ References
+ ----------
+ .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov. `Neighbourhood
+ Components Analysis
+ `_.
+ Advances in Neural Information Processing Systems. 17, 513-520, 2005.
+
+ .. [2] Wikipedia entry on `Neighborhood Components Analysis
+ `_
"""
def __init__(self, init=None, n_components=None, num_dims='deprecated',
max_iter=100, tol=None, verbose=False, preprocessor=None,
random_state=None):
- """Neighborhood Components Analysis
-
- Parameters
- ----------
- init : None, string or numpy array, optional (default=None)
- Initialization of the linear transformation. Possible options are
- 'auto', 'pca', 'identity', 'random', and a numpy array of shape
- (n_features_a, n_features_b). If None, will be set automatically to
- 'auto' (this option is to raise a warning if 'init' is not set,
- and stays to its default value None, in v0.5.0).
-
- 'auto'
- Depending on ``n_components``, the most reasonable initialization
- will be chosen. If ``n_components <= n_classes`` we use 'lda', as
- it uses labels information. If not, but
- ``n_components < min(n_features, n_samples)``, we use 'pca', as
- it projects data in meaningful directions (those of higher
- variance). Otherwise, we just use 'identity'.
-
- 'pca'
- ``n_components`` principal components of the inputs passed
- to :meth:`fit` will be used to initialize the transformation.
- (See `sklearn.decomposition.PCA`)
-
- 'lda'
- ``min(n_components, n_classes)`` most discriminative
- components of the inputs passed to :meth:`fit` will be used to
- initialize the transformation. (If ``n_components > n_classes``,
- the rest of the components will be zero.) (See
- `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)
-
- 'identity'
- If ``n_components`` is strictly smaller than the
- dimensionality of the inputs passed to :meth:`fit`, the identity
- matrix will be truncated to the first ``n_components`` rows.
-
- 'random'
- The initial transformation will be a random array of shape
- `(n_components, n_features)`. Each value is sampled from the
- standard normal distribution.
-
- numpy array
- n_features_b must match the dimensionality of the inputs passed to
- :meth:`fit` and n_features_a must be less than or equal to that.
- If ``n_components`` is not None, n_features_a must match it.
-
- n_components : int or None, optional (default=None)
- Dimensionality of reduced space (if None, defaults to dimension of X).
-
- num_dims : Not used
-
- .. deprecated:: 0.5.0
- `num_dims` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use `n_components` instead.
-
- max_iter : int, optional (default=100)
- Maximum number of iterations done by the optimization algorithm.
-
- tol : float, optional (default=None)
- Convergence tolerance for the optimization.
-
- verbose : bool, optional (default=False)
- Whether to print progress messages or not.
-
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to initialize the random
- transformation. If ``init='pca'``, ``random_state`` is passed as an
- argument to PCA when initializing the transformation.
- """
self.n_components = n_components
self.init = init
self.num_dims = num_dims
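A note on the `init` options documented above: the 'auto' rule amounts to a
small decision function, and choosing an explicit `init` is a one-line change.
The sketch below is illustrative only (the helper `choose_auto_init` is
hypothetical and not part of metric_learn); the usage snippet relies only on
the public NCA parameters shown in this diff (`init`, `n_components`,
`max_iter`, `random_state`)::

    # Hypothetical helper mirroring the 'auto' heuristic described in the
    # NCA docstring above; metric_learn's internal code may differ.
    def choose_auto_init(n_components, n_classes, n_features, n_samples):
        if n_components <= n_classes:
            return 'lda'       # LDA makes use of the label information
        if n_components < min(n_features, n_samples):
            return 'pca'       # PCA keeps the highest-variance directions
        return 'identity'

    # Hedged usage example: pick an explicit initialization instead of 'auto'.
    from sklearn.datasets import load_iris
    from metric_learn import NCA

    X, y = load_iris(return_X_y=True)
    nca = NCA(init='pca', n_components=2, max_iter=1000, random_state=42)
    nca.fit(X, y)
    X_embedded = nca.transform(X)   # shape (n_samples, 2)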
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 1dbffdd6..503e2408 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -1,14 +1,5 @@
-r"""
-Relative Components Analysis(RCA)
-
-RCA learns a full rank Mahalanobis distance metric based on a weighted sum of
-in-chunklets covariance matrices. It applies a global linear transformation to
-assign large weights to relevant dimensions and low weights to irrelevant
-dimensions. Those relevant dimensions are estimated using "chunklets", subsets
-of points that are known to belong to the same class.
-
-Read more in the :ref:`User Guide `.
-
+"""
+ Relevant Components Analysis (RCA)
"""
from __future__ import absolute_import
@@ -42,6 +33,52 @@ def _chunk_mean_centering(data, chunks):
class RCA(MahalanobisMixin, TransformerMixin):
"""Relevant Components Analysis (RCA)
+
+ RCA learns a full rank Mahalanobis distance metric based on a weighted sum of
+ in-chunklets covariance matrices. It applies a global linear transformation
+ to assign large weights to relevant dimensions and low weights to irrelevant
+ dimensions. Those relevant dimensions are estimated using "chunklets",
+ subsets of points that are known to belong to the same class.
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ n_components : int or None, optional (default=None)
+ Dimensionality of reduced space (if None, defaults to dimension of X).
+
+ num_dims : Not used
+
+ .. deprecated:: 0.5.0
+ `num_dims` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use `n_components` instead.
+
+ pca_comps : Not used
+ .. deprecated:: 0.5.0
+ `pca_comps` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0.
+
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+
+ Examples
+ --------
+ >>> from metric_learn import RCA_Supervised
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2)
+ >>> rca.fit(X, Y)
+
+ References
+ ----------
+ .. [1] Noam Shental, et al. `Adjustment learning and relevant component
+ analysis `_.
+
+
Attributes
----------
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
@@ -50,28 +87,6 @@ class RCA(MahalanobisMixin, TransformerMixin):
def __init__(self, n_components=None, num_dims='deprecated',
pca_comps='deprecated', preprocessor=None):
- """Initialize the learner.
-
- Parameters
- ----------
- n_components : int or None, optional (default=None)
- Dimensionality of reduced space (if None, defaults to dimension of X).
-
- num_dims : Not used
-
- .. deprecated:: 0.5.0
- `num_dims` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use `n_components` instead.
-
- pca_comps : Not used
- .. deprecated:: 0.5.0
- `pca_comps` was deprecated in version 0.5.0 and will
- be removed in 0.6.0.
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- """
self.n_components = n_components
self.num_dims = num_dims
self.pca_comps = pca_comps
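For readers of the RCA docstring above: the chunklets are typically encoded as
one integer label per sample, with -1 marking samples that belong to no
chunklet, and passed to `fit` as its second argument. The snippet below is a
minimal sketch under that assumption; the toy data and chunk assignments are
made up for illustration::

    import numpy as np
    from metric_learn import RCA

    # Six 2-D points; chunks[i] is the chunklet index of point i, and -1
    # marks points that are not assigned to any chunklet.
    X = np.array([[0.0, 0.0], [0.1, 0.2], [1.0, 1.0],
                  [1.1, 0.9], [5.0, 5.0], [5.2, 4.8]])
    chunks = np.array([0, 0, 1, 1, -1, -1])

    rca = RCA(n_components=2)
    rca.fit(X, chunks)               # learns from within-chunklet covariances
    X_transformed = rca.transform(X)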
@@ -153,6 +168,27 @@ def _inv_sqrtm(x):
class RCA_Supervised(RCA):
"""Supervised version of Relevant Components Analysis (RCA)
+
+ `RCA_Supervised` creates chunks of similar points by first sampling a
+ class, taking `chunk_size` elements in it, and repeating the process
+ `num_chunks` times.
+
+ Parameters
+ ----------
+ n_components : int or None, optional (default=None)
+ Dimensionality of reduced space (if None, defaults to dimension of X).
+
+ num_dims : Not used
+
+ .. deprecated:: 0.5.0
+ `num_dims` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use `n_components` instead.
+
+ num_chunks : int, optional (default=100)
+ Number of chunks to generate.
+
+ chunk_size : int, optional (default=2)
+ Number of points per chunk.
+
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+
Attributes
----------
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
@@ -162,29 +198,6 @@ class RCA_Supervised(RCA):
def __init__(self, num_dims='deprecated', n_components=None,
pca_comps='deprecated', num_chunks=100, chunk_size=2,
preprocessor=None):
- """Initialize the supervised version of `RCA`.
-
- `RCA_Supervised` creates chunks of similar points by first sampling a
- class, taking `chunk_size` elements in it, and repeating the process
- `num_chunks` times.
-
- Parameters
- ----------
- n_components : int or None, optional (default=None)
- Dimensionality of reduced space (if None, defaults to dimension of X).
-
- num_dims : Not used
-
- .. deprecated:: 0.5.0
- `num_dims` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use `n_components` instead.
-
- num_chunks: int, optional
- chunk_size: int, optional
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- """
RCA.__init__(self, num_dims=num_dims, n_components=n_components,
pca_comps=pca_comps, preprocessor=preprocessor)
self.num_chunks = num_chunks
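The `RCA_Supervised` docstring above describes its chunk generation as: sample
a class, draw `chunk_size` points from it, and repeat `num_chunks` times. A
rough, self-contained sketch of that scheme (an illustration of the idea, not
the library's actual implementation) could look like this::

    import numpy as np

    # Illustrative sketch of the chunk-generation scheme described in the
    # RCA_Supervised docstring; not the library's actual routine.
    def make_chunks(y, num_chunks=100, chunk_size=2, random_state=None):
        rng = np.random.RandomState(random_state)
        y = np.asarray(y)
        chunks = -np.ones(len(y), dtype=int)    # -1 = not in any chunklet
        classes = np.unique(y)
        for chunk_idx in range(num_chunks):
            label = rng.choice(classes)
            free = np.where((y == label) & (chunks == -1))[0]
            if len(free) < chunk_size:
                continue                        # not enough unused points left
            picked = rng.choice(free, chunk_size, replace=False)
            chunks[picked] = chunk_idx
        return chunks

With the iris labels and `num_chunks=30, chunk_size=2` (the values used in the
docstring example above), at most 60 of the 150 points end up in chunklets.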
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index c5e63fa8..70e65c86 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -1,15 +1,5 @@
-r"""
-Sparse High-Dimensional Metric Learning(SDML)
-
-SDML is an efficient sparse metric learning in high-dimensional space via
-double regularization: an L1-penalization on the off-diagonal elements of the
-Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant divergence between
-:math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either :math:`\mathbf{I}`
-or :math:`\mathbf{\Omega}^{-1}`, where :math:`\mathbf{\Omega}` is the
-covariance matrix).
-
-Read more in the :ref:`User Guide `.
-
+"""
+Sparse High-Dimensional Metric Learning (SDML)
"""
from __future__ import absolute_import
@@ -38,55 +28,6 @@ class _BaseSDML(MahalanobisMixin):
def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None,
use_cov='deprecated', verbose=False, preprocessor=None,
random_state=None):
- """
- Parameters
- ----------
- balance_param : float, optional
- trade off between sparsity and M0 prior
-
- sparsity_param : float, optional
- trade off between optimizer and sparseness (see graph_lasso)
-
- prior : None, string or numpy array, optional (default=None)
- Prior to set for the metric. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of
- shape (n_features, n_features). For SDML, the prior should be strictly
- positive definite (PD). If `None`, will be set
- automatically to 'identity' (this is to raise a warning if
- `prior` is not set, and stays to its default value (None), in v0.5.0).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The inverse covariance matrix.
-
- 'random'
- The prior will be a random positive definite (PD) matrix of shape
- `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A positive definite (PD) matrix of shape
- (n_features, n_features), that will be used as such to set the
- prior.
-
- use_cov : Not used.
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'prior' instead.
-
- verbose : bool, optional
- if True, prints information while learning
-
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be gotten like this: X[indices].
-
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``prior='random'``, ``random_state`` is used to set the prior.
- """
self.balance_param = balance_param
self.sparsity_param = sparsity_param
self.prior = prior
@@ -190,6 +131,63 @@ def _fit(self, pairs, y):
class SDML(_BaseSDML, _PairsClassifierMixin):
"""Sparse Distance Metric Learning (SDML)
+
+ SDML is an efficient sparse metric learning method for high-dimensional
+ spaces, using double regularization: an L1-penalization on the off-diagonal
+ elements of the Mahalanobis matrix :math:`\mathbf{M}`, and a log-determinant
+ divergence between :math:`\mathbf{M}` and :math:`\mathbf{M_0}` (set as either
+ :math:`\mathbf{I}` or :math:`\mathbf{\Omega}^{-1}`, where
+ :math:`\mathbf{\Omega}` is the covariance matrix).
+
+ Read more in the :ref:`User Guide `.
+
+ Parameters
+ ----------
+ balance_param : float, optional (default=0.5)
+ trade off between sparsity and M0 prior
+
+ sparsity_param : float, optional (default=0.01)
+ trade off between optimizer and sparseness (see graph_lasso)
+
+ prior : None, string or numpy array, optional (default=None)
+ Prior to set for the metric. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of
+ shape (n_features, n_features). For SDML, the prior should be strictly
+ positive definite (PD). If `None`, it will be set
+ automatically to 'identity' (this is only to raise a warning if
+ `prior` is not set and stays at its default value (None), in v0.5.0).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The inverse covariance matrix.
+
+ 'random'
+ The prior will be a random positive definite (PD) matrix of shape
+ `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A positive definite (PD) matrix of shape
+ (n_features, n_features), that will be used as such to set the
+ prior.
+
+ use_cov : Not used.
+ .. deprecated:: 0.5.0
+ `use_cov` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'prior' instead.
+
+ verbose : bool, optional (default=False)
+ if True, prints information while learning
+
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be gotten like this: X[indices].
+
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``prior='random'``, ``random_state`` is used to set the prior.
+
Attributes
----------
transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
@@ -200,6 +198,27 @@ class SDML(_BaseSDML, _PairsClassifierMixin):
If the distance metric between two points is lower than this threshold,
points will be classified as similar, otherwise they will be
classified as dissimilar.
+
+ Examples
+ --------
+ >>> from metric_learn import SDML_Supervised
+ >>> from sklearn.datasets import load_iris
+ >>> iris_data = load_iris()
+ >>> X = iris_data['data']
+ >>> Y = iris_data['target']
+ >>> sdml = SDML_Supervised(num_constraints=200)
+ >>> sdml.fit(X, Y)
+
+ References
+ ----------
+
+ .. [1] Qi et al.
+ An efficient sparse metric learning in high-dimensional space via
+ L1-penalized log-determinant regularization. ICML 2009.
+ http://lms.comp.nus.edu.sg/sites/default/files/publication\
+-attachments/icml09-guojun.pdf
+
+ .. [2] Adapted from https://gist.github.com/kcarnold/5439945
"""
def fit(self, pairs, y, calibration_params=None):
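To make the SDML example above more concrete at the pairs level: `SDML` itself
is fit on explicit pairs rather than on class labels. The snippet below is a
hedged sketch; it assumes the usual metric_learn convention that `y` is +1 for
similar pairs and -1 for dissimilar pairs, and the tiny toy data is only there
to show the shapes (on such small inputs the underlying solver may emit
convergence warnings)::

    import numpy as np
    from metric_learn import SDML

    # Four pairs of 3-D points: the first two pairs are labelled similar
    # (+1), the last two dissimilar (-1).
    pairs = np.array([[[1.2, 7.5, 1.3], [1.3, 1.5, 1.2]],
                      [[6.4, 2.6, 2.3], [6.2, 9.7, 2.2]],
                      [[1.3, 4.5, 1.2], [3.2, 4.6, 8.0]],
                      [[3.3, 0.5, 2.1], [8.2, 9.7, 3.1]]])
    y = np.array([1, 1, -1, -1])

    sdml = SDML(prior='identity', balance_param=0.5, random_state=42)
    sdml.fit(pairs, y)
    predictions = sdml.predict(pairs)   # +1 / -1 for each pair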
@@ -210,7 +229,7 @@ def fit(self, pairs, y, calibration_params=None):
Parameters
----------
- pairs : array-like, shape=(n_constraints, 2, n_features) or
+ pairs : array-like, shape=(n_constraints, 2, n_features) or \
(n_constraints, 2)
3D Array of pairs with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
@@ -238,74 +257,78 @@ def fit(self, pairs, y, calibration_params=None):
class SDML_Supervised(_BaseSDML, TransformerMixin):
"""Supervised version of Sparse Distance Metric Learning (SDML)
+
+ `SDML_Supervised` creates pairs of similar samples by taking same-class
+ samples, and pairs of dissimilar samples by taking different-class
+ samples. It then passes these pairs to `SDML` for training.
+
+ Parameters
+ ----------
+ balance_param : float, optional (default=0.5)
+ trade off between sparsity and M0 prior
+ sparsity_param : float, optional (default=0.01)
+ trade off between optimizer and sparseness (see graph_lasso)
+ prior : None, string or numpy array, optional (default=None)
+ Prior to set for the metric. Possible options are
+ 'identity', 'covariance', 'random', and a numpy array of
+ shape (n_features, n_features). For SDML, the prior should be strictly
+ positive definite (PD). If `None`, it will be set
+ automatically to 'identity' (this is only to raise a warning if
+ `prior` is not set and stays at its default value (None), in v0.5.0).
+
+ 'identity'
+ An identity matrix of shape (n_features, n_features).
+
+ 'covariance'
+ The inverse covariance matrix.
+
+ 'random'
+ The prior will be a random SPD matrix of shape
+ `(n_features, n_features)`, generated using
+ `sklearn.datasets.make_spd_matrix`.
+
+ numpy array
+ A positive definite (PD) matrix of shape
+ (n_features, n_features), that will be used as such to set the
+ prior.
+
+ use_cov : Not used.
+ .. deprecated:: 0.5.0
+ `use_cov` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0. Use 'prior' instead.
+
+ num_labeled : Not used
+ .. deprecated:: 0.5.0
+ `num_labeled` was deprecated in version 0.5.0 and will
+ be removed in 0.6.0.
+ num_constraints : int, optional (default=None)
+ number of constraints to generate
+ verbose : bool, optional (default=False)
+ if True, prints information while learning
+ preprocessor : array-like, shape=(n_samples, n_features) or callable
+ The preprocessor to call to get tuples from indices. If array-like,
+ tuples will be formed like this: X[indices].
+ random_state : int or numpy.RandomState or None, optional (default=None)
+ A pseudo random number generator object or a seed for it if int. If
+ ``prior='random'``, ``random_state`` is used to set the random
+ prior.
+
Attributes
----------
transformer_ : `numpy.ndarray`, shape=(n_features, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
+
+ See Also
+ --------
+ metric_learn.SDML : The original weakly-supervised algorithm
+ :ref:`supervised_version` : The section of the project documentation
+ that describes the supervised version of weakly supervised estimators.
"""
def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None,
use_cov='deprecated', num_labeled='deprecated',
num_constraints=None, verbose=False, preprocessor=None,
random_state=None):
- """Initialize the supervised version of `SDML`.
-
- `SDML_Supervised` creates pairs of similar sample by taking same class
- samples, and pairs of dissimilar samples by taking different class
- samples. It then passes these pairs to `SDML` for training.
-
- Parameters
- ----------
- balance_param : float, optional
- trade off between sparsity and M0 prior
- sparsity_param : float, optional
- trade off between optimizer and sparseness (see graph_lasso)
- prior : None, string or numpy array, optional (default=None)
- Prior to set for the metric. Possible options are
- 'identity', 'covariance', 'random', and a numpy array of
- shape (n_features, n_features). For SDML, the prior should be strictly
- positive definite (PD). If `None`, will be set
- automatically to 'identity' (this is to raise a warning if
- `prior` is not set, and stays to its default value (None), in v0.5.0).
-
- 'identity'
- An identity matrix of shape (n_features, n_features).
-
- 'covariance'
- The inverse covariance matrix.
-
- 'random'
- The prior will be a random SPD matrix of shape
- `(n_features, n_features)`, generated using
- `sklearn.datasets.make_spd_matrix`.
-
- numpy array
- A positive definite (PD) matrix of shape
- (n_features, n_features), that will be used as such to set the
- prior.
-
- use_cov : Not used.
- .. deprecated:: 0.5.0
- `A0` was deprecated in version 0.5.0 and will
- be removed in 0.6.0. Use 'prior' instead.
-
- num_labeled : Not used
- .. deprecated:: 0.5.0
- `num_labeled` was deprecated in version 0.5.0 and will
- be removed in 0.6.0.
- num_constraints : int, optional
- number of constraints to generate
- verbose : bool, optional
- if True, prints information while learning
- preprocessor : array-like, shape=(n_samples, n_features) or callable
- The preprocessor to call to get tuples from indices. If array-like,
- tuples will be formed like this: X[indices].
- random_state : int or numpy.RandomState or None, optional (default=None)
- A pseudo random number generator object or a seed for it if int. If
- ``init='random'``, ``random_state`` is used to set the random
- prior.
- """
_BaseSDML.__init__(self, balance_param=balance_param,
sparsity_param=sparsity_param, prior=prior,
use_cov=use_cov, verbose=verbose,