
[MRG] Uniformize num_dims and add it for LMNN #193


Merged
14 commits merged on Jun 7, 2019
10 changes: 10 additions & 0 deletions metric_learn/_util.py
@@ -411,3 +411,13 @@ def validate_vector(u, dtype=None):
if u.ndim > 1:
raise ValueError("Input vector should be 1-D.")
return u


def _check_n_components(n_features, n_components):
"""Checks that n_components is less than n_features and deal with the None
case"""
if n_components is None:
return n_features
if 0 < n_components <= n_features:
return n_components
raise ValueError('Invalid n_components, must be in [1, %d]' % n_features)
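
The new helper centralizes the dimension check that each learner previously duplicated. A quick illustrative sketch of how it behaves (assuming it is imported from metric_learn._util, as the mlkr.py hunk below does):

from metric_learn._util import _check_n_components

_check_n_components(5, None)  # returns 5: None falls back to n_features
_check_n_components(5, 3)     # returns 3: a valid value is passed through
_check_n_components(5, 6)     # raises ValueError: must be in [1, 5]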
6 changes: 3 additions & 3 deletions metric_learn/base_metric.py
@@ -172,7 +172,7 @@ class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner,

Attributes
----------
transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The learned linear transformation ``L``.
"""

@@ -232,7 +232,7 @@ def transform(self, X):

Returns
-------
X_embedded : `numpy.ndarray`, shape=(n_samples, num_dims)
X_embedded : `numpy.ndarray`, shape=(n_samples, n_components)
The embedded data points.
"""
X_checked = check_input(X, type_of_inputs='classic', estimator=self,
@@ -288,7 +288,7 @@ def get_mahalanobis_matrix(self):

Returns
-------
M : `numpy.ndarray`, shape=(n_components, n_features)
M : `numpy.ndarray`, shape=(n_features, n_features)
The copy of the learned Mahalanobis matrix.
"""
return self.transformer_.T.dot(self.transformer_)
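
The corrected docstring matters because the Mahalanobis matrix is rebuilt as M = L^T L, so it is always square in the input space even when L is rectangular. A small numpy sketch with hypothetical sizes:

import numpy as np

L = np.random.RandomState(0).randn(2, 5)  # transformer_ with n_components=2, n_features=5
M = L.T.dot(L)                            # same product as get_mahalanobis_matrix
assert M.shape == (5, 5)                  # (n_features, n_features), not (n_components, n_features)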
2 changes: 1 addition & 1 deletion metric_learn/covariance.py
@@ -21,7 +21,7 @@ class Covariance(MahalanobisMixin, TransformerMixin):

Attributes
----------
transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
"""
8 changes: 4 additions & 4 deletions metric_learn/itml.py
@@ -150,7 +150,7 @@ class ITML(_BaseITML, _PairsClassifierMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)

@@ -218,7 +218,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
"""
@@ -292,11 +292,11 @@ def fit(self, X, y, random_state=np.random, bounds=None):
if self.num_labeled != 'deprecated':
warnings.warn('"num_labeled" parameter is not used.'
' It has been deprecated in version 0.5.0 and will be'
'removed in 0.6.0', DeprecationWarning)
' removed in 0.6.0', DeprecationWarning)
if self.bounds != 'deprecated':
warnings.warn('"bounds" parameter from initialization is not used.'
' It has been deprecated in version 0.5.0 and will be'
'removed in 0.6.0. Use the "bounds" parameter of this '
' removed in 0.6.0. Use the "bounds" parameter of this '
'fit method instead.', DeprecationWarning)
X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
num_constraints = self.num_constraints
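
The added leading spaces fix a cosmetic bug in the warning text: adjacent Python string literals are concatenated with no separator, so the old message read "will beremoved". A minimal illustration:

old = ('It has been deprecated in version 0.5.0 and will be'
       'removed in 0.6.0')    # -> '...will beremoved in 0.6.0'
new = ('It has been deprecated in version 0.5.0 and will be'
       ' removed in 0.6.0')   # -> '...will be removed in 0.6.0'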
33 changes: 21 additions & 12 deletions metric_learn/lfda.py
@@ -16,6 +16,8 @@
from six.moves import xrange
from sklearn.metrics import pairwise_distances
from sklearn.base import TransformerMixin

from ._util import _check_n_components
from .base_metric import MahalanobisMixin


@@ -26,23 +28,29 @@ class LFDA(MahalanobisMixin, TransformerMixin):

Attributes
----------
transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The learned linear transformation ``L``.
'''

def __init__(self, num_dims=None, k=None, embedding_type='weighted',
preprocessor=None):
def __init__(self, n_components=None, num_dims='deprecated',
k=None, embedding_type='weighted', preprocessor=None):
'''
Initialize LFDA.

Parameters
----------
num_dims : int, optional
Dimensionality of reduced space (defaults to dimension of X)
n_components : int or None, optional (default=None)
Dimensionality of reduced space (if None, defaults to dimension of X).

num_dims : Not used

.. deprecated:: 0.5.0
`num_dims` was deprecated in version 0.5.0 and will
be removed in 0.6.0. Use `n_components` instead.

k : int, optional
Number of nearest neighbors used in local scaling method.
Defaults to min(7, num_dims - 1).
Defaults to min(7, n_components - 1).

embedding_type : str, optional
Type of metric in the embedding space (default: 'weighted')
@@ -56,6 +64,7 @@ def __init__(self, num_dims=None, k=None, embedding_type='weighted',
'''
if embedding_type not in ('weighted', 'orthonormalized', 'plain'):
raise ValueError('Invalid embedding_type: %r' % embedding_type)
self.n_components = n_components
self.num_dims = num_dims
self.embedding_type = embedding_type
self.k = k
@@ -72,17 +81,17 @@ def fit(self, X, y):
y : (n,) array-like
Class labels, one per point of data.
'''
if self.num_dims != 'deprecated':
warnings.warn('"num_dims" parameter is not used.'
' It has been deprecated in version 0.5.0 and will be'
' removed in 0.6.0. Use "n_components" instead',
DeprecationWarning)
X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
unique_classes, y = np.unique(y, return_inverse=True)
n, d = X.shape
num_classes = len(unique_classes)

if self.num_dims is None:
dim = d
else:
if not 0 < self.num_dims <= d:
raise ValueError('Invalid num_dims, must be in [1,%d]' % d)
dim = self.num_dims
dim = _check_n_components(d, self.n_components)

if self.k is None:
k = min(7, d - 1)
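
A hedged usage sketch of the new LFDA API on toy data (the shapes follow from the documented (n_components, n_features) transformer, not from running this exact snippet):

import numpy as np
from metric_learn import LFDA

X = np.random.RandomState(42).randn(40, 5)
y = np.repeat([0, 1], 20)

lfda = LFDA(n_components=2, k=3)
lfda.fit(X, y)
print(lfda.transform(X).shape)   # expected: (40, 2)

# The old keyword is still accepted but ignored; it only triggers a
# DeprecationWarning and the output dimension falls back to n_features:
LFDA(num_dims=2).fit(X, y)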
28 changes: 24 additions & 4 deletions metric_learn/lmnn.py
@@ -19,14 +19,17 @@
from six.moves import xrange
from sklearn.metrics import euclidean_distances
from sklearn.base import TransformerMixin

from ._util import _check_n_components
from .base_metric import MahalanobisMixin


# commonality between LMNN implementations
class _base_LMNN(MahalanobisMixin, TransformerMixin):
def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
regularization=0.5, convergence_tol=0.001, use_pca=True,
verbose=False, preprocessor=None):
verbose=False, preprocessor=None, n_components=None,
num_dims='deprecated'):
"""Initialize the LMNN object.

Parameters
@@ -40,6 +43,15 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
preprocessor : array-like, shape=(n_samples, n_features) or callable
The preprocessor to call to get tuples from indices. If array-like,
tuples will be formed like this: X[indices].

n_components : int or None, optional (default=None)
Dimensionality of reduced space (if None, defaults to dimension of X).

num_dims : Not used

.. deprecated:: 0.5.0
`num_dims` was deprecated in version 0.5.0 and will
be removed in 0.6.0. Use `n_components` instead.
"""
self.k = k
self.min_iter = min_iter
@@ -49,27 +61,35 @@ def __init__(self, k=3, min_iter=50, max_iter=1000, learn_rate=1e-7,
self.convergence_tol = convergence_tol
self.use_pca = use_pca
self.verbose = verbose
self.n_components = n_components
self.num_dims = num_dims
super(_base_LMNN, self).__init__(preprocessor)


# slower Python version
class python_LMNN(_base_LMNN):

def fit(self, X, y):
if self.num_dims != 'deprecated':
warnings.warn('"num_dims" parameter is not used.'
' It has been deprecated in version 0.5.0 and will be'
' removed in 0.6.0. Use "n_components" instead',
DeprecationWarning)
k = self.k
reg = self.regularization
learn_rate = self.learn_rate

X, y = self._prepare_inputs(X, y, dtype=float,
ensure_min_samples=2)
num_pts, num_dims = X.shape
num_pts, d = X.shape
output_dim = _check_n_components(d, self.n_components)
unique_labels, label_inds = np.unique(y, return_inverse=True)
if len(label_inds) != num_pts:
raise ValueError('Must have one label per point.')
self.labels_ = np.arange(len(unique_labels))
if self.use_pca:
warnings.warn('use_pca does nothing for the python_LMNN implementation')
self.transformer_ = np.eye(num_dims)
self.transformer_ = np.eye(output_dim, d)
required_k = np.bincount(label_inds).min()
if self.k > required_k:
raise ValueError('not enough class labels for specified k'
@@ -272,7 +292,7 @@ class LMNN(_base_LMNN):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The learned linear transformation ``L``.
"""

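
The rectangular identity used above is how n_components takes effect in python_LMNN: np.eye with two arguments already performs the truncation, and the gradient updates keep that shape. A tiny sketch of that behavior (hypothetical sizes):

import numpy as np

d, output_dim = 4, 2
L = np.eye(output_dim, d)        # shape (n_components, n_features)
print(L.dot(np.arange(4.)))      # [0. 1.]: projects onto the first 2 coordinates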
6 changes: 3 additions & 3 deletions metric_learn/lsml.py
@@ -146,7 +146,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
"""
@@ -182,7 +182,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
"""
@@ -241,7 +241,7 @@ def fit(self, X, y, random_state=np.random):
if self.num_labeled != 'deprecated':
warnings.warn('"num_labeled" parameter is not used.'
' It has been deprecated in version 0.5.0 and will be'
'removed in 0.6.0', DeprecationWarning)
' removed in 0.6.0', DeprecationWarning)
X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
num_constraints = self.num_constraints
if num_constraints is None:
27 changes: 21 additions & 6 deletions metric_learn/mlkr.py
@@ -23,6 +23,8 @@


from sklearn.metrics import pairwise_distances

from metric_learn._util import _check_n_components
from .base_metric import MahalanobisMixin

EPS = np.finfo(float).eps
@@ -36,19 +38,25 @@ class MLKR(MahalanobisMixin, TransformerMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The learned linear transformation ``L``.
"""

def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000,
verbose=False, preprocessor=None):
def __init__(self, n_components=None, num_dims='deprecated', A0=None,
tol=None, max_iter=1000, verbose=False, preprocessor=None):
"""
Initialize MLKR.

Parameters
----------
num_dims : int, optional
Dimensionality of reduced space (defaults to dimension of X)
n_components : int or None, optional (default=None)
Dimensionality of reduced space (if None, defaults to dimension of X).

num_dims : Not used

.. deprecated:: 0.5.0
`num_dims` was deprecated in version 0.5.0 and will
be removed in 0.6.0. Use `n_components` instead.

A0: array-like, optional
Initialization of transformation matrix. Defaults to PCA loadings.
@@ -66,6 +74,7 @@ def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000,
The preprocessor to call to get tuples from indices. If array-like,
tuples will be formed like this: X[indices].
"""
self.n_components = n_components
self.num_dims = num_dims
self.A0 = A0
self.tol = tol
@@ -82,6 +91,11 @@ def fit(self, X, y):
X : (n x d) array of samples
y : (n) data labels
"""
if self.num_dims != 'deprecated':
warnings.warn('"num_dims" parameter is not used.'
' It has been deprecated in version 0.5.0 and will be'
' removed in 0.6.0. Use "n_components" instead',
DeprecationWarning)
X, y = self._prepare_inputs(X, y, y_numeric=True,
ensure_min_samples=2)
n, d = X.shape
@@ -90,7 +104,8 @@ def fit(self, X, y):
% (n, y.shape[0]))

A = self.A0
m = self.num_dims
m = _check_n_components(d, self.n_components)
m = self.n_components
if m is None:
m = d
if A is None:
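
A hedged usage sketch for MLKR with the new parameter (toy regression data; assumes the constructor shown in this diff):

import numpy as np
from metric_learn import MLKR

rng = np.random.RandomState(0)
X = rng.randn(50, 4)
y = X[:, 0] + 0.1 * rng.randn(50)

mlkr = MLKR(n_components=2, max_iter=10)
mlkr.fit(X, y)
print(mlkr.transformer_.shape)   # expected: (2, 4), i.e. (n_components, n_features)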
6 changes: 3 additions & 3 deletions metric_learn/mmc.py
@@ -356,7 +356,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)

@@ -406,7 +406,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
n_iter_ : `int`
The number of iterations the solver has run.

transformer_ : `numpy.ndarray`, shape=(num_dims, n_features)
transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
The linear transformation ``L`` deduced from the learned Mahalanobis
metric (See function `transformer_from_metric`.)
"""
@@ -469,7 +469,7 @@ def fit(self, X, y, random_state=np.random):
if self.num_labeled != 'deprecated':
warnings.warn('"num_labeled" parameter is not used.'
' It has been deprecated in version 0.5.0 and will be'
'removed in 0.6.0', DeprecationWarning)
' removed in 0.6.0', DeprecationWarning)
X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
num_constraints = self.num_constraints
if num_constraints is None: