Rename variables, proposed by issue #257 (#324)

Merged
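In summary, this PR renames user-facing parameters for consistency with scikit-learn conventions: `num_constraints` → `n_constraints`, `num_chunks` → `n_chunks`, `convergence_threshold` → `tol`, and LMNN's `k` → `n_neighbors`. The old names remain accepted as deprecated aliases that emit a `FutureWarning` and are slated for removal in 0.7.0.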
10 changes: 5 additions & 5 deletions bench/benchmarks/iris.py
@@ -5,15 +5,15 @@

CLASSES = {
'Covariance': metric_learn.Covariance(),
-'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
+'ITML_Supervised': metric_learn.ITML_Supervised(n_constraints=200),
'LFDA': metric_learn.LFDA(k=2, dim=2),
-'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
-'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
+'LMNN': metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False),
+'LSML_Supervised': metric_learn.LSML_Supervised(n_constraints=200),
'MLKR': metric_learn.MLKR(),
'NCA': metric_learn.NCA(max_iter=700, n_components=2),
-'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
+'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, n_chunks=30,
chunk_size=2),
-'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500)
+'SDML_Supervised': metric_learn.SDML_Supervised(n_constraints=1500)
}


8 changes: 4 additions & 4 deletions doc/supervised.rst
@@ -152,7 +152,7 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes,
X = iris_data['data']
Y = iris_data['target']

-lmnn = LMNN(k=5, learn_rate=1e-6)
+lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
lmnn.fit(X, Y, verbose=False)

.. topic:: References:
@@ -393,8 +393,8 @@ are similar (+1) or dissimilar (-1)), are sampled with the function
(of label +1), this method will look at all the samples from the same label and
sample randomly a pair among them. To sample negative pairs (of label -1), this
method will look at all the samples from a different class and sample randomly
-a pair among them. The method will try to build `num_constraints` positive
-pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+a pair among them. The method will try to build `n_constraints` positive
+pairs and `n_constraints` negative pairs, but sometimes it cannot find enough
of one of those; setting `same_length=True` then truncates both sets of pairs
to the length of the shorter one.
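For instance, a minimal sketch of this sampling (`Constraints` and its
`positive_negative_pairs` method appear later in this diff; the exact pairs
drawn depend on the random state):

>>> import numpy as np
>>> from metric_learn.constraints import Constraints
>>> y = np.array([0] * 5 + [1] * 5)
>>> a, b, c, d = Constraints(y).positive_negative_pairs(
...     n_constraints=10, same_length=True, random_state=42)
>>> len(a) == len(c)  # truncated to the same number of pairs
True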

@@ -416,5 +416,5 @@ last points should be less similar than the first two points).
X = iris_data['data']
Y = iris_data['target']

-mmc = MMC_Supervised(num_constraints=200)
+mmc = MMC_Supervised(n_constraints=200)
mmc.fit(X, Y)
4 changes: 2 additions & 2 deletions doc/weakly_supervised.rst
@@ -134,7 +134,7 @@ are respected.
>>> from metric_learn import MMC
>>> mmc = MMC(random_state=42)
>>> mmc.fit(tuples, y)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+MMC(A0='deprecated', tol=0.001, diagonal=False,
diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
preprocessor=None, random_state=42, verbose=False)

@@ -250,7 +250,7 @@ tuples).
>>> y_pairs = np.array([1, -1])
>>> mmc = MMC(random_state=42)
>>> mmc.fit(pairs, y_pairs)
-MMC(convergence_threshold=0.001, diagonal=False,
+MMC(tol=0.001, diagonal=False,
diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000, preprocessor=None,
random_state=42, verbose=False)

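The same rename applies when constructing `MMC` directly. A minimal sketch
using only parameters visible in the repr above (by analogy with the ITML
shim later in this diff, the old keyword is expected to keep working, with a
`FutureWarning`, until 0.7.0):

from metric_learn import MMC

# 'tol' replaces the old 'convergence_threshold' keyword
mmc = MMC(tol=0.001, max_iter=100, random_state=42)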
4 changes: 2 additions & 2 deletions examples/plot_metric_learning_examples.py
@@ -139,7 +139,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
#

# setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6)

# fit the data!
lmnn.fit(X, y)
@@ -310,7 +310,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
# - See more in the documentation of the class :py:class:`RCA
# <metric_learn.RCA>`

-rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)
+rca = metric_learn.RCA_Supervised(n_chunks=30, chunk_size=2)
X_rca = rca.fit_transform(X, y)

plot_tsne(X_rca, y)
6 changes: 3 additions & 3 deletions examples/plot_sandwich.py
@@ -28,9 +28,9 @@ def sandwich_demo():

mls = [
LMNN(),
-ITML_Supervised(num_constraints=200),
-SDML_Supervised(num_constraints=200, balance_param=0.001),
-LSML_Supervised(num_constraints=200),
+ITML_Supervised(n_constraints=200),
+SDML_Supervised(n_constraints=200, balance_param=0.001),
+LSML_Supervised(n_constraints=200),
]

for ax_num, ml in enumerate(mls, start=3):
60 changes: 40 additions & 20 deletions metric_learn/constraints.py
@@ -7,6 +7,7 @@
from sklearn.utils import check_random_state
from sklearn.neighbors import NearestNeighbors


__all__ = ['Constraints']


@@ -31,21 +32,21 @@ def __init__(self, partial_labels):
partial_labels = np.asanyarray(partial_labels, dtype=int)
self.partial_labels = partial_labels

-def positive_negative_pairs(self, num_constraints, same_length=False,
-                            random_state=None):
+def positive_negative_pairs(self, n_constraints, same_length=False,
+                            random_state=None, num_constraints='deprecated'):
"""
Generates positive pairs and negative pairs from labeled data.

-Positive pairs are formed by randomly drawing ``num_constraints`` pairs of
+Positive pairs are formed by randomly drawing ``n_constraints`` pairs of
points with the same label. Negative pairs are formed by randomly drawing
-``num_constraints`` pairs of points with different label.
+``n_constraints`` pairs of points with different label.

In the case where it is not possible to generate enough positive or
negative pairs, a smaller number of pairs will be returned with a warning.

Parameters
----------
-num_constraints : int
+n_constraints : int
Number of positive and negative constraints to generate.

same_length : bool, optional (default=False)
Expand All @@ -55,6 +56,8 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
random_state : int or numpy.RandomState or None, optional (default=None)
A pseudo random number generator object or a seed for it if int.

+num_constraints : Renamed to n_constraints. Deprecated; will be removed in 0.7.0.

Returns
-------
a : array-like, shape=(n_constraints,)
Expand All @@ -69,10 +72,18 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
d : array-like, shape=(n_constraints,)
1D array of indicators for the right elements of negative pairs.
"""
+if num_constraints != 'deprecated':
+  warnings.warn('"num_constraints" parameter has been renamed to'
+                ' "n_constraints". It has been deprecated in'
+                ' version 0.6.3 and will be removed in 0.7.0',
+                FutureWarning)
+  n_constraints = num_constraints  # forward the old value to the new name
+self.n_constraints = n_constraints
random_state = check_random_state(random_state)
-a, b = self._pairs(num_constraints, same_label=True,
+a, b = self._pairs(n_constraints, same_label=True,
random_state=random_state)
-c, d = self._pairs(num_constraints, same_label=False,
+c, d = self._pairs(n_constraints, same_label=False,
random_state=random_state)
if same_length and len(a) != len(c):
n = min(len(a), len(c))
@@ -188,15 +199,15 @@ def generate_knntriplets(self, X, k_genuine, k_impostor):

return triplets

-def _pairs(self, num_constraints, same_label=True, max_iter=10,
+def _pairs(self, n_constraints, same_label=True, max_iter=10,
random_state=np.random):
known_label_idx, = np.where(self.partial_labels >= 0)
known_labels = self.partial_labels[known_label_idx]
num_labels = len(known_labels)
ab = set()
it = 0
-while it < max_iter and len(ab) < num_constraints:
-  nc = num_constraints - len(ab)
+while it < max_iter and len(ab) < n_constraints:
+  nc = n_constraints - len(ab)
for aidx in random_state.randint(num_labels, size=nc):
if same_label:
mask = known_labels[aidx] == known_labels
Expand All @@ -207,25 +218,26 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10,
if len(b_choices) > 0:
ab.add((aidx, random_state.choice(b_choices)))
it += 1
-if len(ab) < num_constraints:
+if len(ab) < n_constraints:
warnings.warn("Only generated %d %s constraints (requested %d)" % (
-    len(ab), 'positive' if same_label else 'negative', num_constraints))
-ab = np.array(list(ab)[:num_constraints], dtype=int)
+    len(ab), 'positive' if same_label else 'negative', n_constraints))
+ab = np.array(list(ab)[:n_constraints], dtype=int)
return known_label_idx[ab.T]

-def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
+def chunks(self, n_chunks=100, chunk_size=2, random_state=None,
+           num_chunks='deprecated'):
"""
Generates chunks from labeled data.

-Each of ``num_chunks`` chunks is composed of ``chunk_size`` points from
+Each of ``n_chunks`` chunks is composed of ``chunk_size`` points from
the same class drawn at random. Each point can belong to at most 1 chunk.

-In the case where there is not enough points to generate ``num_chunks``
+In the case where there are not enough points to generate ``n_chunks``
chunks of size ``chunk_size``, a ValueError will be raised.

Parameters
----------
-num_chunks : int, optional (default=100)
+n_chunks : int, optional (default=100)
Number of chunks to generate.

chunk_size : int, optional (default=2)
Expand All @@ -234,26 +246,34 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
random_state : int or numpy.RandomState or None, optional (default=None)
A pseudo random number generator object or a seed for it if int.

+num_chunks : Renamed to n_chunks. Deprecated; will be removed in 0.7.0.

Returns
-------
chunks : array-like, shape=(n_samples,)
1D array of chunk indicators, where -1 indicates that the point does not
belong to any chunk.
"""
+if num_chunks != 'deprecated':
+  warnings.warn('"num_chunks" parameter has been renamed to'
+                ' "n_chunks". It has been deprecated in'
+                ' version 0.6.3 and will be removed in 0.7.0',
+                FutureWarning)
+  n_chunks = num_chunks
random_state = check_random_state(random_state)
chunks = -np.ones_like(self.partial_labels, dtype=int)
uniq, lookup = np.unique(self.partial_labels, return_inverse=True)
unknown_uniq = np.where(uniq < 0)[0]
all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq))
if c not in unknown_uniq]
max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds]))
-if max_chunks < num_chunks:
+if max_chunks < n_chunks:
raise ValueError(('Not enough possible chunks of %d elements in each'
' class to form expected %d chunks - maximum number'
' of chunks is %d'
-) % (chunk_size, num_chunks, max_chunks))
+) % (chunk_size, n_chunks, max_chunks))
idx = 0
-while idx < num_chunks and all_inds:
+while idx < n_chunks and all_inds:
if len(all_inds) == 1:
c = 0
else:
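A minimal sketch of the renamed `chunks` sampler on hypothetical data
(`RCA_Supervised` drives it through its own `n_chunks` parameter):

import numpy as np
from metric_learn.constraints import Constraints

# Eight labeled points in two classes; a label of -1 would mark a point
# with no known class.
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
c = Constraints(y)

# Three chunks of two same-class points each; at most
# sum(class_size // chunk_size) = 4 chunks are possible here.
chunk_ids = c.chunks(n_chunks=3, chunk_size=2, random_state=0)
# chunk_ids holds one entry per sample: its chunk index, or -1 if unassigned.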
62 changes: 44 additions & 18 deletions metric_learn/itml.py
@@ -9,19 +9,28 @@
from .base_metric import _PairsClassifierMixin, MahalanobisMixin
from .constraints import Constraints, wrap_pairs
from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings


class _BaseITML(MahalanobisMixin):
"""Information Theoretic Metric Learning (ITML)"""

_tuple_size = 2 # constraints are pairs

-def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
+def __init__(self, gamma=1., max_iter=1000, tol=1e-3,
prior='identity', verbose=False,
-             preprocessor=None, random_state=None):
+             preprocessor=None, random_state=None,
+             convergence_threshold='deprecated'):
+if convergence_threshold != 'deprecated':
+  warnings.warn('"convergence_threshold" parameter has been renamed to'
+                ' "tol". It has been deprecated in version 0.6.3 and'
+                ' will be removed in 0.7.0', FutureWarning)
+  tol = convergence_threshold
+self.convergence_threshold = 'deprecated'  # keep attribute so get_params() works
self.gamma = gamma
self.max_iter = max_iter
-self.convergence_threshold = convergence_threshold
+self.tol = tol
self.prior = prior
self.verbose = verbose
self.random_state = random_state
@@ -86,7 +95,7 @@ def _fit(self, pairs, y, bounds=None):
conv = np.inf
break
conv = np.abs(lambdaold - _lambda).sum() / normsum
-if conv < self.convergence_threshold:
+if conv < self.tol:
break
lambdaold = _lambda.copy()
if self.verbose:
@@ -122,7 +131,7 @@ class ITML(_BaseITML, _PairsClassifierMixin):
max_iter : int, optional (default=1000)
Maximum number of iteration of the optimization procedure.

-convergence_threshold : float, optional (default=1e-3)
+tol : float, optional (default=1e-3)
Convergence tolerance.

prior : string or numpy array, optional (default='identity')
@@ -158,6 +167,8 @@ class ITML(_BaseITML, _PairsClassifierMixin):
A pseudo random number generator object or a seed for it if int. If
``prior='random'``, ``random_state`` is used to set the prior.

+convergence_threshold : Renamed to tol. Deprecated; will be removed in 0.7.0.

Attributes
----------
bounds_ : `numpy.ndarray`, shape=(2,)
@@ -260,10 +271,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
max_iter : int, optional (default=1000)
Maximum number of iterations of the optimization procedure.

-convergence_threshold : float, optional (default=1e-3)
+tol : float, optional (default=1e-3)
Tolerance of the optimization procedure.

-num_constraints : int, optional (default=None)
+n_constraints : int, optional (default=None)
Number of constraints to generate. If None, default to `20 *
num_classes**2`.

@@ -302,6 +313,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
case, `random_state` is also used to randomly sample constraints from
labels.

+num_constraints : Renamed to n_constraints. Deprecated; will be removed in 0.7.0.
+
+convergence_threshold : Renamed to tol. Deprecated; will be removed in 0.7.0.

Attributes
----------
Expand All @@ -328,7 +342,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
>>> iris_data = load_iris()
>>> X = iris_data['data']
>>> Y = iris_data['target']
->>> itml = ITML_Supervised(num_constraints=200)
+>>> itml = ITML_Supervised(n_constraints=200)
>>> itml.fit(X, Y)

See Also
Expand All @@ -338,14 +352,26 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
that describes the supervised version of weakly supervised estimators.
"""

-def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3,
-             num_constraints=None, prior='identity',
-             verbose=False, preprocessor=None, random_state=None):
+def __init__(self, gamma=1.0, max_iter=1000, tol=1e-3,
+             n_constraints=None, prior='identity',
+             verbose=False, preprocessor=None, random_state=None,
+             num_constraints='deprecated',
+             convergence_threshold='deprecated'):
_BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
-                   convergence_threshold=convergence_threshold,
+                   tol=tol,
prior=prior, verbose=verbose,
-                   preprocessor=preprocessor, random_state=random_state)
-self.num_constraints = num_constraints
+                   preprocessor=preprocessor,
+                   random_state=random_state,
+                   convergence_threshold=convergence_threshold)
+if num_constraints != 'deprecated':
+  warnings.warn('"num_constraints" parameter has been renamed to'
+                ' "n_constraints". It has been deprecated in'
+                ' version 0.6.3 and will be removed in 0.7.0',
+                FutureWarning)
+  n_constraints = num_constraints
+self.n_constraints = n_constraints
+# Keep the old attribute so get_params() does not fail (all params
+# passed to __init__ should be set as attributes).
+self.num_constraints = 'deprecated'

def fit(self, X, y, bounds=None):
"""Create constraints from labels and learn the ITML model.
Expand All @@ -369,13 +395,13 @@ def fit(self, X, y, bounds=None):
points in the training data `X`.
"""
X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-num_constraints = self.num_constraints
-if num_constraints is None:
+n_constraints = self.n_constraints
+if n_constraints is None:
num_classes = len(np.unique(y))
-num_constraints = 20 * num_classes**2
+n_constraints = 20 * num_classes**2

c = Constraints(y)
-pos_neg = c.positive_negative_pairs(num_constraints,
+pos_neg = c.positive_negative_pairs(n_constraints,
random_state=self.random_state)
pairs, y = wrap_pairs(X, pos_neg)
return _BaseITML._fit(self, pairs, y, bounds=bounds)
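End to end, the deprecation window behaves as follows; a minimal sketch
(assuming a metric-learn build that includes this PR):

import warnings
from sklearn.datasets import load_iris
from metric_learn import ITML_Supervised

X, y = load_iris(return_X_y=True)

# The old keyword still works but emits a FutureWarning until 0.7.0.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    itml = ITML_Supervised(num_constraints=200)
assert any(issubclass(w.category, FutureWarning) for w in caught)

# The new spelling is warning-free.
itml = ITML_Supervised(n_constraints=200, tol=1e-3)
itml.fit(X, y)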