Skip to content

Commit 13f1535

Browse files
authored
Merge pull request #92 from wdevazelhes/new_api_fresh_start
[MRG + 1] Allow already formed tuples as an input.
2 parents c79875c + b4bdec4 commit 13f1535

File tree

8 files changed

+173
-128
lines changed

8 files changed

+173
-128
lines changed

metric_learn/constraints.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,13 @@ def random_subset(all_labels, num_preserved=np.inf, random_state=np.random):
100100
partial_labels = np.array(all_labels, copy=True)
101101
partial_labels[idx] = -1
102102
return Constraints(partial_labels)
103+
104+
def wrap_pairs(X, constraints):
    """Turn index-based pair constraints into explicit pairs of points.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Data matrix; each row is one point.
    constraints : sequence of four array-likes of int
        ``(a, b, c, d)`` indices into ``X``. Each ``(a[i], b[i])`` is emitted
        with label +1 and each ``(c[j], d[j])`` with label -1.

    Returns
    -------
    pairs : ndarray, shape=(len(a) + len(c), 2, n_features)
        The +1 pairs followed by the -1 pairs.
    y : ndarray, shape=(len(a) + len(c), 1)
        Column of labels: +1 for the first ``len(a)`` rows, -1 for the rest.
    """
    # Accept plain nested lists as well as arrays for the data matrix.
    X = np.asarray(X)
    # Force an integer dtype: an empty index list would otherwise become a
    # float64 array, and stacking it with the other indices would make the
    # fancy indexing below raise IndexError.
    a, b, c, d = (np.asarray(constraints[i], dtype=np.intp) for i in range(4))
    pair_indices = np.vstack((np.column_stack((a, b)),
                              np.column_stack((c, d))))
    y = np.vstack([np.ones((len(a), 1)), -np.ones((len(c), 1))])
    return X[pair_indices], y

metric_learn/itml.py

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from sklearn.utils.validation import check_array, check_X_y
2121

2222
from .base_metric import BaseMetricLearner
23-
from .constraints import Constraints
23+
from .constraints import Constraints, wrap_pairs
2424
from ._util import vector_norm
2525

2626

@@ -51,52 +51,63 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
5151
self.A0 = A0
5252
self.verbose = verbose
5353

54-
def _process_inputs(self, X, constraints, bounds):
55-
self.X_ = X = check_array(X)
54+
def _process_pairs(self, pairs, y, bounds):
    """Validate the pairs, drop degenerate ones and initialise the model.

    Parameters
    ----------
    pairs : array-like, shape=(n_constraints, 2, n_features)
        Array of pairs. Each row corresponds to two points.
    y : array-like, shape=(n_constraints,)
        Labels of constraints: 1 for a similar pair, -1 for a dissimilar one.
    bounds : sequence of two floats or None
        Distance bounds. When None, they are set to the 5th and 95th
        percentiles of the pairwise distances between the distinct points
        found in ``pairs``.

    Returns
    -------
    pairs : ndarray
        The similar pairs followed by the dissimilar pairs, without the
        pairs whose two points are (numerically) identical.
    y : ndarray
        Matching labels: +1 for the leading rows, then -1.

    Raises
    ------
    ValueError
        If ``bounds`` is given but does not have exactly two entries.

    Notes
    -----
    Sets ``self.bounds_`` and ``self.A_`` as a side effect.
    """
    pairs, y = check_X_y(pairs, y, accept_sparse=False,
                         ensure_2d=False, allow_nd=True)
    n_features = pairs.shape[2]

    # check to make sure that no two constrained vectors are identical
    pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1]
    pos_no_ident = vector_norm(pos_pairs[:, 0, :] - pos_pairs[:, 1, :]) > 1e-9
    pos_pairs = pos_pairs[pos_no_ident]
    neg_no_ident = vector_norm(neg_pairs[:, 0, :] - neg_pairs[:, 1, :]) > 1e-9
    neg_pairs = neg_pairs[neg_no_ident]

    # init bounds
    if bounds is None:
        # Distinct points appearing in any pair. np.unique replaces stacking
        # a Python set, which recent NumPy versions refuse.
        X = np.unique(pairs.reshape(-1, n_features), axis=0)
        self.bounds_ = np.percentile(pairwise_distances(X), (5, 95))
    else:
        if len(bounds) != 2:
            raise ValueError('bounds must contain exactly two values')
        # Work on a float array copy: boolean-mask assignment below is not
        # valid on a plain list (it would silently overwrite element 0), and
        # the caller's object must not be modified.
        self.bounds_ = np.array(bounds, dtype=float)
    self.bounds_[self.bounds_ == 0] = 1e-9

    # init metric
    if self.A0 is None:
        self.A_ = np.identity(n_features)
    else:
        self.A_ = check_array(self.A0)

    pairs = np.vstack([pos_pairs, neg_pairs])
    y = np.hstack([np.ones(len(pos_pairs)), -np.ones(len(neg_pairs))])
    return pairs, y
80+
7581

76-
def fit(self, X, constraints, bounds=None):
82+
def fit(self, pairs, y, bounds=None):
7783
"""Learn the ITML model.
7884
7985
Parameters
8086
----------
81-
X : (n x d) data matrix
82-
each row corresponds to a single instance
83-
constraints : 4-tuple of arrays
84-
(a,b,c,d) indices into X, with (a,b) specifying positive and (c,d)
85-
negative pairs
87+
pairs : array-like, shape=(n_constraints, 2, n_features)
88+
Array of pairs. Each row corresponds to two points.
89+
y : array-like, shape=(n_constraints,)
90+
Labels of constraints. Should be -1 for dissimilar pair, 1 for similar.
8691
bounds : list (pos,neg) pairs, optional
8792
bounds on similarity, s.t. d(pairs[i, 0], pairs[i, 1]) < pos when y[i] == 1 and d(pairs[i, 0], pairs[i, 1]) > neg when y[i] == -1
93+
94+
Returns
95+
-------
96+
self : object
97+
Returns the instance.
8898
"""
89-
a,b,c,d = self._process_inputs(X, constraints, bounds)
99+
pairs, y = self._process_pairs(pairs, y, bounds)
90100
gamma = self.gamma
91-
num_pos = len(a)
92-
num_neg = len(c)
101+
pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1]
102+
num_pos = len(pos_pairs)
103+
num_neg = len(neg_pairs)
93104
_lambda = np.zeros(num_pos + num_neg)
94105
lambdaold = np.zeros_like(_lambda)
95106
gamma_proj = 1. if gamma is np.inf else gamma/(gamma+1.)
96107
pos_bhat = np.zeros(num_pos) + self.bounds_[0]
97108
neg_bhat = np.zeros(num_neg) + self.bounds_[1]
98-
pos_vv = self.X_[a] - self.X_[b]
99-
neg_vv = self.X_[c] - self.X_[d]
109+
pos_vv = pos_pairs[:, 0, :] - pos_pairs[:, 1, :]
110+
neg_vv = neg_pairs[:, 0, :] - neg_pairs[:, 1, :]
100111
A = self.A_
101112

102113
for it in xrange(self.max_iter):
@@ -195,4 +206,5 @@ def fit(self, X, y, random_state=np.random):
195206
random_state=random_state)
196207
pos_neg = c.positive_negative_pairs(num_constraints,
197208
random_state=random_state)
198-
return ITML.fit(self, X, pos_neg, bounds=self.bounds)
209+
pairs, y = wrap_pairs(X, pos_neg)
210+
return ITML.fit(self, pairs, y, bounds=self.bounds)

metric_learn/lfda.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,11 @@ def _sum_outer(x):
139139
def _eigh(a, b, dim):
140140
try:
141141
return scipy.sparse.linalg.eigsh(a, k=dim, M=b, which='LA')
142-
except (ValueError, scipy.sparse.linalg.ArpackNoConvergence):
143-
pass
144-
try:
145-
return scipy.linalg.eigh(a, b)
146142
except np.linalg.LinAlgError:
147-
pass
143+
pass # scipy already tried eigh for us
144+
except (ValueError, scipy.sparse.linalg.ArpackNoConvergence):
145+
try:
146+
return scipy.linalg.eigh(a, b)
147+
except np.linalg.LinAlgError:
148+
pass
148149
return scipy.linalg.eig(a, b)

metric_learn/lsml.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from sklearn.utils.validation import check_array, check_X_y
1515

1616
from .base_metric import BaseMetricLearner
17-
from .constraints import Constraints
17+
from .constraints import Constraints, wrap_pairs
1818

1919

2020
class LSML(BaseMetricLearner):
@@ -35,11 +35,13 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False):
3535
self.max_iter = max_iter
3636
self.verbose = verbose
3737

38-
def _prepare_inputs(self, X, constraints, weights):
39-
self.X_ = X = check_array(X)
40-
a,b,c,d = constraints
41-
self.vab_ = X[a] - X[b]
42-
self.vcd_ = X[c] - X[d]
38+
def _prepare_quadruplets(self, quadruplets, weights):
39+
pairs = check_array(quadruplets, accept_sparse=False,
40+
ensure_2d=False, allow_nd=True)
41+
42+
# difference vectors within the first and the second pair of each quadruplet
43+
self.vab_ = quadruplets[:, 0, :] - quadruplets[:, 1, :]
44+
self.vcd_ = quadruplets[:, 2, :] - quadruplets[:, 3, :]
4345
if self.vab_.shape != self.vcd_.shape:
4446
raise ValueError('Constraints must have same length')
4547
if weights is None:
@@ -48,6 +50,7 @@ def _prepare_inputs(self, X, constraints, weights):
4850
self.w_ = weights
4951
self.w_ /= self.w_.sum() # weights must sum to 1
5052
if self.prior is None:
53+
X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
5154
self.prior_inv_ = np.atleast_2d(np.cov(X, rowvar=False))
5255
self.M_ = np.linalg.inv(self.prior_inv_)
5356
else:
@@ -57,19 +60,25 @@ def _prepare_inputs(self, X, constraints, weights):
5760
def metric(self):
5861
return self.M_
5962

60-
def fit(self, X, constraints, weights=None):
63+
def fit(self, quadruplets, weights=None):
6164
"""Learn the LSML model.
6265
6366
Parameters
6467
----------
65-
X : (n x d) data matrix
66-
each row corresponds to a single instance
67-
constraints : 4-tuple of arrays
68-
(a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
69-
weights : (m,) array of floats, optional
68+
quadruplets : array-like, shape=(n_constraints, 4, n_features)
69+
Each row corresponds to 4 points. In order to supervise the
70+
algorithm in the right way, we should have the four samples ordered
71+
in a way such that: d(quadruplets[i, 0], quadruplets[i, 1]) < d(quadruplets[i, 2], quadruplets[i, 3])
72+
for all 0 <= i < n_constraints.
73+
weights : (n_constraints,) array of floats, optional
7074
scale factor for each constraint
75+
76+
Returns
77+
-------
78+
self : object
79+
Returns the instance.
7180
"""
72-
self._prepare_inputs(X, constraints, weights)
81+
self._prepare_quadruplets(quadruplets, weights)
7382
step_sizes = np.logspace(-10, 0, 10)
7483
# Keep track of the best step size and the loss at that step.
7584
l_best = 0
@@ -179,6 +188,6 @@ def fit(self, X, y, random_state=np.random):
179188

180189
c = Constraints.random_subset(y, self.num_labeled,
181190
random_state=random_state)
182-
pairs = c.positive_negative_pairs(num_constraints, same_length=True,
191+
pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
183192
random_state=random_state)
184-
return LSML.fit(self, X, pairs, weights=self.weights)
193+
return LSML.fit(self, X[np.column_stack(pos_neg)], weights=self.weights)

0 commit comments

Comments
 (0)