-
Notifications
You must be signed in to change notification settings - Fork 229
[WIP] New API proposal #85
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
3e5fbc3
0cbf1ae
300dada
8615634
a478baa
3744bec
214d991
4f4ce8b
ac00b8b
33561ab
7f40c56
402f397
47a9372
41dc123
5f63f24
fb0d118
df8a340
e3e7e0c
5a9c2e5
cf94740
52f4516
079bb13
da7c8e7
8192d11
2d0f1ca
6c59a1a
b70163a
a12eb9a
b1f6c23
b0ec33b
64f5762
2cf78dd
11a8ff1
a768cbf
335d8f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,17 +19,6 @@ def __init__(self, partial_labels): | |
self.known_label_idx, = np.where(partial_labels >= 0) | ||
self.known_labels = partial_labels[self.known_label_idx] | ||
|
||
def adjacency_matrix(self, num_constraints, random_state=np.random): | ||
a, b, c, d = self.positive_negative_pairs(num_constraints, | ||
random_state=random_state) | ||
row = np.concatenate((a, c)) | ||
col = np.concatenate((b, d)) | ||
data = np.ones_like(row, dtype=int) | ||
data[len(a):] = -1 | ||
adj = coo_matrix((data, (row, col)), shape=(self.num_points,)*2) | ||
# symmetrize | ||
return adj + adj.T | ||
|
||
def positive_negative_pairs(self, num_constraints, same_length=False, | ||
random_state=np.random): | ||
a, b = self._pairs(num_constraints, same_label=True, | ||
|
@@ -155,3 +144,34 @@ def pairs_from_labels(y): | |
def triplets_from_labels(y): | ||
# TODO: to be implemented | ||
raise NotImplementedError | ||
|
||
|
||
def unwrap_pairs(constrained_dataset, y): | ||
a = constrained_dataset.c[(y == 0)[:, 0]][:, 0] | ||
b = constrained_dataset.c[(y == 0)[:, 0]][:, 1] | ||
c = constrained_dataset.c[(y == 1)[:, 0]][:, 0] | ||
d = constrained_dataset.c[(y == 1)[:, 0]][:, 1] | ||
X = constrained_dataset.X | ||
return X, [a, b, c, d] | ||
|
||
def wrap_pairs(X, constraints): | ||
a = np.array(constraints[0]) | ||
b = np.array(constraints[1]) | ||
c = np.array(constraints[2]) | ||
d = np.array(constraints[3]) | ||
constraints = np.vstack([np.hstack([a[:, None], b[:, None]]), | ||
np.hstack([c[:, None], d[:, None]])]) | ||
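There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
a, b, c, d = constraints
constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d))))
# or if we have numpy 1.13+
constraints = np.block([[a, b], [c, d]])
[Editorial note: the `np.block` form matches the `vstack`/`column_stack` form only if a, b, c and d are column vectors of shape (n, 1). With 1-D index arrays it concatenates each inner list end to end and produces a 2-row array instead of an (n_constraints, 2) array.]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
thanks, will do |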
||
y = np.vstack([np.zeros((len(a), 1)), np.ones((len(c), 1))]) | ||
constrained_dataset = ConstrainedDataset(X, constraints) | ||
return constrained_dataset, y | ||
|
||
def unwrap_to_graph(constrained_dataset, y): | ||
|
||
X, [a, b, c, d] = unwrap_pairs(constrained_dataset, y) | ||
row = np.concatenate((a, c)) | ||
col = np.concatenate((b, d)) | ||
data = np.ones_like(row, dtype=int) | ||
data[len(a):] = -1 | ||
adj = coo_matrix((data, (row, col)), shape=(constrained_dataset.X.shape[0],) | ||
* 2) | ||
return constrained_dataset.X, adj + adj.T |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ | |
from sklearn.utils.validation import check_array, check_X_y | ||
|
||
from .base_metric import PairsMetricLearner, SupervisedMetricLearner | ||
from .constraints import Constraints | ||
from .constraints import Constraints, unwrap_pairs, wrap_pairs | ||
from ._util import vector_norm | ||
|
||
|
||
|
@@ -73,19 +73,19 @@ def _process_inputs(self, X, constraints, bounds): | |
self.A_ = check_array(self.A0) | ||
return a,b,c,d | ||
|
||
def fit(self, X, constraints, bounds=None): | ||
def fit(self, constrained_dataset, y, bounds=None): | ||
"""Learn the ITML model. | ||
|
||
Parameters | ||
---------- | ||
X : (n x d) data matrix | ||
each row corresponds to a single instance | ||
constraints : 4-tuple of arrays | ||
(a,b,c,d) indices into X, with (a,b) specifying positive and (c,d) | ||
negative pairs | ||
constrained_dataset : ConstrainedDataset | ||
with constraints being an array of shape [n_constraints, 2] | ||
y : array-like, shape (n x 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, indeed, will change this |
||
labels of the constraints | ||
bounds : list (pos,neg) pairs, optional | ||
bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg | ||
""" | ||
X, constraints = unwrap_pairs(constrained_dataset, y) | ||
a,b,c,d = self._process_inputs(X, constraints, bounds) | ||
gamma = self.gamma | ||
num_pos = len(a) | ||
|
@@ -195,4 +195,5 @@ def fit(self, X, y, random_state=np.random): | |
random_state=random_state) | ||
pos_neg = c.positive_negative_pairs(num_constraints, | ||
random_state=random_state) | ||
return ITML.fit(self, X, pos_neg, bounds=self.bounds) | ||
constrained_dataset, y = wrap_pairs(X, pos_neg) | ||
return ITML.fit(self, constrained_dataset, y, bounds=self.bounds) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,11 +13,11 @@ | |
from six.moves import xrange | ||
from sklearn.utils.validation import check_array, check_X_y | ||
|
||
from .base_metric import PairsMetricLearner, SupervisedMetricLearner | ||
from .constraints import Constraints | ||
from .base_metric import SupervisedMetricLearner, QuadrupletsMetricLearner | ||
from .constraints import Constraints, ConstrainedDataset | ||
|
||
|
||
class LSML(PairsMetricLearner): | ||
class LSML(QuadrupletsMetricLearner): | ||
def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False): | ||
"""Initialize LSML. | ||
|
||
|
@@ -57,18 +57,23 @@ def _prepare_inputs(self, X, constraints, weights): | |
def metric(self): | ||
return self.M_ | ||
|
||
def fit(self, X, constraints, weights=None): | ||
def fit(self, constrained_dataset, y=None, weights=None): | ||
"""Learn the LSML model. | ||
|
||
Parameters | ||
---------- | ||
X : (n x d) data matrix | ||
each row corresponds to a single instance | ||
constraints : 4-tuple of arrays | ||
(a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d]) | ||
constrained_dataset : ConstrainedDataset | ||
with constraints being an array of shape [n_constraints, 4]. It | ||
should be the concatenation of 4 column vectors a, b, c and d, | ||
such that: ``d(X[a[i]],X[b[i]]) < d(X[c[i]],X[d[i]])`` for every | ||
constraint index ``i``. | ||
y : object | ||
Not used, for scikit-learn compatibility | ||
weights : (m,) array of floats, optional | ||
scale factor for each constraint | ||
""" | ||
X = constrained_dataset.X | ||
constraints = [constrained_dataset.c[:, i].ravel() for i in range(4)] | ||
self._prepare_inputs(X, constraints, weights) | ||
step_sizes = np.logspace(-10, 0, 10) | ||
# Keep track of the best step size and the loss at that step. | ||
|
@@ -181,4 +186,7 @@ def fit(self, X, y, random_state=np.random): | |
random_state=random_state) | ||
pairs = c.positive_negative_pairs(num_constraints, same_length=True, | ||
random_state=random_state) | ||
return LSML.fit(self, X, pairs, weights=self.weights) | ||
constrained_dataset = ConstrainedDataset(X, np.hstack([pairs[i][:, None] | ||
for i in | ||
range(4)])) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great, thanks ! |
||
return LSML.fit(self, constrained_dataset, weights=self.weights) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,8 @@ | |
from sklearn.utils.validation import check_array, check_X_y | ||
|
||
from .base_metric import PairsMetricLearner, SupervisedMetricLearner | ||
from .constraints import Constraints | ||
from .constraints import Constraints, ConstrainedDataset, unwrap_pairs, \ | ||
wrap_pairs | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Style nitpick: Use parens to break up long import lists:
from .constraints import (
    Constraints, ConstrainedDataset, unwrap_pairs, wrap_pairs)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Thanks! |
||
from ._util import vector_norm | ||
|
||
|
||
|
@@ -58,17 +59,17 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, | |
self.diagonal_c = diagonal_c | ||
self.verbose = verbose | ||
|
||
def fit(self, X, constraints): | ||
def fit(self, constrained_dataset, y): | ||
"""Learn the MMC model. | ||
|
||
Parameters | ||
---------- | ||
X : (n x d) data matrix | ||
each row corresponds to a single instance | ||
constraints : 4-tuple of arrays | ||
(a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) | ||
dissimilar pairs | ||
constrained_dataset : ConstrainedDataset | ||
with constraints being an array of shape [n_constraints, 2] | ||
y : array-like, shape (n x 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will do |
||
labels of the constraints | ||
""" | ||
X, constraints = unwrap_pairs(constrained_dataset, y) | ||
constraints = self._process_inputs(X, constraints) | ||
if self.diagonal: | ||
return self._fit_diag(X, constraints) | ||
|
@@ -437,4 +438,5 @@ def fit(self, X, y, random_state=np.random): | |
random_state=random_state) | ||
pos_neg = c.positive_negative_pairs(num_constraints, | ||
random_state=random_state) | ||
return MMC.fit(self, X, pos_neg) | ||
constrained_dataset, y = wrap_pairs(X, pos_neg) | ||
return MMC.fit(self, constrained_dataset, y) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
from sklearn.utils.validation import check_array | ||
|
||
from .base_metric import PairsMetricLearner, SupervisedMetricLearner | ||
from .constraints import Constraints | ||
from .constraints import Constraints, wrap_pairs, unwrap_to_graph | ||
|
||
|
||
class SDML(PairsMetricLearner): | ||
|
@@ -56,21 +56,22 @@ def _prepare_inputs(self, X, W): | |
def metric(self): | ||
return self.M_ | ||
|
||
def fit(self, X, W): | ||
def fit(self, constrained_dataset, y): | ||
"""Learn the SDML model. | ||
|
||
Parameters | ||
---------- | ||
X : array-like, shape (n, d) | ||
data matrix, where each row corresponds to a single instance | ||
W : array-like, shape (n, n) | ||
connectivity graph, with +1 for positive pairs and -1 for negative | ||
constrained_dataset : ConstrainedDataset | ||
with constraints being an array of shape [n_constraints, 2] | ||
y : array-like, shape (n x 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will do |
||
labels of the constraints | ||
|
||
Returns | ||
------- | ||
self : object | ||
Returns the instance. | ||
""" | ||
X, W = unwrap_to_graph(constrained_dataset, y) | ||
loss_matrix = self._prepare_inputs(X, W) | ||
P = self.M_ + self.balance_param * loss_matrix | ||
emp_cov = pinvh(P) | ||
|
@@ -131,5 +132,7 @@ def fit(self, X, y, random_state=np.random): | |
|
||
c = Constraints.random_subset(y, self.num_labeled, | ||
random_state=random_state) | ||
adj = c.adjacency_matrix(num_constraints, random_state=random_state) | ||
return SDML.fit(self, X, adj) | ||
pos_neg = c.positive_negative_pairs(num_constraints, | ||
random_state=random_state) | ||
constrained_dataset, y = wrap_pairs(X, pos_neg) | ||
return SDML.fit(self, constrained_dataset, y) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe think of a more concise naming convention instead of `constrained_dataset`, as it is going to be used all over the place. `X_constrained`, perhaps?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree, `X_constrained` seems good.