scikit-learn-contrib · wdevazelhes · Feb 26, 2018 · Feb 26, 2018 · Feb 27, 2018 · Feb 28, 2018
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
@@ -59,7 +59,7 @@ def fit(self, X, y):
 
 class WeaklySupervisedMetricLearner(BaseMetricLearner):
 
-  def fit(self, X, constraints):
-    return NotImplementedError
+  def fit(self, constrained_dataset, y):
+    raise NotImplementedError  # subclasses (e.g. LSML) provide fit
 
 
@@ -77,3 +77,9 @@ def __init__(self):
                               'instantiated')
   # TODO: introduce specific scoring functions etc
 
+class QuadrupletsMetricLearner(WeaklySupervisedMetricLearner):
+  """Base class for quadruplets learners such as LSML; do not instantiate."""
+  def __init__(self):
+    raise NotImplementedError('QuadrupletsMetricLearner should not be '
+                              'instantiated')
+  # TODO: introduce specific scoring functions etc
diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
@@ -19,17 +19,6 @@ def __init__(self, partial_labels):
     self.known_label_idx, = np.where(partial_labels >= 0)
     self.known_labels = partial_labels[self.known_label_idx]
 
-  def adjacency_matrix(self, num_constraints, random_state=np.random):
-    a, b, c, d = self.positive_negative_pairs(num_constraints,
-                                              random_state=random_state)
-    row = np.concatenate((a, c))
-    col = np.concatenate((b, d))
-    data = np.ones_like(row, dtype=int)
-    data[len(a):] = -1
-    adj = coo_matrix((data, (row, col)), shape=(self.num_points,)*2)
-    # symmetrize
-    return adj + adj.T
-
   def positive_negative_pairs(self, num_constraints, same_length=False,
                               random_state=np.random):
     a, b = self._pairs(num_constraints, same_label=True,
@@ -155,3 +144,84 @@ def pairs_from_labels(y):
   def triplets_from_labels(y):
     # TODO: to be implemented
     raise NotImplementedError
+
+
+def unwrap_pairs(constrained_dataset, y):
+  """Split labeled pair constraints into ``X`` and ``[a, b, c, d]``.
+
+  This undoes `wrap_pairs`.
+
+  Parameters
+  ----------
+  constrained_dataset : ConstrainedDataset
+      with constraints ``c`` being an array of shape [n_constraints, 2]
+  y : array-like, shape (n_constraints, 1) or (n_constraints,)
+      labels of the constraints: 0 for the pairs returned as (a, b) and
+      1 for the pairs returned as (c, d)
+
+  Returns
+  -------
+  X : the ``X`` attribute of ``constrained_dataset``
+  constraints : list of 4 arrays [a, b, c, d]
+      first and second columns of the pairs labeled 0, then first and
+      second columns of the pairs labeled 1
+  """
+  labels = np.asarray(y)
+  if labels.ndim > 1:
+    # column-vector labels, as produced by `wrap_pairs`
+    labels = labels[:, 0]
+  pairs = constrained_dataset.c
+  label0, label1 = pairs[labels == 0], pairs[labels == 1]
+  return constrained_dataset.X, [label0[:, 0], label0[:, 1],
+                                 label1[:, 0], label1[:, 1]]
+
+
+def wrap_pairs(X, constraints):
+  """Pack ``[a, b, c, d]`` indices into a ConstrainedDataset and labels.
+
+  Parameters
+  ----------
+  X : data passed unchanged to ``ConstrainedDataset``
+  constraints : sequence of 4 array-likes (a, b, c, d)
+      (a, b) pairs get label 0 and (c, d) pairs get label 1
+
+  Returns
+  -------
+  constrained_dataset : ConstrainedDataset
+      built from ``X`` and an array of shape [len(a) + len(c), 2] holding
+      the (a, b) pairs followed by the (c, d) pairs
+  y : array, shape (len(a) + len(c), 1)
+      0 for each (a, b) pair, 1 for each (c, d) pair
+  """
+  a, b, c, d = (np.array(constraints[i]) for i in range(4))
+  pairs = np.vstack([np.hstack([a[:, None], b[:, None]]),
+                     np.hstack([c[:, None], d[:, None]])])
+  y = np.vstack([np.zeros((len(a), 1)), np.ones((len(c), 1))])
+  return ConstrainedDataset(X, pairs), y
+
+
+def unwrap_to_graph(constrained_dataset, y):
+  """Turn labeled pair constraints into ``X`` and an adjacency matrix.
+
+  Parameters
+  ----------
+  constrained_dataset : ConstrainedDataset
+      with constraints being an array of shape [n_constraints, 2]
+  y : array-like
+      labels of the constraints, as accepted by `unwrap_pairs`
+
+  Returns
+  -------
+  X : the ``X`` attribute of ``constrained_dataset``
+  W : sparse matrix, shape (X.shape[0], X.shape[0])
+      each pair (i, j) labeled 0 adds +1 and each pair labeled 1 adds -1
+      to both entries (i, j) and (j, i)
+  """
+  X, (a, b, c, d) = unwrap_pairs(constrained_dataset, y)
+  row = np.concatenate((a, c))
+  col = np.concatenate((b, d))
+  data = np.ones_like(row, dtype=int)
+  data[len(a):] = -1
+  adj = coo_matrix((data, (row, col)), shape=(X.shape[0],) * 2)
+  # symmetrize
+  return X, adj + adj.T
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
@@ -20,7 +20,7 @@
 from sklearn.utils.validation import check_array, check_X_y
 
 from .base_metric import PairsMetricLearner, SupervisedMetricLearner
-from .constraints import Constraints
+from .constraints import Constraints, unwrap_pairs, wrap_pairs
 from ._util import vector_norm
 
 
@@ -73,19 +73,19 @@ def _process_inputs(self, X, constraints, bounds):
       self.A_ = check_array(self.A0)
     return a,b,c,d
 
-  def fit(self, X, constraints, bounds=None):
+  def fit(self, constrained_dataset, y, bounds=None):
     """Learn the ITML model.
 
     Parameters
     ----------
-    X : (n x d) data matrix
-        each row corresponds to a single instance
-    constraints : 4-tuple of arrays
-        (a,b,c,d) indices into X, with (a,b) specifying positive and (c,d)
-        negative pairs
+    constrained_dataset : ConstrainedDataset
+        with constraints being an array of shape [n_constraints, 2]
+    y : array-like, shape (n_constraints, 1)
+        labels of the constraints: 0 for positive pairs, 1 for negative pairs
     bounds : list (pos,neg) pairs, optional
         bounds on similarity, s.t. d(X[a],X[b]) < pos and d(X[c],X[d]) > neg
     """
+    X, constraints = unwrap_pairs(constrained_dataset, y)
     a,b,c,d = self._process_inputs(X, constraints, bounds)
     gamma = self.gamma
     num_pos = len(a)
@@ -195,4 +195,5 @@ def fit(self, X, y, random_state=np.random):
                                   random_state=random_state)
     pos_neg = c.positive_negative_pairs(num_constraints,
                                         random_state=random_state)
-    return ITML.fit(self, X, pos_neg, bounds=self.bounds)
+    constrained_dataset, y = wrap_pairs(X, pos_neg)
+    return ITML.fit(self, constrained_dataset, y, bounds=self.bounds)
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
@@ -13,11 +13,11 @@
 from six.moves import xrange
 from sklearn.utils.validation import check_array, check_X_y
 
-from .base_metric import PairsMetricLearner, SupervisedMetricLearner
-from .constraints import Constraints
+from .base_metric import SupervisedMetricLearner, QuadrupletsMetricLearner
+from .constraints import Constraints, ConstrainedDataset
 
 
-class LSML(PairsMetricLearner):
+class LSML(QuadrupletsMetricLearner):
   def __init__(self, tol=1e-3, max_iter=1000, prior=None, verbose=False):
     """Initialize LSML.
 
@@ -57,18 +57,23 @@ def _prepare_inputs(self, X, constraints, weights):
   def metric(self):
     return self.M_
 
-  def fit(self, X, constraints, weights=None):
+  def fit(self, constrained_dataset, y=None, weights=None):
     """Learn the LSML model.
 
     Parameters
     ----------
-    X : (n x d) data matrix
-        each row corresponds to a single instance
-    constraints : 4-tuple of arrays
-        (a,b,c,d) indices into X, such that d(X[a],X[b]) < d(X[c],X[d])
+    constrained_dataset : ConstrainedDataset
+        with constraints being an array of shape [n_constraints, 4]. It
+        should be the concatenation of 4 column vectors a, b, c and d,
+        such that: ``d(X[a[i]],X[b[i]]) < d(X[c[i]],X[d[i]])`` for every
+        constraint index ``i``.
+    y : object
+        Not used, for scikit-learn compatibility
     weights : (m,) array of floats, optional
         scale factor for each constraint
     """
+    X = constrained_dataset.X
+    constraints = [constrained_dataset.c[:, i].ravel() for i in range(4)]
     self._prepare_inputs(X, constraints, weights)
     step_sizes = np.logspace(-10, 0, 10)
     # Keep track of the best step size and the loss at that step.
@@ -181,4 +186,7 @@ def fit(self, X, y, random_state=np.random):
                                   random_state=random_state)
     pairs = c.positive_negative_pairs(num_constraints, same_length=True,
                                       random_state=random_state)
-    return LSML.fit(self, X, pairs, weights=self.weights)
+    constrained_dataset = ConstrainedDataset(X, np.hstack([pairs[i][:, None]
+                                                           for i in
+                                                           range(4)]))
+    return LSML.fit(self, constrained_dataset, weights=self.weights)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
@@ -23,7 +23,8 @@
 from sklearn.utils.validation import check_array, check_X_y
 
 from .base_metric import PairsMetricLearner, SupervisedMetricLearner
-from .constraints import Constraints
+from .constraints import Constraints, ConstrainedDataset, unwrap_pairs, \
+  wrap_pairs
 from ._util import vector_norm
 
 
@@ -58,17 +59,17 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
     self.diagonal_c = diagonal_c
     self.verbose = verbose
 
-  def fit(self, X, constraints):
+  def fit(self, constrained_dataset, y):
     """Learn the MMC model.
 
     Parameters
     ----------
-    X : (n x d) data matrix
-        each row corresponds to a single instance
-    constraints : 4-tuple of arrays
-        (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d)
-        dissimilar pairs
+    constrained_dataset : ConstrainedDataset
+        with constraints being an array of shape [n_constraints, 2]
+    y : array-like, shape (n_constraints, 1)
+        labels of the constraints: 0 for similar pairs, 1 for dissimilar pairs
     """
+    X, constraints = unwrap_pairs(constrained_dataset, y)
     constraints = self._process_inputs(X, constraints)
     if self.diagonal:
       return self._fit_diag(X, constraints)
@@ -437,4 +438,5 @@ def fit(self, X, y, random_state=np.random):
                                   random_state=random_state)
     pos_neg = c.positive_negative_pairs(num_constraints,
                                         random_state=random_state)
-    return MMC.fit(self, X, pos_neg)
+    constrained_dataset, y = wrap_pairs(X, pos_neg)
+    return MMC.fit(self, constrained_dataset, y)
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
@@ -16,7 +16,7 @@
 from sklearn.utils.validation import check_array
 
 from .base_metric import PairsMetricLearner, SupervisedMetricLearner
-from .constraints import Constraints
+from .constraints import Constraints, wrap_pairs, unwrap_to_graph
 
 
 class SDML(PairsMetricLearner):
@@ -56,21 +56,22 @@ def _prepare_inputs(self, X, W):
   def metric(self):
     return self.M_
 
-  def fit(self, X, W):
+  def fit(self, constrained_dataset, y):
     """Learn the SDML model.
 
     Parameters
     ----------
-    X : array-like, shape (n, d)
-        data matrix, where each row corresponds to a single instance
-    W : array-like, shape (n, n)
-        connectivity graph, with +1 for positive pairs and -1 for negative
+    constrained_dataset : ConstrainedDataset
+        with constraints being an array of shape [n_constraints, 2]
+    y : array-like, shape (n_constraints, 1)
+        labels of the constraints: 0 for positive pairs, 1 for negative pairs
 
     Returns
     -------
     self : object
         Returns the instance.
     """
+    X, W = unwrap_to_graph(constrained_dataset, y)
     loss_matrix = self._prepare_inputs(X, W)
     P = self.M_ + self.balance_param * loss_matrix
     emp_cov = pinvh(P)
@@ -131,5 +132,7 @@ def fit(self, X, y, random_state=np.random):
 
     c = Constraints.random_subset(y, self.num_labeled,
                                   random_state=random_state)
-    adj = c.adjacency_matrix(num_constraints, random_state=random_state)
-    return SDML.fit(self, X, adj)
+    pos_neg = c.positive_negative_pairs(num_constraints,
+                                              random_state=random_state)
+    constrained_dataset, y = wrap_pairs(X, pos_neg)
+    return SDML.fit(self, constrained_dataset, y)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
@@ -1,5 +1,6 @@
 import unittest
 import numpy as np
+from metric_learn.constraints import wrap_pairs
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from sklearn.datasets import load_iris
@@ -160,7 +161,7 @@ def test_iris(self):
 
     # Full metric
     mmc = MMC(convergence_threshold=0.01)
-    mmc.fit(self.iris_points, [a,b,c,d])
+    mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d]))
     expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265],
                 [+0.00083371, +0.00149466, -0.00200719, -0.00296284],
                 [-0.00111959, -0.00200719, +0.00269546, +0.00397881],
@@ -169,7 +170,7 @@ def test_iris(self):
 
     # Diagonal metric
     mmc = MMC(diagonal=True)
-    mmc.fit(self.iris_points, [a,b,c,d])
+    mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d]))
     expected = [0, 0, 1.21045968, 1.22552608]
     assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6)