Adding LFDA method

perimosocordiae · perimosocordiae · commit a4028577f819 · 2015-09-21T21:42:09.000-05:00
Iris results seem to match the figure in the original paper.
diff --git a/README.rst b/README.rst
@@ -12,6 +12,7 @@ Metric Learning algorithms in Python.
 -  Sparse Determinant Metric Learning (SDML)
 -  Least Squares Metric Learning (LSML)
 -  Neighborhood Components Analysis (NCA)
+-  Local Fisher Discriminant Analysis (LFDA)
 
 **Dependencies**
 
diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
@@ -3,3 +3,4 @@
 from lsml import LSML
 from sdml import SDML
 from nca import NCA
+from lfda import LFDA
diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py
@@ -0,0 +1,130 @@
+from __future__ import division
+import numpy as np
+import scipy
+import scipy.linalg
+import scipy.sparse.linalg
+from sklearn.metrics import pairwise_distances
+from base_metric import BaseMetricLearner
+
+
+class LFDA(BaseMetricLearner):
+  '''
+  Local Fisher Discriminant Analysis for Supervised Dimensionality Reduction
+  Sugiyama, ICML 2006
+
+  Learns a linear map from the generalized eigenvectors of the local
+  between-class scatter with respect to the local within-class scatter.
+  '''
+  def __init__(self, dim=None, k=7, metric='weighted'):
+    '''
+    dim : dimensionality of reduced space (defaults to dimension of X)
+    k : nearest neighbor used in local scaling method (default: 7)
+    metric : type of metric in the embedding space (default: 'weighted')
+      'weighted'        - weighted eigenvectors
+      'orthonormalized' - orthonormalized
+      'plain'           - raw eigenvectors
+
+    Raises ValueError if metric is not one of the names above.
+    '''
+    if metric not in ('weighted', 'orthonormalized', 'plain'):
+      raise ValueError('Invalid metric: %r' % metric)
+    self.dim = dim
+    self.metric = metric
+    self.k = k
+
+  def transformer(self):
+    '''Returns the (dim, d) matrix learned by fit().'''
+    return self._transformer
+
+  def _process_inputs(self, X, Y):
+    '''
+    Validates X, Y and the hyperparameters before fitting.
+
+    Stores X on self, re-encodes Y as integer class indices, and sets
+    self.dim to d when it was left as None.
+    Returns (X, Y, num_classes, n, d).
+    Raises ValueError if dim is outside [1,d] or k is outside [1,d-1].
+    '''
+    X = np.asanyarray(X)
+    self.X = X
+    n, d = X.shape
+    unique_classes, Y = np.unique(Y, return_inverse=True)
+    num_classes = len(unique_classes)
+
+    if self.dim is None:
+      self.dim = d
+    elif not 0 < self.dim <= d:
+      raise ValueError('Invalid embedding dimension, must be in [1,%d]' % d)
+
+    if not 0 < self.k < d:
+      raise ValueError('Invalid k, must be in [1,%d]' % (d-1))
+
+    return X, Y, num_classes, n, d
+
+  def fit(self, X, Y):
+    '''
+     X: (n, d) array-like of samples
+     Y: (n,) array-like of class labels
+
+    Stores the learned transformation; retrieve it with transformer().
+    '''
+    X, Y, num_classes, n, d = self._process_inputs(X, Y)
+    tSb = np.zeros((d,d))
+    tSw = np.zeros((d,d))
+
+    # range rather than xrange, so the loop also runs on Python 3
+    for c in range(num_classes):
+      Xc = X[Y==c]
+      nc = Xc.shape[0]
+
+      # classwise affinity matrix
+      dist = pairwise_distances(Xc, metric='l2', squared=True)
+      # distances to k-th nearest neighbor
+      k = min(self.k, nc-1)
+      # partition within each row so that column k holds that sample's
+      # k-th smallest squared distance (its own zero distance sorts first)
+      sigma = np.sqrt(np.partition(dist, k, axis=1)[:,k])
+
+      local_scale = np.outer(sigma, sigma)
+      # 0/0 and x/0 are expected where a scale is zero; those affinities
+      # are overwritten with 0 right after
+      with np.errstate(divide='ignore', invalid='ignore'):
+        A = np.exp(-dist/local_scale)
+        A[local_scale==0] = 0
+
+      G = Xc.T.dot(A.sum(axis=0)[:,None] * Xc) - Xc.T.dot(A).dot(Xc)
+      tSb += G/n + (1-nc/n)*Xc.T.dot(Xc) + _sum_outer(Xc)/n
+      tSw += G/nc
+
+    # local between-class scatter: the accumulated sum minus the outer
+    # product of the global sample sum (over n) minus the within-class scatter
+    tSb -= _sum_outer(X)/n + tSw
+
+    # symmetrize; built out-of-place because `tSb += tSb.T` reads from the
+    # array it is writing to
+    tSb = (tSb + tSb.T) / 2
+    tSw = (tSw + tSw.T) / 2
+
+    if self.dim == d:
+      vals, vecs = scipy.linalg.eigh(tSb, tSw)
+    else:
+      vals, vecs = scipy.sparse.linalg.eigsh(tSb, k=self.dim, M=tSw, which='LA')
+
+    # keep the eigenpairs with the largest eigenvalues, in descending order
+    order = np.argsort(-vals)[:self.dim]
+    vals = vals[order]
+    vecs = vecs[:,order]
+
+    if self.metric == 'weighted':
+      # negative eigenvalues are clamped so the weights stay real
+      vecs *= np.sqrt(np.maximum(vals, 0))
+    elif self.metric == 'orthonormalized':
+      vecs, _ = np.linalg.qr(vecs)
+
+    self._transformer = vecs.T
+
+
+def _sum_outer(x):
+  '''Outer product of x's column sums with themselves.'''
+  s = x.sum(axis=0)
+  return np.outer(s, s)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
@@ -5,7 +5,7 @@
 from sklearn.datasets import load_iris
 from numpy.testing import assert_array_almost_equal
 
-from metric_learn import LSML, ITML, LMNN, SDML, NCA
+from metric_learn import LSML, ITML, LMNN, SDML, NCA, LFDA
 # Import this specially for testing.
 from metric_learn.lmnn import python_LMNN
 
@@ -96,5 +96,13 @@ def test_iris(self):
     assert_array_almost_equal(expected, nca.transformer(), decimal=3)
 
 
+class TestLFDA(MetricTestCase):
+  def test_iris(self):
+    # the k=2, dim=2 embedding of iris must score below 0.15
+    model = LFDA(k=2, dim=2)
+    model.fit(self.iris_points, self.iris_labels)
+    embedded = model.transform()
+    self.assertLess(class_separation(embedded, self.iris_labels), 0.15)
+
 if __name__ == '__main__':
   unittest.main()