diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index cc60049d..5a7508c0 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -9,3 +9,4 @@
 from .nca import NCA
 from .lfda import LFDA
 from .rca import RCA, RCA_Supervised
+from .mlkr import MLKR
diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py
new file mode 100644
index 00000000..7c279cc8
--- /dev/null
+++ b/metric_learn/mlkr.py
@@ -0,0 +1,132 @@
+"""
+Metric Learning for Kernel Regression (MLKR), Weinberger et al.
+
+MLKR is an algorithm for supervised metric learning, which learns a distance
+function by directly minimising the leave-one-out regression error. This
+algorithm can also be viewed as a supervised variation of PCA and can be used
+for dimensionality reduction and high-dimensional data visualization.
+"""
+from __future__ import division
+import numpy as np
+from six.moves import xrange
+from scipy.spatial.distance import pdist, squareform
+
+from .base_metric import BaseMetricLearner
+
+
+class MLKR(BaseMetricLearner):
+  """Metric Learning for Kernel Regression (MLKR)"""
+  def __init__(self, A0=None, epsilon=0.01, alpha=0.0001):
+    """
+    MLKR initialization
+
+    Parameters
+    ----------
+    A0: Initialization of the matrix A. Defaults to the identity matrix.
+    epsilon: Step size for gradient descent.
+    alpha: Stopping criterion for the loss function in gradient descent.
+    """
+    self.params = {
+      "A0": A0,
+      "epsilon": epsilon,
+      "alpha": alpha
+    }
+
+  def _process_inputs(self, X, y):
+    X = np.array(X, copy=False)
+    y = np.array(y, copy=False)
+    if X.ndim == 1:
+      X = X[:, np.newaxis]
+    if y.ndim == 1:
+      y = y[:, np.newaxis]
+    self.X = X
+    n, d = X.shape
+    if y.shape[0] != n:
+      raise ValueError('Data and label lengths mismatch: %d != %d'
+                       % (n, y.shape[0]))
+    return X, y, n, d
+
+  def fit(self, X, y):
+    """
+    Fit MLKR model
+
+    Parameters
+    ----------
+    X : (n x d) array of samples
+    y : (n) data labels
+
+    Returns
+    -------
+    self : Instance of self
+    """
+    X, y, n, d = self._process_inputs(X, y)
+    if self.params['A0'] is None:
+      A = np.identity(d)  # Initialize A to the identity matrix
+    else:
+      A = self.params['A0']
+      if A.shape != (d, d):
+        raise ValueError('A0 should be a square matrix of dimension'
+                         ' %d. %s shape was provided' % (d, A.shape))
+    cost = np.Inf
+    # Gradient descent procedure
+    alpha = self.params['alpha']
+    epsilon = self.params['epsilon']
+    while cost > alpha:
+      K = self._computeK(X, A)
+      yhat = self._computeyhat(y, K)
+      cost = np.sum(np.square(yhat - y))
+      # Compute the gradient of the loss with respect to A
+      sum_i = 0
+      for i in xrange(n):
+        sum_j = 0
+        for j in xrange(n):
+          diffK = (yhat[j] - y[j]) * K[i, j]
+          x_ij = (X[i, :] - X[j, :])[:, np.newaxis]
+          x_ijT = x_ij.T
+          sum_j += diffK * x_ij.dot(x_ijT)
+        sum_i += (yhat[i] - y[i]) * sum_j
+      gradient = 4 * A.dot(sum_i)
+      A -= epsilon * gradient
+    self._transformer = A
+    return self
+
+  @staticmethod
+  def _computeK(X, A):
+    """
+    Internal helper function to compute the K matrix.
+
+    Parameters
+    ----------
+    X: (n x d) array of samples
+    A: (d x d) 'A' matrix
+
+    Returns
+    -------
+    K: (n x n) K matrix where K_ij = exp(-distance(x_i, x_j)), with
+       distance defined as the squared L2 norm of A(x_i - x_j)
+    """
+    dist_mat = pdist(X, metric='mahalanobis', VI=A.T.dot(A))
+    return np.exp(squareform(-(dist_mat ** 2)))
+
+  @staticmethod
+  def _computeyhat(y, K):
+    """
+    Internal helper function to compute the yhat vector.
+
+    Parameters
+    ----------
+    y: (n) data labels
+    K: (n x n) K matrix
+
+    Returns
+    -------
+    yhat: (n x 1) vector of leave-one-out kernel regression estimates
+    """
+    K_mod = np.copy(K)
+    np.fill_diagonal(K_mod, 0)  # exclude each point from its own estimate
+    numerator = K_mod.dot(y)
+    denominator = np.sum(K_mod, 1)[:, np.newaxis]
+    denominator[denominator == 0] = 2.2204e-16  # machine epsilon, avoids division by zero
+    yhat = numerator / denominator
+    return yhat
+
+  def transformer(self):
+    return self._transformer
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 2ef97237..c56bbb99 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -7,7 +7,7 @@
 from metric_learn import (
     LMNN, NCA, LFDA, Covariance,
-    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised)
+    LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised, MLKR)
 
 # Import this specially for testing.
 from metric_learn.lmnn import python_LMNN
 
@@ -113,6 +113,13 @@ def test_iris(self):
     csep = class_separation(rca.transform(), self.iris_labels)
     self.assertLess(csep, 0.25)
 
+class TestMLKR(MetricTestCase):
+  def test_iris(self):
+    mlkr = MLKR(epsilon=10, alpha=10)  # coarse step size and tolerance for faster testing
+    mlkr.fit(self.iris_points, self.iris_labels)
+    csep = class_separation(mlkr.transform(), self.iris_labels)
+    self.assertLess(csep, 0.25)
+
 
 if __name__ == '__main__':
   unittest.main()
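
For reviewers, here is a minimal usage sketch (not part of the diff) showing how the new estimator would be driven end to end. The load_iris helper comes from scikit-learn and is only assumed for illustration; MLKR, fit, transform and transformer are the additions above, and the coarse epsilon/alpha values mirror the unit test rather than recommended defaults.

# Minimal usage sketch; assumes scikit-learn is installed, values are illustrative.
from sklearn.datasets import load_iris
from metric_learn import MLKR

iris = load_iris()
X, y = iris.data, iris.target

# Coarse step size and stopping threshold, as in the test, so the
# gradient-descent loop terminates quickly on this small dataset.
mlkr = MLKR(epsilon=10, alpha=10)
mlkr.fit(X, y)

A = mlkr.transformer()           # learned (d x d) linear transformation
X_embedded = mlkr.transform()    # projects the stored training data, as in the test
print(A.shape, X_embedded.shape)

Note that the stopping criterion compares the absolute leave-one-out loss against alpha, so very small alpha values can make fit run for a long time on larger datasets.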