[MRG] Remove preprocessing the data for RCA #194


Merged
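In short, after this PR RCA no longer runs PCA or mean-centering inside fit; any such preprocessing is now the user's responsibility. A rough before/after sketch of calling code, assuming only the names that appear in the diff below (the toy X and chunks are invented for illustration):

import numpy as np
from sklearn.decomposition import PCA
from metric_learn import RCA

X = np.random.RandomState(42).randn(100, 5)  # toy data
chunks = np.repeat(np.arange(10), 10)        # 10 chunklets of 10 points each

# Before this PR, RCA could run PCA internally via pca_comps:
#   rca = RCA(num_dims=2, pca_comps=3).fit(X, chunks)
# After this PR, pca_comps is deprecated; preprocess explicitly if needed:
rca = RCA(num_dims=2).fit(PCA(n_components=3).fit_transform(X), chunks)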
35 changes: 14 additions & 21 deletions metric_learn/rca.py
@@ -44,20 +44,18 @@ class RCA(MahalanobisMixin, TransformerMixin):
       The learned linear transformation ``L``.
   """
 
-  def __init__(self, num_dims=None, pca_comps=None, preprocessor=None):
+  def __init__(self, num_dims=None, pca_comps='deprecated', preprocessor=None):
     """Initialize the learner.
 
     Parameters
     ----------
     num_dims : int, optional
       embedding dimension (default: original dimension of data)
 
-    pca_comps : int, float, None or string
-      Number of components to keep during PCA preprocessing.
-      If None (default), does not perform PCA.
-      If ``0 < pca_comps < 1``, it is used as
-      the minimum explained variance ratio.
-      See sklearn.decomposition.PCA for more details.
+    pca_comps : Not used
+      .. deprecated:: 0.5.0
+        `pca_comps` was deprecated in version 0.5.0 and will
+        be removed in 0.6.0.
 
     preprocessor : array-like, shape=(n_samples, n_features) or callable
       The preprocessor to call to get tuples from indices. If array-like,
@@ -98,26 +96,24 @@ def fit(self, X, chunks):
       When ``chunks[i] == -1``, point i doesn't belong to any chunklet.
       When ``chunks[i] == j``, point i belongs to chunklet j.
     """
+    if self.pca_comps != 'deprecated':
+      warnings.warn('"pca_comps" parameter is not used.'
+                    ' It has been deprecated in version 0.5.0 and will be '
+                    'removed in 0.6.0', DeprecationWarning)
+
     X = self._prepare_inputs(X, ensure_min_samples=2)
 
-    # PCA projection to remove noise and redundant information.
-    if self.pca_comps is not None:
-      pca = decomposition.PCA(n_components=self.pca_comps)
-      X_t = pca.fit_transform(X)
-      M_pca = pca.components_
Member Author:
Note that this code was also providing a PCA initialization at the same time, so for now we remove it. I think I'll open the PR about initialization before merging this PR into master, and then we can merge it into this one to keep the same possibility of initializing with PCA.
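The composition being removed sits at the end of this diff: `transformer_.dot(M_pca)`. A minimal sketch of reproducing it by hand after this change, assuming `transformer_` and scikit-learn's `components_` behave as in the surrounding code, and ignoring PCA's mean offset just as the removed line did:

import numpy as np
from sklearn.decomposition import PCA
from metric_learn import RCA

X = np.random.RandomState(0).randn(100, 5)   # toy data
chunks = np.repeat(np.arange(10), 10)        # 10 chunklets of 10 points each

pca = PCA(n_components=3).fit(X)
rca = RCA(num_dims=2).fit(pca.transform(X), chunks)

# Compose the two linear maps so the result applies to raw features;
# this is what the removed `transformer_.dot(M_pca)` line computed.
L_composed = rca.transformer_.dot(pca.components_)  # shape (2, n_features)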

-    else:
-      X_t = X - X.mean(axis=0)
Member:
why is this centering step gone?

Member Author:
I guess because we should remove any preprocessing step, but I agree I didn't mention it at all. Maybe we should keep the ChangedBehaviorWarning message below, but replace "no longer trained on a preprocessed version" with "no longer trained on centered data by default", and encourage using a StandardScaler if needed?

Member:
Fair enough (I double-checked, and this centering is not part of standard RCA).
Maybe keep the ChangedBehaviorWarning but change it to "no longer center the data before training RCA" (no need to mention a scaler, I think).
And in the deprecation warning, add that PCA preprocessing should now be done by the user.

Finally, have you checked the influence of removing the centering step on the examples?
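A short sketch of the user-side centering discussed here, assuming one wants to reproduce the old default with scikit-learn's StandardScaler (with_std=False centers without rescaling):

import numpy as np
from sklearn.preprocessing import StandardScaler
from metric_learn import RCA

X = np.random.RandomState(0).randn(100, 5)
chunks = np.repeat(np.arange(10), 10)

# Equivalent to the removed `X_t = X - X.mean(axis=0)` step.
X_centered = StandardScaler(with_std=False).fit_transform(X)
rca = RCA(num_dims=2).fit(X_centered, chunks)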

-      M_pca = None
-
     chunks = np.asanyarray(chunks, dtype=int)
-    chunk_mask, chunked_data = _chunk_mean_centering(X_t, chunks)
+    chunk_mask, chunked_data = _chunk_mean_centering(X, chunks)

     inner_cov = np.atleast_2d(np.cov(chunked_data, rowvar=0, bias=1))
-    dim = self._check_dimension(np.linalg.matrix_rank(inner_cov), X_t)
+    dim = self._check_dimension(np.linalg.matrix_rank(inner_cov), X)

     # Fisher Linear Discriminant projection
-    if dim < X_t.shape[1]:
-      total_cov = np.cov(X_t[chunk_mask], rowvar=0)
+    if dim < X.shape[1]:
+      total_cov = np.cov(X[chunk_mask], rowvar=0)
       tmp = np.linalg.lstsq(total_cov, inner_cov)[0]
       vals, vecs = np.linalg.eig(tmp)
       inds = np.argsort(vals)[:dim]
@@ -127,9 +123,6 @@ def fit(self, X, chunks):
     else:
       self.transformer_ = _inv_sqrtm(inner_cov).T
 
-    if M_pca is not None:
-      self.transformer_ = np.atleast_2d(self.transformer_.dot(M_pca))
-
     return self
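For reference, `_inv_sqrtm` above takes an inverse matrix square root. A minimal numpy sketch of such a helper, as an assumption about its behaviour rather than this repo's implementation:

import numpy as np

def inv_sqrtm_sketch(M):
  # Inverse square root of a symmetric positive-definite matrix
  # (such as a covariance matrix), via its eigendecomposition.
  vals, vecs = np.linalg.eigh(M)
  return (vecs / np.sqrt(vals)).dot(vecs.T)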


19 changes: 18 additions & 1 deletion test/metric_learn_test.py
@@ -18,7 +18,7 @@
   HAS_SKGGM = True
 from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC,
                           LSML_Supervised, ITML_Supervised, SDML_Supervised,
-                          RCA_Supervised, MMC_Supervised, SDML)
+                          RCA_Supervised, MMC_Supervised, SDML, RCA)
 # Import this specially for testing.
 from metric_learn.constraints import wrap_pairs
 from metric_learn.lmnn import python_LMNN
@@ -530,6 +530,23 @@ def test_feature_null_variance(self):
     csep = class_separation(rca.transform(X), self.iris_labels)
     self.assertLess(csep, 0.30)
 
+  def test_deprecation_pca_comps(self):
+    # test that a deprecation message is thrown if pca_comps is set at
+    # initialization
+    # TODO: remove in v.0.6
+    X, y = make_classification(random_state=42, n_samples=100)
+    rca_supervised = RCA_Supervised(pca_comps=X.shape[1], num_chunks=20)
+    msg = ('"pca_comps" parameter is not used.'
+           ' It has been deprecated in version 0.5.0 and will be '
+           'removed in 0.6.0')
+    assert_warns_message(DeprecationWarning, msg, rca_supervised.fit, X, y)
+
+    rca = RCA(pca_comps=X.shape[1])
+    msg = ('"pca_comps" parameter is not used.'
+           ' It has been deprecated in version 0.5.0 and will be '
+           'removed in 0.6.0')
+    assert_warns_message(DeprecationWarning, msg, rca.fit, X, y)


 class TestMLKR(MetricTestCase):
   def test_iris(self):
3 changes: 2 additions & 1 deletion test/test_base_metric.py
@@ -64,7 +64,8 @@ def test_sdml(self):

   def test_rca(self):
     self.assertEqual(str(metric_learn.RCA()),
-                     "RCA(num_dims=None, pca_comps=None, preprocessor=None)")
+                     "RCA(num_dims=None, pca_comps='deprecated', "
+                     "preprocessor=None)")
     self.assertEqual(str(metric_learn.RCA_Supervised()),
                      "RCA_Supervised(chunk_size=2, num_chunks=100, "
                      "num_dims=None, pca_comps=None,\n "
5 changes: 2 additions & 3 deletions test/test_sklearn_compat.py
@@ -89,9 +89,8 @@ def stable_init(self, sparsity_param=0.01, num_labeled='deprecated',
     dSDML.__init__ = stable_init
     check_estimator(dSDML)
 
-  # This fails because the default num_chunks isn't data-dependent.
-  # def test_rca(self):
-  #   check_estimator(RCA_Supervised)
+  def test_rca(self):
+    check_estimator(RCA_Supervised)


RNG = check_random_state(0)