17
17
from six .moves import xrange
18
18
from sklearn import decomposition
19
19
from sklearn .base import TransformerMixin
20
+ from sklearn .exceptions import ChangedBehaviorWarning
20
21
21
22
from ._util import _check_n_components
22
23
from .base_metric import MahalanobisMixin
@@ -48,7 +49,7 @@ class RCA(MahalanobisMixin, TransformerMixin):
48
49
"""
49
50
50
51
def __init__ (self , n_components = None , num_dims = 'deprecated' ,
51
- pca_comps = None , preprocessor = None ):
52
+ pca_comps = 'deprecated' , preprocessor = None ):
52
53
"""Initialize the learner.
53
54
54
55
Parameters
@@ -62,12 +63,10 @@ def __init__(self, n_components=None, num_dims='deprecated',
62
63
`num_dims` was deprecated in version 0.5.0 and will
63
64
be removed in 0.6.0. Use `n_components` instead.
64
65
65
- pca_comps : int, float, None or string
66
- Number of components to keep during PCA preprocessing.
67
- If None (default), does not perform PCA.
68
- If ``0 < pca_comps < 1``, it is used as
69
- the minimum explained variance ratio.
70
- See sklearn.decomposition.PCA for more details.
66
+ pca_comps : Not used
67
+ .. deprecated:: 0.5.0
68
+ `pca_comps` was deprecated in version 0.5.0 and will
69
+ be removed in 0.6.0.
71
70
72
71
preprocessor : array-like, shape=(n_samples, n_features) or callable
73
72
The preprocessor to call to get tuples from indices. If array-like,
@@ -83,8 +82,9 @@ def _check_dimension(self, rank, X):
83
82
if rank < d :
84
83
warnings .warn ('The inner covariance matrix is not invertible, '
85
84
'so the transformation matrix may contain Nan values. '
86
- 'You should adjust pca_comps to remove noise and '
87
- 'redundant information.' )
85
+ 'You should reduce the dimensionality of your input, '
86
+ 'for instance using `sklearn.decomposition.PCA` as a '
87
+ 'preprocessing step.' )
88
88
89
89
dim = _check_n_components (d , self .n_components )
90
90
return dim
@@ -105,25 +105,33 @@ def fit(self, X, chunks):
105
105
' It has been deprecated in version 0.5.0 and will be'
106
106
' removed in 0.6.0. Use "n_components" instead' ,
107
107
DeprecationWarning )
108
+
109
+ if self .pca_comps != 'deprecated' :
110
+ warnings .warn (
111
+ '"pca_comps" parameter is not used. '
112
+ 'It has been deprecated in version 0.5.0 and will be '
113
+ 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If '
114
+ 'you still want to do it, you could use '
115
+ '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.' ,
116
+ DeprecationWarning )
117
+
108
118
X , chunks = self ._prepare_inputs (X , chunks , ensure_min_samples = 2 )
109
119
110
- # PCA projection to remove noise and redundant information.
111
- if self .pca_comps is not None :
112
- pca = decomposition .PCA (n_components = self .pca_comps )
113
- X_t = pca .fit_transform (X )
114
- M_pca = pca .components_
115
- else :
116
- X_t = X - X .mean (axis = 0 )
117
- M_pca = None
120
+ warnings .warn (
121
+ "RCA will no longer center the data before training. If you want "
122
+ "to do some preprocessing, you should do it manually (you can also "
123
+ "use an `sklearn.pipeline.Pipeline` for instance). This warning "
124
+ "will disappear in version 0.6.0." , ChangedBehaviorWarning )
118
125
119
- chunk_mask , chunked_data = _chunk_mean_centering (X_t , chunks )
126
+ chunks = np .asanyarray (chunks , dtype = int )
127
+ chunk_mask , chunked_data = _chunk_mean_centering (X , chunks )
120
128
121
129
inner_cov = np .atleast_2d (np .cov (chunked_data , rowvar = 0 , bias = 1 ))
122
- dim = self ._check_dimension (np .linalg .matrix_rank (inner_cov ), X_t )
130
+ dim = self ._check_dimension (np .linalg .matrix_rank (inner_cov ), X )
123
131
124
132
# Fisher Linear Discriminant projection
125
- if dim < X_t .shape [1 ]:
126
- total_cov = np .cov (X_t [chunk_mask ], rowvar = 0 )
133
+ if dim < X .shape [1 ]:
134
+ total_cov = np .cov (X [chunk_mask ], rowvar = 0 )
127
135
tmp = np .linalg .lstsq (total_cov , inner_cov )[0 ]
128
136
vals , vecs = np .linalg .eig (tmp )
129
137
inds = np .argsort (vals )[:dim ]
@@ -133,9 +141,6 @@ def fit(self, X, chunks):
133
141
else :
134
142
self .transformer_ = _inv_sqrtm (inner_cov ).T
135
143
136
- if M_pca is not None :
137
- self .transformer_ = np .atleast_2d (self .transformer_ .dot (M_pca ))
138
-
139
144
return self
140
145
141
146
@@ -155,7 +160,7 @@ class RCA_Supervised(RCA):
155
160
"""
156
161
157
162
def __init__ (self , num_dims = 'deprecated' , n_components = None ,
158
- pca_comps = None , num_chunks = 100 , chunk_size = 2 ,
163
+ pca_comps = 'deprecated' , num_chunks = 100 , chunk_size = 2 ,
159
164
preprocessor = None ):
160
165
"""Initialize the supervised version of `RCA`.
161
166
0 commit comments