26
26
def _chunk_mean_centering (data , chunks ):
27
27
num_chunks = chunks .max () + 1
28
28
chunk_mask = chunks != - 1
29
- chunk_data = data [chunk_mask ]
29
+ # We need to ensure the data is float so that we can subtract the
30
+ # mean from it
31
+ chunk_data = data [chunk_mask ].astype (float , copy = False )
30
32
chunk_labels = chunks [chunk_mask ]
31
33
for c in xrange (num_chunks ):
32
34
mask = chunk_labels == c
@@ -98,7 +100,7 @@ def fit(self, X, chunks):
98
100
When ``chunks[i] == -1``, point i doesn't belong to any chunklet.
99
101
When ``chunks[i] == j``, point i belongs to chunklet j.
100
102
"""
101
- X = self ._prepare_inputs (X , ensure_min_samples = 2 )
103
+ X , chunks = self ._prepare_inputs (X , chunks , ensure_min_samples = 2 )
102
104
103
105
# PCA projection to remove noise and redundant information.
104
106
if self .pca_comps is not None :
@@ -109,7 +111,6 @@ def fit(self, X, chunks):
109
111
X_t = X - X .mean (axis = 0 )
110
112
M_pca = None
111
113
112
- chunks = np .asanyarray (chunks , dtype = int )
113
114
chunk_mask , chunked_data = _chunk_mean_centering (X_t , chunks )
114
115
115
116
inner_cov = np .atleast_2d (np .cov (chunked_data , rowvar = 0 , bias = 1 ))
0 commit comments