15
15
16
16
from __future__ import division
17
17
18
+ from collections import defaultdict
19
+ from itertools import islice
20
+
18
21
from sklearn import pipeline
19
22
from sklearn .base import clone
20
23
from sklearn .utils .metaestimators import if_delegate_has_method
@@ -38,6 +41,9 @@ class Pipeline(pipeline.Pipeline):
38
41
cross-validated together while setting different parameters.
39
42
For this, it enables setting parameters of the various steps using their
40
43
names and the parameter name separated by a '__', as in the example below.
44
+ A step's estimator may be replaced entirely by setting the parameter
45
+ with its name to another estimator, or a transformer removed by setting
46
+ it to 'passthrough' or ``None``.
41
47
42
48
Parameters
43
49
----------
@@ -121,7 +127,7 @@ def _validate_steps(self):
121
127
estimator = estimators [- 1 ]
122
128
123
129
for t in transformers :
124
- if t is None :
130
+ if t is None or t == 'passthrough' :
125
131
continue
126
132
if (not (hasattr (t , "fit" ) or
127
133
hasattr (t , "fit_transform" ) or
@@ -130,8 +136,9 @@ def _validate_steps(self):
130
136
hasattr (t , "fit_resample" ))):
131
137
raise TypeError (
132
138
"All intermediate steps of the chain should "
133
- "be estimators that implement fit and transform or sample "
134
- "(but not both) '%s' (type %s) doesn't)" % (t , type (t )))
139
+ "be estimators that implement fit and transform or "
140
+ "fit_resample (but not both) or be a string 'passthrough' "
141
+ "'%s' (type %s) doesn't)" % (t , type (t )))
135
142
136
143
if (hasattr (t , "fit_resample" ) and (hasattr (t , "fit_transform" ) or
137
144
hasattr (t , "transform" ))):
@@ -146,14 +153,16 @@ def _validate_steps(self):
146
153
" Pipelines" )
147
154
148
155
# We allow the last estimator to be None or 'passthrough' as an identity transformation
149
- if estimator is not None and not hasattr (estimator , "fit" ):
150
- raise TypeError ("Last step of Pipeline should implement fit. "
151
- "'%s' (type %s) doesn't" % (estimator ,
152
- type (estimator )))
156
+ if (estimator is not None and estimator != 'passthrough'
157
+ and not hasattr (estimator , "fit" )):
158
+ raise TypeError ("Last step of Pipeline should implement fit or be "
159
+ "the string 'passthrough'. '%s' (type %s) doesn't"
160
+ % (estimator , type (estimator )))
153
161
154
162
# Estimator interface
155
163
156
164
def _fit (self , X , y = None , ** fit_params ):
165
+ self .steps = list (self .steps )
157
166
self ._validate_steps ()
158
167
# Setup the memory
159
168
memory = check_memory (self .memory )
@@ -166,44 +175,39 @@ def _fit(self, X, y=None, **fit_params):
166
175
for pname , pval in fit_params .items ():
167
176
step , param = pname .split ('__' , 1 )
168
177
fit_params_steps [step ][param ] = pval
169
- Xt = X
170
- yt = y
171
- for step_idx , (name , transformer ) in enumerate (self .steps [:- 1 ]):
172
- if transformer is None :
173
- pass
174
- else :
175
- if hasattr (memory , 'location' ):
176
- # joblib >= 0.12
177
- if memory .location is None :
178
- # we do not clone when caching is disabled to
179
- # preserve backward compatibility
180
- cloned_transformer = transformer
181
- else :
182
- cloned_transformer = clone (transformer )
183
- elif hasattr (memory , 'cachedir' ):
184
- # joblib < 0.11
185
- if memory .cachedir is None :
186
- # we do not clone when caching is disabled to
187
- # preserve backward compatibility
188
- cloned_transformer = transformer
178
+ for step_idx , name , transformer in self ._iter (with_final = False ):
179
+ if hasattr (memory , 'location' ):
180
+ # joblib >= 0.12
181
+ if memory .location is None :
182
+ # we do not clone when caching is disabled to
183
+ # preserve backward compatibility
184
+ cloned_transformer = transformer
189
185
else :
190
186
cloned_transformer = clone (transformer )
191
- # Fit or load from cache the current transfomer
192
- if (hasattr (cloned_transformer , "transform" ) or
193
- hasattr (cloned_transformer , "fit_transform" )):
194
- Xt , fitted_transformer = fit_transform_one_cached (
195
- cloned_transformer , None , Xt , yt ,
196
- ** fit_params_steps [name ])
197
- elif hasattr (cloned_transformer , "fit_resample" ):
198
- Xt , yt , fitted_transformer = fit_resample_one_cached (
199
- cloned_transformer , Xt , yt , ** fit_params_steps [name ])
200
- # Replace the transformer of the step with the fitted
201
- # transformer. This is necessary when loading the transformer
202
- # from the cache.
203
- self .steps [step_idx ] = (name , fitted_transformer )
204
- if self ._final_estimator is None :
205
- return Xt , yt , {}
206
- return Xt , yt , fit_params_steps [self .steps [- 1 ][0 ]]
187
+ elif hasattr (memory , 'cachedir' ):
188
+ # joblib < 0.11
189
+ if memory .cachedir is None :
190
+ # we do not clone when caching is disabled to
191
+ # preserve backward compatibility
192
+ cloned_transformer = transformer
193
+ else :
194
+ cloned_transformer = clone (transformer )
195
+ # Fit or load from cache the current transformer
196
+ if (hasattr (cloned_transformer , "transform" ) or
197
+ hasattr (cloned_transformer , "fit_transform" )):
198
+ X , fitted_transformer = fit_transform_one_cached (
199
+ cloned_transformer , None , X , y ,
200
+ ** fit_params_steps [name ])
201
+ elif hasattr (cloned_transformer , "fit_resample" ):
202
+ X , y , fitted_transformer = fit_resample_one_cached (
203
+ cloned_transformer , X , y , ** fit_params_steps [name ])
204
+ # Replace the transformer of the step with the fitted
205
+ # transformer. This is necessary when loading the transformer
206
+ # from the cache.
207
+ self .steps [step_idx ] = (name , fitted_transformer )
208
+ if self ._final_estimator == 'passthrough' :
209
+ return X , y , {}
210
+ return X , y , fit_params_steps [self .steps [- 1 ][0 ]]
207
211
208
212
def fit (self , X , y = None , ** fit_params ):
209
213
"""Fit the model
@@ -234,7 +238,7 @@ def fit(self, X, y=None, **fit_params):
234
238
235
239
"""
236
240
Xt , yt , fit_params = self ._fit (X , y , ** fit_params )
237
- if self ._final_estimator is not None :
241
+ if self ._final_estimator != 'passthrough' :
238
242
self ._final_estimator .fit (Xt , yt , ** fit_params )
239
243
return self
240
244
@@ -268,7 +272,7 @@ def fit_transform(self, X, y=None, **fit_params):
268
272
"""
269
273
last_step = self ._final_estimator
270
274
Xt , yt , fit_params = self ._fit (X , y , ** fit_params )
271
- if last_step is None :
275
+ if last_step == 'passthrough' :
272
276
return Xt
273
277
elif hasattr (last_step , 'fit_transform' ):
274
278
return last_step .fit_transform (Xt , yt , ** fit_params )
@@ -308,7 +312,7 @@ def fit_resample(self, X, y=None, **fit_params):
308
312
"""
309
313
last_step = self ._final_estimator
310
314
Xt , yt , fit_params = self ._fit (X , y , ** fit_params )
311
- if last_step is None :
315
+ if last_step == 'passthrough' :
312
316
return Xt
313
317
elif hasattr (last_step , 'fit_resample' ):
314
318
return last_step .fit_resample (Xt , yt , ** fit_params )
@@ -338,9 +342,7 @@ def predict(self, X, **predict_params):
338
342
339
343
"""
340
344
Xt = X
341
- for _ , transform in self .steps [:- 1 ]:
342
- if transform is None :
343
- continue
345
+ for _ , _ , transform in self ._iter (with_final = False ):
344
346
if hasattr (transform , "fit_resample" ):
345
347
pass
346
348
else :
@@ -394,15 +396,33 @@ def predict_proba(self, X):
394
396
395
397
"""
396
398
Xt = X
397
- for _ , transform in self .steps [:- 1 ]:
398
- if transform is None :
399
- continue
399
+ for _ , _ , transform in self ._iter (with_final = False ):
400
400
if hasattr (transform , "fit_resample" ):
401
401
pass
402
402
else :
403
403
Xt = transform .transform (Xt )
404
404
return self .steps [- 1 ][- 1 ].predict_proba (Xt )
405
405
406
+ @if_delegate_has_method (delegate = '_final_estimator' )
407
+ def score_samples (self , X ):
408
+ """Apply transforms, and score_samples of the final estimator.
409
+ Parameters
410
+ ----------
411
+ X : iterable
412
+ Data to predict on. Must fulfill input requirements of first step
413
+ of the pipeline.
414
+ Returns
415
+ -------
416
+ y_score : ndarray, shape (n_samples,)
417
+ """
418
+ Xt = X
419
+ for _ , _ , transformer in self ._iter (with_final = False ):
420
+ if hasattr (transformer , "fit_resample" ):
421
+ pass
422
+ else :
423
+ Xt = transformer .transform (Xt )
424
+ return self .steps [- 1 ][- 1 ].score_samples (Xt )
425
+
406
426
@if_delegate_has_method (delegate = '_final_estimator' )
407
427
def decision_function (self , X ):
408
428
"""Apply transformers/samplers, and decision_function of the final
@@ -420,9 +440,7 @@ def decision_function(self, X):
420
440
421
441
"""
422
442
Xt = X
423
- for _ , transform in self .steps [:- 1 ]:
424
- if transform is None :
425
- continue
443
+ for _ , _ , transform in self ._iter (with_final = False ):
426
444
if hasattr (transform , "fit_resample" ):
427
445
pass
428
446
else :
@@ -446,9 +464,7 @@ def predict_log_proba(self, X):
446
464
447
465
"""
448
466
Xt = X
449
- for _ , transform in self .steps [:- 1 ]:
450
- if transform is None :
451
- continue
467
+ for _ , _ , transform in self ._iter (with_final = False ):
452
468
if hasattr (transform , "fit_resample" ):
453
469
pass
454
470
else :
@@ -473,15 +489,13 @@ def transform(self):
473
489
Xt : array-like, shape = [n_samples, n_transformed_features]
474
490
"""
475
491
# _final_estimator is 'passthrough' or has transform, otherwise attribute error
476
- if self ._final_estimator is not None :
492
+ if self ._final_estimator != 'passthrough' :
477
493
self ._final_estimator .transform
478
494
return self ._transform
479
495
480
496
def _transform (self , X ):
481
497
Xt = X
482
- for name , transform in self .steps :
483
- if transform is None :
484
- continue
498
+ for _ , _ , transform in self ._iter ():
485
499
if hasattr (transform , "fit_resample" ):
486
500
pass
487
501
else :
@@ -507,29 +521,20 @@ def inverse_transform(self):
507
521
Xt : array-like, shape = [n_samples, n_features]
508
522
"""
509
523
# raise AttributeError if necessary for hasattr behaviour
510
- for name , transform in self .steps :
511
- if transform is not None :
512
- transform .inverse_transform
524
+ for _ , _ , transform in self ._iter ():
525
+ transform .inverse_transform
513
526
return self ._inverse_transform
514
527
515
528
def _inverse_transform (self , X ):
516
529
Xt = X
517
- for name , transform in self .steps [::- 1 ]:
518
- if transform is None :
519
- continue
530
+ reverse_iter = reversed (list (self ._iter ()))
531
+ for _ , _ , transform in reverse_iter :
520
532
if hasattr (transform , "fit_resample" ):
521
533
pass
522
534
else :
523
535
Xt = transform .inverse_transform (Xt )
524
536
return Xt
525
537
526
- # need to overwrite sklearn's _final_estimator since sklearn supports
527
- # 'passthrough', but imblearn does not.
528
- @property
529
- def _final_estimator (self ):
530
- estimator = self .steps [- 1 ][1 ]
531
- return estimator
532
-
533
538
@if_delegate_has_method (delegate = '_final_estimator' )
534
539
def score (self , X , y = None , sample_weight = None ):
535
540
"""Apply transformers/samplers, and score with the final estimator
@@ -553,9 +558,7 @@ def score(self, X, y=None, sample_weight=None):
553
558
score : float
554
559
"""
555
560
Xt = X
556
- for _ , transform in self .steps [:- 1 ]:
557
- if transform is None :
558
- continue
561
+ for _ , _ , transform in self ._iter (with_final = False ):
559
562
if hasattr (transform , "fit_resample" ):
560
563
pass
561
564
else :
@@ -618,7 +621,7 @@ def make_pipeline(*steps, **kwargs):
618
621
>>> from sklearn.naive_bayes import GaussianNB
619
622
>>> from sklearn.preprocessing import StandardScaler
620
623
>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
621
- ... # doctest: +NORMALIZE_WHITESPACE
624
+ ... # doctest: +NORMALIZE_WHITESPACE
622
625
Pipeline(memory=None,
623
626
steps=[('standardscaler',
624
627
StandardScaler(copy=True, with_mean=True, with_std=True)),
0 commit comments