@@ -158,7 +158,8 @@ def evaluate_classifier(clf, df_scores, clf_name=None):

preprocessor_linear = ColumnTransformer(
    [("num-pipe", num_pipe, selector(dtype_include=np.number)),
-     ("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))]
+     ("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))],
+    n_jobs=2
)

###############################################################################
@@ -192,11 +193,12 @@ def evaluate_classifier(clf, df_scores, clf_name=None):

preprocessor_tree = ColumnTransformer(
    [("num-pipe", num_pipe, selector(dtype_include=np.number)),
-     ("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))]
+     ("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))],
+    n_jobs=2
)

rf_clf = make_pipeline(
-    preprocessor_tree, RandomForestClassifier(random_state=42)
+    preprocessor_tree, RandomForestClassifier(random_state=42, n_jobs=2)
)

df_scores = evaluate_classifier(rf_clf, df_scores, "RF")
@@ -266,7 +268,7 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
rf_clf = make_pipeline_with_sampler(
    preprocessor_tree,
    RandomUnderSampler(random_state=42),
-    RandomForestClassifier(random_state=42)
+    RandomForestClassifier(random_state=42, n_jobs=2)
)

df_scores = evaluate_classifier(
@@ -275,9 +277,10 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
df_scores

###############################################################################
-# Applying a random under-sampler before the training of the linear model or random
-# forest, allows to not focus on the majority class at the cost of making more
-# mistake for samples in the majority class (i.e. decreased accuracy).
+# Applying a random under-sampler before training the linear model or the
+# random forest allows us to not focus only on the majority class, at the
+# cost of making more mistakes for samples in the majority class (i.e.
+# decreased accuracy).
#
# We could apply any type of samplers and find which sampler is working best
# on the current dataset.
@@ -288,16 +291,17 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
# Use of `BalancedRandomForestClassifier` and `BalancedBaggingClassifier`
# .......................................................................
#
-# We already showed that random under-sampling can be effective on decision tree.
-# However, instead of under-sampling once the dataset, one could under-sample
-# the original dataset before to take a bootstrap sample. This is the base of
-# the `BalancedRandomForestClassifier` and `BalancedBaggingClassifier`.
+# We already showed that random under-sampling can be effective on a decision
+# tree. However, instead of under-sampling the dataset once, one could
+# under-sample the original dataset before taking each bootstrap sample. This
+# is the basis of the `BalancedRandomForestClassifier` and
+# `BalancedBaggingClassifier`.

from imblearn.ensemble import BalancedRandomForestClassifier

rf_clf = make_pipeline(
    preprocessor_tree,
-    BalancedRandomForestClassifier(random_state=42)
+    BalancedRandomForestClassifier(random_state=42, n_jobs=2)
)

df_scores = evaluate_classifier(rf_clf, df_scores, "Balanced RF")
@@ -316,7 +320,7 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
    preprocessor_tree,
    BalancedBaggingClassifier(
        base_estimator=HistGradientBoostingClassifier(random_state=42),
-        n_estimators=10, random_state=42
+        n_estimators=10, random_state=42, n_jobs=2
    )
)

@@ -330,8 +334,8 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
# to bring some diversity for the different GBDT to learn and not focus on a
# portion of the majority class.
#
-# We will repeat the same experiment but with a ratio of 100:1 and make a similar
-# analysis.
+# We will repeat the same experiment but with a ratio of 100:1 and carry out
+# a similar analysis.

###############################################################################
# Increase imbalanced ratio
@@ -354,7 +358,7 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
)
df_scores = evaluate_classifier(lr_clf, df_scores, "LR")
rf_clf = make_pipeline(
-    preprocessor_tree, RandomForestClassifier(random_state=42)
+    preprocessor_tree, RandomForestClassifier(random_state=42, n_jobs=2)
)
df_scores = evaluate_classifier(rf_clf, df_scores, "RF")
lr_clf.set_params(logisticregression__class_weight="balanced")
@@ -376,14 +380,14 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
rf_clf = make_pipeline_with_sampler(
    preprocessor_tree,
    RandomUnderSampler(random_state=42),
-    RandomForestClassifier(random_state=42)
+    RandomForestClassifier(random_state=42, n_jobs=2)
)
df_scores = evaluate_classifier(
    rf_clf, df_scores, "RF with under-sampling"
)
rf_clf = make_pipeline(
    preprocessor_tree,
-    BalancedRandomForestClassifier(random_state=42)
+    BalancedRandomForestClassifier(random_state=42, n_jobs=2)
)
df_scores = evaluate_classifier(rf_clf, df_scores)
df_scores = evaluate_classifier(
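
For readers who want to try the change in isolation, below is a minimal, self-contained sketch of the pattern this patch applies throughout the example: passing n_jobs=2 so that the ColumnTransformer fits its sub-pipelines in parallel and the forest grows its trees in parallel. The toy DataFrame and the num_pipe/cat_pipe placeholders are assumptions made for illustration only; the real example builds its own data and pipelines.

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer, make_column_selector as selector
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

rng = np.random.RandomState(42)
# Toy stand-in for the example's data: one numeric and one categorical column.
X = pd.DataFrame({
    "num_feat": rng.normal(size=200),
    "cat_feat": pd.Categorical(rng.choice(["a", "b", "c"], size=200)),
})
y = rng.randint(0, 2, size=200)

# Placeholder sub-pipelines (the example defines richer num_pipe/cat_pipe).
num_pipe = StandardScaler()
cat_pipe = OneHotEncoder(handle_unknown="ignore")

preprocessor_tree = ColumnTransformer(
    [("num-pipe", num_pipe, selector(dtype_include=np.number)),
     ("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))],
    n_jobs=2,  # fit/transform the two sub-pipelines on two workers
)

rf_clf = make_pipeline(
    preprocessor_tree,
    RandomForestClassifier(random_state=42, n_jobs=2),  # grow trees on 2 cores
)
rf_clf.fit(X, y)

Note that the patch pins n_jobs=2 rather than n_jobs=-1, presumably to keep resource usage bounded when the example runs on shared documentation-build machines.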