Skip to content

Commit 802cc44

Browse files
committed
increase n_jobs
1 parent db914bf commit 802cc44

File tree

1 file changed

+22
-18
lines changed

examples/applications/plot_impact_imbalanced_classes.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
158158

159159
preprocessor_linear = ColumnTransformer(
160160
[("num-pipe", num_pipe, selector(dtype_include=np.number)),
161-
("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))]
161+
("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))],
162+
n_jobs=2
162163
)
163164

164165
###############################################################################
@@ -192,11 +193,12 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
192193

193194
preprocessor_tree = ColumnTransformer(
194195
[("num-pipe", num_pipe, selector(dtype_include=np.number)),
195-
("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))]
196+
("cat-pipe", cat_pipe, selector(dtype_include=pd.CategoricalDtype))],
197+
n_jobs=2
196198
)
197199

198200
rf_clf = make_pipeline(
199-
preprocessor_tree, RandomForestClassifier(random_state=42)
201+
preprocessor_tree, RandomForestClassifier(random_state=42, n_jobs=2)
200202
)
201203

202204
df_scores = evaluate_classifier(rf_clf, df_scores, "RF")
@@ -266,7 +268,7 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
266268
rf_clf = make_pipeline_with_sampler(
267269
preprocessor_tree,
268270
RandomUnderSampler(random_state=42),
269-
RandomForestClassifier(random_state=42)
271+
RandomForestClassifier(random_state=42, n_jobs=2)
270272
)
271273

272274
df_scores = evaluate_classifier(
@@ -275,9 +277,10 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
275277
df_scores
276278

277279
###############################################################################
278-
# Applying a random under-sampler before the training of the linear model or random
279-
# forest, allows to not focus on the majority class at the cost of making more
280-
# mistake for samples in the majority class (i.e. decreased accuracy).
280+
# Applying a random under-sampler before the training of the linear model or
281+
# random forest, allows us to not focus on the majority class at the cost of
282+
# making more mistakes for samples in the majority class (i.e. decreased
283+
# accuracy).
281284
#
282285
# We could apply any type of samplers and find which sampler is working best
283286
# on the current dataset.
@@ -288,16 +291,17 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
288291
# Use of `BalancedRandomForestClassifier` and `BalancedBaggingClassifier`
289292
# .......................................................................
290293
#
291-
# We already showed that random under-sampling can be effective on decision tree.
292-
# However, instead of under-sampling once the dataset, one could under-sample
293-
# the original dataset before to take a bootstrap sample. This is the base of
294-
# the `BalancedRandomForestClassifier` and `BalancedBaggingClassifier`.
294+
# We already showed that random under-sampling can be effective on decision
295+
# tree. However, instead of under-sampling the dataset once, one could
296+
# under-sample the original dataset before taking a bootstrap sample. This is
297+
# the base of the `BalancedRandomForestClassifier` and
298+
# `BalancedBaggingClassifier`.
295299

296300
from imblearn.ensemble import BalancedRandomForestClassifier
297301

298302
rf_clf = make_pipeline(
299303
preprocessor_tree,
300-
BalancedRandomForestClassifier(random_state=42)
304+
BalancedRandomForestClassifier(random_state=42, n_jobs=2)
301305
)
302306

303307
df_scores = evaluate_classifier(rf_clf, df_scores, "Balanced RF")
@@ -316,7 +320,7 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
316320
preprocessor_tree,
317321
BalancedBaggingClassifier(
318322
base_estimator=HistGradientBoostingClassifier(random_state=42),
319-
n_estimators=10, random_state=42
323+
n_estimators=10, random_state=42, n_jobs=2
320324
)
321325
)
322326

@@ -330,8 +334,8 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
330334
# to bring some diversity for the different GBDT to learn and not focus on a
331335
# portion of the majority class.
332336
#
333-
# We will repeat the same experiment but with a ratio of 100:1 and make a similar
334-
# analysis.
337+
# We will repeat the same experiment but with a ratio of 100:1 and make a
338+
# similar analysis.
335339

336340
###############################################################################
337341
# Increase imbalanced ratio
@@ -354,7 +358,7 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
354358
)
355359
df_scores = evaluate_classifier(lr_clf, df_scores, "LR")
356360
rf_clf = make_pipeline(
357-
preprocessor_tree, RandomForestClassifier(random_state=42)
361+
preprocessor_tree, RandomForestClassifier(random_state=42, n_jobs=2)
358362
)
359363
df_scores = evaluate_classifier(rf_clf, df_scores, "RF")
360364
lr_clf.set_params(logisticregression__class_weight="balanced")
@@ -376,14 +380,14 @@ def evaluate_classifier(clf, df_scores, clf_name=None):
376380
rf_clf = make_pipeline_with_sampler(
377381
preprocessor_tree,
378382
RandomUnderSampler(random_state=42),
379-
RandomForestClassifier(random_state=42)
383+
RandomForestClassifier(random_state=42, n_jobs=2)
380384
)
381385
df_scores = evaluate_classifier(
382386
rf_clf, df_scores, "RF with under-sampling"
383387
)
384388
rf_clf = make_pipeline(
385389
preprocessor_tree,
386-
BalancedRandomForestClassifier(random_state=42)
390+
BalancedRandomForestClassifier(random_state=42, n_jobs=2)
387391
)
388392
df_scores = evaluate_classifier(rf_clf, df_scores)
389393
df_scores = evaluate_classifier(

0 commit comments

Comments
 (0)