30
30
31
31
32
32
def plot_scatter (X , y , title ):
33
+ """Function to plot some data as a scatter plot."""
33
34
plt .figure ()
34
35
plt .scatter (X [y == 1 , 0 ], X [y == 1 , 1 ], label = 'Class #1' )
35
36
plt .scatter (X [y == 0 , 0 ], X [y == 0 , 1 ], label = 'Class #0' )
36
37
plt .legend ()
37
38
plt .title (title )
38
39
40
+ ##############################################################################
41
+ # Toy data generation
42
+ ##############################################################################
43
+
44
+ ##############################################################################
45
+ # We are generating a non-Gaussian data set contaminated with some uniform
46
+ # noise.
39
47
40
- # Generate contaminated training data
41
48
moons , _ = make_moons (n_samples = 500 , noise = 0.05 )
42
49
blobs , _ = make_blobs (n_samples = 500 , centers = [(- 0.75 , 2.25 ),
43
50
(1.0 , 2.0 )],
@@ -51,7 +58,9 @@ def plot_scatter(X, y, title):
51
58
52
59
plot_scatter (X_train , y_train , 'Training dataset' )
53
60
54
- # Generate non-contaminated testing data
61
+ ##############################################################################
62
+ # We will generate some clean test data without outliers.
63
+
55
64
moons , _ = make_moons (n_samples = 50 , noise = 0.05 )
56
65
blobs , _ = make_blobs (n_samples = 50 , centers = [(- 0.75 , 2.25 ),
57
66
(1.0 , 2.0 )],
@@ -62,8 +71,19 @@ def plot_scatter(X, y, title):
62
71
63
72
plot_scatter (X_test , y_test , 'Testing dataset' )
64
73
74
+ ##############################################################################
75
+ # How to use the :class:`imblearn.FunctionSampler`
76
+ ##############################################################################
77
+
78
+ ##############################################################################
79
+ # We first define a function which will use
80
+ # :class:`sklearn.ensemble.IsolationForest` to eliminate some outliers from
81
+ # our dataset during training. The function passed to the
82
+ # :class:`imblearn.FunctionSampler` will be called when using the method
83
+ # ``fit_resample``.
65
84
66
85
def outlier_rejection (X , y ):
86
+ """This will be our function used to resample our dataset."""
67
87
model = IsolationForest (max_samples = 100 ,
68
88
contamination = 0.4 ,
69
89
random_state = rng )
@@ -76,6 +96,14 @@ def outlier_rejection(X, y):
76
96
X_inliers , y_inliers = reject_sampler .fit_resample (X_train , y_train )
77
97
plot_scatter (X_inliers , y_inliers , 'Training data without outliers' )
78
98
99
+ ##############################################################################
100
+ # Integrate it within a pipeline
101
+ ##############################################################################
102
+
103
+ ##############################################################################
104
+ # By eliminating outliers before training, the classifier will be less
105
+ # affected during the prediction.
106
+
79
107
pipe = make_pipeline (FunctionSampler (func = outlier_rejection ),
80
108
LogisticRegression (random_state = rng ))
81
109
y_pred = pipe .fit (X_train , y_train ).predict (X_test )
0 commit comments