26
26
n_jobs = _n_jobs_docstring ,
27
27
)
28
28
class SPIDER (BasePreprocessSampler ):
29
- """Perform filtering and over-sampling using Selective Pre-processing of
30
- Imbalanced Data (SPIDER) sampling approach for imbalanced datasets.
29
+ """Perform filtering and over-sampling using SPIDER algorithm.
30
+
31
+ This object is an implementation of SPIDER - Selective Pre-processing of
32
+ Imbalanced Data as presented in [1]_ and [2]_.
31
33
32
34
Read more in the :ref:`User Guide <combine>`.
33
35
34
36
Parameters
35
37
----------
36
38
{sampling_strategy}
37
39
38
- kind : str (default='weak')
39
- Possible choices are:
40
-
41
- ``'weak'``: Amplify noisy minority class samples based on the
42
- number of safe majority neighbors.
40
+ kind_sel : {{"weak", "relabel", "strong"}}, default='weak'
41
+ Strategy to use in order to preprocess samples in the SPIDER sampling.
43
42
44
- ``'relabel'``: Perform ``'weak'`` amplification and then relabel
45
- noisy majority neighbors for each noisy minority class sample.
46
-
47
- ``'strong'``: Amplify all minority class samples by an extra
48
- ``additional_neighbors`` if the sample is classified incorrectly
49
- by its neighbors. Otherwise each minority sample is amplified in a
50
- manner akin to ``'weak'`` amplification.
43
+ - If ``'weak'``, amplify noisy minority class samples based on the
44
+ number of safe majority neighbors.
45
+ - If ``'relabel'``, perform ``'weak'`` amplification and then relabel
46
+ noisy majority neighbors for each noisy minority class sample.
47
+ - If ``'strong'``, amplify all minority class samples by an extra
48
+ ``additional_neighbors`` if the sample is classified incorrectly
49
+ by its neighbors. Otherwise each minority sample is amplified in a
50
+ manner akin to ``'weak'`` amplification.
51
51
52
52
n_neighbors : int or object, optional (default=3)
53
53
If ``int``, number of nearest neighbours to use to construct synthetic
54
54
samples. If object, an estimator that inherits from
55
55
:class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
56
- find the k_neighbors .
56
+ find the nearest-neighbors .
57
57
58
58
additional_neighbors : int, optional (default=2)
59
- The number to add to amplified samples during if ``kind `` is
59
+ The number to add to amplified samples if ``kind_sel`` is
60
60
``'strong'``. This has no effect otherwise.
61
61
62
62
{n_jobs}
63
63
64
+ See Also
65
+ --------
66
+ NeighborhoodClearingRule : Undersample by editing noisy samples.
67
+
68
+ RandomOverSampler : Random oversample the dataset.
69
+
64
70
Notes
65
71
-----
66
72
The implementation is based on [1]_ and [2]_.
67
73
68
74
Supports multi-class resampling. A one-vs.-rest scheme is used.
69
75
70
- See also
71
- --------
72
- NeighborhoodClearingRule : Undersample by editing noisy samples.
73
-
74
- RandomOverSampler : Random oversample the dataset.
75
-
76
76
References
77
77
----------
78
78
.. [1] Stefanowski, J., & Wilk, S, "Selective pre-processing of imbalanced
@@ -107,13 +107,13 @@ class SPIDER(BasePreprocessSampler):
107
107
def __init__ (
108
108
self ,
109
109
sampling_strategy = "auto" ,
110
- kind = "weak" ,
110
+ kind_sel = "weak" ,
111
111
n_neighbors = 3 ,
112
112
additional_neighbors = 2 ,
113
113
n_jobs = None ,
114
114
):
115
115
super ().__init__ (sampling_strategy = sampling_strategy )
116
- self .kind = kind
116
+ self .kind_sel = kind_sel
117
117
self .n_neighbors = n_neighbors
118
118
self .additional_neighbors = additional_neighbors
119
119
self .n_jobs = n_jobs
@@ -124,10 +124,10 @@ def _validate_estimator(self):
124
124
"n_neighbors" , self .n_neighbors , additional_neighbor = 1 )
125
125
self .nn_ .set_params (** {"n_jobs" : self .n_jobs })
126
126
127
- if self .kind not in SEL_KIND :
127
+ if self .kind_sel not in SEL_KIND :
128
128
raise ValueError (
129
129
'The possible "kind" of algorithm are "weak", "relabel",'
130
- ' and "strong". Got {} instead.' .format (self .kind )
130
+ ' and "strong". Got {} instead.' .format (self .kind_sel )
131
131
)
132
132
133
133
if self .additional_neighbors < 1 :
@@ -258,17 +258,17 @@ def _fit_resample(self, X, y):
258
258
X_class_noisy = _safe_indexing (X , class_noisy_indices )
259
259
y_class_noisy = y [class_noisy_indices ]
260
260
261
- if self .kind in ("weak" , "relabel" ):
261
+ if self .kind_sel in ("weak" , "relabel" ):
262
262
nn_indices = self ._amplify (X_class_noisy , y_class_noisy )
263
263
264
- if self .kind == "relabel" :
264
+ if self .kind_sel == "relabel" :
265
265
relabel_mask = np .isin (nn_indices , discard_indices )
266
266
relabel_indices = np .unique (nn_indices [relabel_mask ])
267
267
self ._y [relabel_indices ] = class_sample
268
268
discard_indices = np .setdiff1d (
269
269
discard_indices , relabel_indices )
270
270
271
- elif self .kind == "strong" :
271
+ elif self .kind_sel == "strong" :
272
272
class_safe_indices = np .flatnonzero (is_class & is_safe )
273
273
X_class_safe = _safe_indexing (X , class_safe_indices )
274
274
y_class_safe = y [class_safe_indices ]
@@ -287,7 +287,7 @@ def _fit_resample(self, X, y):
287
287
y_incorrect = y_class_noisy [~ is_correct ]
288
288
self ._amplify (X_incorrect , y_incorrect , additional = True )
289
289
else :
290
- raise NotImplementedError (self .kind )
290
+ raise NotImplementedError (self .kind_sel )
291
291
292
292
discard_mask = np .ones_like (y , dtype = bool )
293
293
try :
0 commit comments