scikit-learn-contrib · glemaitre · Jul 27, 2018 · Jul 26, 2018 · Jul 26, 2018 · Jul 27, 2018
diff --git a/doc/over_sampling.rst b/doc/over_sampling.rst
@@ -127,11 +127,11 @@ nearest neighbors class. Those variants are presented in the figure below.
    :align: center
 
 
-The parameter ``kind`` is controlling this feature and the following types are
-available: (i) ``'borderline1'``, (ii) ``'borderline2'``, and (iii) ``'svm'``::
+The :class:`BorderlineSMOTE` and :class:`SVMSMOTE` offer some variants of the SMOTE
+algorithm::
 
-  >>> from imblearn.over_sampling import SMOTE, ADASYN
-  >>> X_resampled, y_resampled = SMOTE(kind='borderline1').fit_sample(X, y)
+  >>> from imblearn.over_sampling import BorderlineSMOTE
+  >>> X_resampled, y_resampled = BorderlineSMOTE().fit_sample(X, y)
   >>> print(sorted(Counter(y_resampled).items()))
   [(0, 4674), (1, 4674), (2, 4674)]
 
@@ -168,12 +168,11 @@ interpolation will create a sample on the line between :math:`x_{i}` and
 Each SMOTE variant and ADASYN differ from each other by selecting the samples
 :math:`x_i` ahead of generating the new samples.
 
-The **regular** SMOTE algorithm --- cf. to ``kind='regular'`` when
-instantiating a :class:`SMOTE` object --- does not impose any rule and will
-randomly pick-up all possible :math:`x_i` available.
+The **regular** SMOTE algorithm --- cf. to the :class:`SMOTE` object --- does not
+impose any rule and will randomly pick-up all possible :math:`x_i` available.
 
-The **borderline** SMOTE --- cf. to ``kind='borderline1'`` and
-``kind='borderline2'`` when instantiating a :class:`SMOTE` object --- will
+The **borderline** SMOTE --- cf. to the :class:`BorderlineSMOTE` with the
+parameters ``kind='borderline-1'`` and ``kind='borderline-2'`` --- will
 classify each sample :math:`x_i` to be (i) noise (i.e. all nearest-neighbors
 are from a different class than the one of :math:`x_i`), (ii) in danger
 (i.e. at least half of the nearest neighbors are from the same class than
@@ -184,10 +183,9 @@ samples *in danger* to generate new samples. In **Borderline-1** SMOTE,
 :math:`x_i`. On the contrary, **Borderline-2** SMOTE will consider
 :math:`x_{zi}` which can be from any class.
 
-**SVM** SMOTE --- cf. to ``kind='svm'`` when instantiating a :class:`SMOTE`
-object --- uses an SVM classifier to find support vectors and generate samples
-considering them. Note that the ``C`` parameter of the SVM classifier allows to
-select more or less support vectors.
+**SVM** SMOTE --- cf. to :class:`SVMSMOTE` --- uses an SVM classifier to find
+support vectors and generate samples considering them. Note that the ``C``
+parameter of the SVM classifier allows selecting more or fewer support vectors.
 
 For both borderline and SVM SMOTE, a neighborhood is defined using the
 parameter ``m_neighbors`` to decide if a sample is in danger, safe, or noise.
@@ -196,7 +194,7 @@ ADASYN is working similarly to the regular SMOTE. However, the number of
 samples generated for each :math:`x_i` is proportional to the number of samples
 which are not from the same class than :math:`x_i` in a given
 neighborhood. Therefore, more samples will be generated in the area that the
-nearest neighbor rule is not respected. The parameter ``n_neighbors`` is
+nearest neighbor rule is not respected. The parameter ``n_neighbors`` is
 equivalent to ``k_neighbors`` in :class:`SMOTE`.
 
 Multi-class management

diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst
@@ -30,6 +30,10 @@ Enhancement
 - Add support for one-vs-all encoded target to support keras. :issue:`409` by
   :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Add dedicated classes for borderline and SVM SMOTE:
+  :class:`BorderlineSMOTE` and :class:`SVMSMOTE`.
+  :issue:`440` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Bug fixes
 .........
 
@@ -63,3 +67,9 @@ Deprecation
   :class:`imblearn.under_sampling.NeighbourhoodCleaningRule`,
   :class:`imblearn.under_sampling.InstanceHardnessThreshold`,
   :class:`imblearn.under_sampling.CondensedNearestNeighbours`.
+
+- Deprecate ``kind``, ``out_step``, ``svm_estimator``, ``m_neighbors`` in
+  :class:`imblearn.over_sampling.SMOTE`. Users should use
+  :class:`imblearn.over_sampling.SVMSMOTE` and
+  :class:`imblearn.over_sampling.BorderlineSMOTE`.
+  :issue:`440` by :user:`Guillaume Lemaitre <glemaitre>`.
diff --git a/examples/over-sampling/plot_comparison_over_sampling.py b/examples/over-sampling/plot_comparison_over_sampling.py
@@ -20,7 +20,9 @@
 from sklearn.svm import LinearSVC
 
 from imblearn.pipeline import make_pipeline
-from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
+from imblearn.over_sampling import ADASYN
+from imblearn.over_sampling import SMOTE, BorderlineSMOTE, SVMSMOTE
+from imblearn.over_sampling import RandomOverSampler
 from imblearn.base import SamplerMixin
 from imblearn.utils import hash_X_y
 
@@ -220,21 +222,18 @@ def fit_sample(self, X, y):
                       class_sep=0.8)
 
 ax_arr = ((ax1, ax2), (ax3, ax4), (ax5, ax6), (ax7, ax8))
-string_add = ['regular', 'borderline-1', 'borderline-2', 'SVM']
-for str_add, ax, sampler in zip(string_add,
-                                ax_arr,
-                                (SMOTE(random_state=0),
-                                 SMOTE(random_state=0, kind='borderline1'),
-                                 SMOTE(random_state=0, kind='borderline2'),
-                                 SMOTE(random_state=0, kind='svm'))):
+for ax, sampler in zip(ax_arr,
+                       (SMOTE(random_state=0),
+                        BorderlineSMOTE(random_state=0, kind='borderline-1'),
+                        BorderlineSMOTE(random_state=0, kind='borderline-2'),
+                        SVMSMOTE(random_state=0))):
     clf = make_pipeline(sampler, LinearSVC())
     clf.fit(X, y)
     plot_decision_function(X, y, clf, ax[0])
-    ax[0].set_title('Decision function for {} {}'.format(
-        str_add, sampler.__class__.__name__))
+    ax[0].set_title('Decision function for {}'.format(
+        sampler.__class__.__name__))
     plot_resampling(X, y, sampler, ax[1])
-    ax[1].set_title('Resampling using {} {}'.format(
-        str_add, sampler.__class__.__name__))
+    ax[1].set_title('Resampling using {}'.format(sampler.__class__.__name__))
 fig.tight_layout()
 
 plt.show()
diff --git a/examples/over-sampling/plot_smote.py b/examples/over-sampling/plot_smote.py
@@ -17,6 +17,8 @@
 from sklearn.decomposition import PCA
 
 from imblearn.over_sampling import SMOTE
+from imblearn.over_sampling import BorderlineSMOTE
+from imblearn.over_sampling import SVMSMOTE
 
 print(__doc__)
 
@@ -49,8 +51,8 @@ def plot_resampling(ax, X, y, title):
 X_vis = pca.fit_transform(X)
 
 # Apply regular SMOTE
-kind = ['regular', 'borderline1', 'borderline2', 'svm']
-sm = [SMOTE(kind=k) for k in kind]
+sm = [SMOTE(), BorderlineSMOTE(kind='borderline-1'),
+      BorderlineSMOTE(kind='borderline-2'), SVMSMOTE()]
 X_resampled = []
 y_resampled = []
 X_res_vis = []
@@ -67,9 +69,10 @@ def plot_resampling(ax, X, y, title):
 ax_res = [ax3, ax4, ax5, ax6]
 
 c0, c1 = plot_resampling(ax1, X_vis, y, 'Original set')
-for i in range(len(kind)):
+for i, name in enumerate(['SMOTE', 'SMOTE Borderline-1',
+                          'SMOTE Borderline-2', 'SMOTE SVM']):
     plot_resampling(ax_res[i], X_res_vis[i], y_resampled[i],
-                    'SMOTE {}'.format(kind[i]))
+                    '{}'.format(name))
 
 ax2.legend((c0, c1), ('Class #0', 'Class #1'), loc='center',
            ncol=1, labelspacing=0.)

diff --git a/imblearn/over_sampling/__init__.py b/imblearn/over_sampling/__init__.py
@@ -6,5 +6,8 @@
 from .adasyn import ADASYN
 from .random_over_sampler import RandomOverSampler
 from .smote import SMOTE
+from .smote import BorderlineSMOTE
+from .smote import SVMSMOTE
 
-__all__ = ['ADASYN', 'RandomOverSampler', 'SMOTE']
+__all__ = ['ADASYN', 'RandomOverSampler',
+           'SMOTE', 'BorderlineSMOTE', 'SVMSMOTE']