
[MRG] FIX use a stump as base estimator in RUSBoostClassifier #545


Merged
5 changes: 3 additions & 2 deletions doc/ensemble.rst
@@ -93,12 +93,13 @@ Several methods taking advantage of boosting have been designed.
a boosting iteration [SKHN2010]_::

>>> from imblearn.ensemble import RUSBoostClassifier
>>> rusboost = RUSBoostClassifier(random_state=0)
>>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
... random_state=0)
>>> rusboost.fit(X_train, y_train) # doctest: +ELLIPSIS
RUSBoostClassifier(...)
>>> y_pred = rusboost.predict(X_test)
>>> balanced_accuracy_score(y_test, y_pred) # doctest: +ELLIPSIS
0.74...
0.66...
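
The snippet assumes that ``X_train``, ``y_train``, and
``balanced_accuracy_score`` are already defined earlier in the guide. A
minimal, self-contained sketch of such a setup (the dataset parameters here
are illustrative assumptions, not the ones used in the guide)::

    from sklearn.datasets import make_classification
    from sklearn.metrics import balanced_accuracy_score
    from sklearn.model_selection import train_test_split
    from imblearn.ensemble import RUSBoostClassifier

    # Illustrative imbalanced three-class problem; the guide builds its own.
    X, y = make_classification(n_samples=5000, n_classes=3, n_informative=4,
                               weights=[0.01, 0.05, 0.94], random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                                                        random_state=0)

    rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
                                  random_state=0)
    y_pred = rusboost.fit(X_train, y_train).predict(X_test)
    print(balanced_accuracy_score(y_test, y_pred))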

A specific method which uses ``AdaBoost`` as learners in the bagging classifier
is called EasyEnsemble. The :class:`EasyEnsembleClassifier` allows bagging
15 changes: 15 additions & 0 deletions doc/whats_new/v0.5.rst
@@ -6,6 +6,16 @@ Version 0.5 (under development)
Changelog
---------

Changed models
..............

The following models or functions might give different results even if the
same data ``X`` and ``y`` are used.

* :class:`imblearn.ensemble.RUSBoostClassifier` default estimator changed from
:class:`sklearn.tree.DecisionTreeClassifier` with full depth to a decision
stump (i.e., tree with ``max_depth=1``).
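
  If the previous behaviour is needed, a fully grown tree can still be passed
  explicitly through the public ``base_estimator`` parameter; a minimal
  sketch::

      from sklearn.tree import DecisionTreeClassifier
      from imblearn.ensemble import RUSBoostClassifier

      # Restore the pre-0.5 default: a fully grown tree instead of a stump.
      rusboost = RUSBoostClassifier(
          base_estimator=DecisionTreeClassifier(),  # max_depth=None, full depth
          random_state=0,
      )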

Documentation
.............

@@ -53,3 +63,8 @@ Bug
- Fix bug in :class:`imblearn.pipeline.Pipeline` where None could be the final
estimator.
:pr:`554` by :user:`Oliver Rausch <orausch>`.

- Fix bug by changing the default depth in
:class:`imblearn.ensemble.RUSBoostClassifier` to get a decision stump as a
weak learner as in the original paper.
:pr:`545` by :user:`Christos Aridas <chkoar>`.
22 changes: 6 additions & 16 deletions imblearn/ensemble/_weight_boosting.py
@@ -30,10 +30,11 @@ class RUSBoostClassifier(AdaBoostClassifier):

Parameters
----------
base_estimator : object, optional (default=DecisionTreeClassifier)
base_estimator : object, optional (default=None)
The base estimator from which the boosted ensemble is built.
Support for sample weighting is required, as well as proper `classes_`
and `n_classes_` attributes.
Support for sample weighting is required, as well as proper
``classes_`` and ``n_classes_`` attributes. If ``None``, then
the base estimator is ``DecisionTreeClassifier(max_depth=1)``.

n_estimators : integer, optional (default=50)
The maximum number of estimators at which boosting is terminated.
@@ -151,21 +152,10 @@ def fit(self, X, y, sample_weight=None):
super().fit(X, y, sample_weight)
return self

def _validate_estimator(self, default=DecisionTreeClassifier()):
def _validate_estimator(self):
"""Check the estimator and the n_estimator attribute, set the
`base_estimator_` attribute."""
if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
raise ValueError("n_estimators must be an integer, "
"got {}.".format(type(self.n_estimators)))

if self.n_estimators <= 0:
raise ValueError("n_estimators must be greater than zero, "
"got {}.".format(self.n_estimators))

if self.base_estimator is not None:
self.base_estimator_ = clone(self.base_estimator)
else:
self.base_estimator_ = clone(default)
super()._validate_estimator()

self.base_sampler_ = RandomUnderSampler(
sampling_strategy=self.sampling_strategy,
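
The deleted checks are not lost: they are performed by the parent class,
which also supplies the stump default. A simplified sketch of what
``super()._validate_estimator()`` provides, based on scikit-learn's
``AdaBoostClassifier``/``BaseEnsemble`` at the time of this PR (not the
verbatim implementation)::

    import numbers

    import numpy as np
    from sklearn.base import clone
    from sklearn.tree import DecisionTreeClassifier

    def _validate_estimator(self, default=DecisionTreeClassifier(max_depth=1)):
        # Simplified sketch, not verbatim scikit-learn code.
        if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
            raise ValueError("n_estimators must be an integer, "
                             "got {}.".format(type(self.n_estimators)))
        if self.n_estimators <= 0:
            raise ValueError("n_estimators must be greater than zero, "
                             "got {}.".format(self.n_estimators))
        if self.base_estimator is not None:
            self.base_estimator_ = clone(self.base_estimator)
        else:
            # The decision stump that this PR makes the effective default.
            self.base_estimator_ = clone(default)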
6 changes: 4 additions & 2 deletions imblearn/ensemble/tests/test_weight_boosting.py
@@ -11,7 +11,7 @@

@pytest.fixture
def imbalanced_dataset():
return make_classification(n_samples=10000, n_features=2, n_informative=2,
return make_classification(n_samples=10000, n_features=3, n_informative=2,
n_redundant=0, n_repeated=0, n_classes=3,
n_clusters_per_class=1,
weights=[0.01, 0.05, 0.94], class_sep=0.8,
@@ -33,7 +33,9 @@ def test_balanced_random_forest_error(imbalanced_dataset, boosting_params,
@pytest.mark.parametrize('algorithm', ['SAMME', 'SAMME.R'])
def test_rusboost(imbalanced_dataset, algorithm):
X, y = imbalanced_dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y,
stratify=y,
random_state=1)
classes = np.unique(y)

n_estimators = 500
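
For reference, the behaviour exercised by the updated test can be mirrored
outside of pytest; a hypothetical standalone check (the fixture's
``random_state`` is not visible in this diff and is assumed here)::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from imblearn.ensemble import RUSBoostClassifier

    X, y = make_classification(n_samples=10000, n_features=3, n_informative=2,
                               n_redundant=0, n_repeated=0, n_classes=3,
                               n_clusters_per_class=1,
                               weights=[0.01, 0.05, 0.94], class_sep=0.8,
                               random_state=0)  # random_state assumed
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                                                        random_state=1)

    for algorithm in ['SAMME', 'SAMME.R']:
        clf = RUSBoostClassifier(n_estimators=500, algorithm=algorithm,
                                 random_state=0)
        y_pred = clf.fit(X_train, y_train).predict(X_test)
        # Predictions should stay within the known classes.
        assert set(np.unique(y_pred)) <= set(np.unique(y))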