From 068560369ff8c79cab9ba7c8275a510f24142733 Mon Sep 17 00:00:00 2001
From: chkoar
Date: Fri, 15 Feb 2019 15:09:26 +0200
Subject: [PATCH 1/6] Use a stump as base estimator in RUSBoostClassifier

---
 imblearn/ensemble/_weight_boosting.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py
index 0c2618202..9524634b1 100644
--- a/imblearn/ensemble/_weight_boosting.py
+++ b/imblearn/ensemble/_weight_boosting.py
@@ -30,10 +30,11 @@ class RUSBoostClassifier(AdaBoostClassifier):
 
     Parameters
     ----------
-    base_estimator : object, optional (default=DecisionTreeClassifier)
+    base_estimator : object, optional (default=None)
         The base estimator from which the boosted ensemble is built.
-        Support for sample weighting is required, as well as proper `classes_`
-        and `n_classes_` attributes.
+        Support for sample weighting is required, as well as proper
+        ``classes_`` and ``n_classes_`` attributes. If ``None``, then
+        the base estimator is ``DecisionTreeClassifier(max_depth=1)``
 
     n_estimators : integer, optional (default=50)
         The maximum number of estimators at which boosting is terminated.
@@ -151,7 +152,7 @@ def fit(self, X, y, sample_weight=None):
         super(RUSBoostClassifier, self).fit(X, y, sample_weight)
         return self
 
-    def _validate_estimator(self, default=DecisionTreeClassifier()):
+    def _validate_estimator(self, default=DecisionTreeClassifier(max_depth=1)):
         """Check the estimator and the n_estimator attribute, set the
         `base_estimator_` attribute."""
         if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):

From cd4e0bd6e77ee5a68c88ac59a0bfe11b4c6788c2 Mon Sep 17 00:00:00 2001
From: chkoar
Date: Fri, 15 Feb 2019 16:54:41 +0200
Subject: [PATCH 2/6] Call super _validate_estimator to set the default estimator

---
 imblearn/ensemble/_weight_boosting.py           | 16 +++-------------
 imblearn/ensemble/tests/test_weight_boosting.py |  2 +-
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py
index 9524634b1..a186a0ff0 100644
--- a/imblearn/ensemble/_weight_boosting.py
+++ b/imblearn/ensemble/_weight_boosting.py
@@ -152,21 +152,11 @@ def fit(self, X, y, sample_weight=None):
         super(RUSBoostClassifier, self).fit(X, y, sample_weight)
         return self
 
-    def _validate_estimator(self, default=DecisionTreeClassifier(max_depth=1)):
+    def _validate_estimator(self):
         """Check the estimator and the n_estimator attribute, set the
         `base_estimator_` attribute."""
-        if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
-            raise ValueError("n_estimators must be an integer, "
-                             "got {0}.".format(type(self.n_estimators)))
-
-        if self.n_estimators <= 0:
-            raise ValueError("n_estimators must be greater than zero, "
-                             "got {0}.".format(self.n_estimators))
-
-        if self.base_estimator is not None:
-            self.base_estimator_ = clone(self.base_estimator)
-        else:
-            self.base_estimator_ = clone(default)
+
+        super(RUSBoostClassifier, self)._validate_estimator()
 
         self.base_sampler_ = RandomUnderSampler(
             sampling_strategy=self.sampling_strategy,
diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py
index a7f963434..8194de809 100644
--- a/imblearn/ensemble/tests/test_weight_boosting.py
+++ b/imblearn/ensemble/tests/test_weight_boosting.py
@@ -11,7 +11,7 @@
 
 @pytest.fixture
 def imbalanced_dataset():
-    return make_classification(n_samples=10000, n_features=2, n_informative=2,
+    return make_classification(n_samples=10000, n_features=3, n_informative=2,
                                n_redundant=0, n_repeated=0, n_classes=3,
                                n_clusters_per_class=1,
                                weights=[0.01, 0.05, 0.94], class_sep=0.8,

From 810e1e9d155984c789aaebb418a91ebac7f60951 Mon Sep 17 00:00:00 2001
From: chkoar
Date: Fri, 15 Feb 2019 16:56:20 +0200
Subject: [PATCH 3/6] make pep8speaks happy

---
 imblearn/ensemble/_weight_boosting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py
index a186a0ff0..1560d9297 100644
--- a/imblearn/ensemble/_weight_boosting.py
+++ b/imblearn/ensemble/_weight_boosting.py
@@ -155,7 +155,7 @@ def fit(self, X, y, sample_weight=None):
     def _validate_estimator(self):
         """Check the estimator and the n_estimator attribute, set the
         `base_estimator_` attribute."""
-
+
         super(RUSBoostClassifier, self)._validate_estimator()
 
         self.base_sampler_ = RandomUnderSampler(

From 02c3f1d85c1f048fcc5029ea3544af312072d154 Mon Sep 17 00:00:00 2001
From: chkoar
Date: Sat, 23 Feb 2019 14:42:15 +0200
Subject: [PATCH 4/6] Fix test

---
 imblearn/ensemble/tests/test_weight_boosting.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py
index 8194de809..6749f0753 100644
--- a/imblearn/ensemble/tests/test_weight_boosting.py
+++ b/imblearn/ensemble/tests/test_weight_boosting.py
@@ -33,7 +33,9 @@ def test_balanced_random_forest_error(imbalanced_dataset, boosting_params,
 @pytest.mark.parametrize('algorithm', ['SAMME', 'SAMME.R'])
 def test_rusboost(imbalanced_dataset, algorithm):
     X, y = imbalanced_dataset
-    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)
+    X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                        stratify=y,
+                                                        random_state=1)
     classes = np.unique(y)
     n_estimators = 500
 

From b960ec7c8621f4bdcf09dfcf71b66f5a051fe67a Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 11 Jun 2019 23:49:39 +0200
Subject: [PATCH 5/6] iter

---
 doc/ensemble.rst                      |  5 +++--
 doc/whats_new/v0.5.rst                | 15 +++++++++++++++
 imblearn/ensemble/_weight_boosting.py |  2 +-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/doc/ensemble.rst b/doc/ensemble.rst
index 4b1142fb5..0700ea0aa 100644
--- a/doc/ensemble.rst
+++ b/doc/ensemble.rst
@@ -93,12 +93,13 @@ Several methods taking advantage of boosting have been designed.
 a boosting iteration [SKHN2010]_::
 
     >>> from imblearn.ensemble import RUSBoostClassifier
-    >>> rusboost = RUSBoostClassifier(random_state=0)
+    >>> rusboost = RUSBoostClassifier(n_estimators=200, algorithm='SAMME.R',
+    ...                               random_state=0)
     >>> rusboost.fit(X_train, y_train)  # doctest: +ELLIPSIS
     RUSBoostClassifier(...)
     >>> y_pred = rusboost.predict(X_test)
     >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
-    0.74...
+    0.66...
 
 A specific method which uses ``AdaBoost`` as learners in the bagging
 classifier is called EasyEnsemble. The :class:`EasyEnsembleClassifier` allows to bag
diff --git a/doc/whats_new/v0.5.rst b/doc/whats_new/v0.5.rst
index 02c3fd010..61313cece 100644
--- a/doc/whats_new/v0.5.rst
+++ b/doc/whats_new/v0.5.rst
@@ -6,6 +6,16 @@ Version 0.5 (under development)
 Changelog
 ---------
 
+Changed models
+..............
+
+The following models or functions might give different results even if the
+same data ``X`` and ``y`` are used.
+
+* :class:`imblearn.ensemble.RUSBoostClassifier` default estimator changed from
+  :class:`sklearn.tree.DecisionTreeClassifier` with full depth to a decision
+  stump (i.e., tree with ``max_depth=1``).
+
 Documentation
 .............
 
@@ -53,3 +63,8 @@ Bug
 
 - Fix bug in :class:`imblearn.pipeline.Pipeline` where None could be the
   final estimator.
   :pr:`554` by :user:`Oliver Rausch `.
+
+- Fix bug by changing the default depth in
+  :class:`imblearn.ensemble.RUSBoostClassifier` to get a decision stump as a
+  weak learner as in the original paper.
+  :pr:`545` by :user:`Christos Aridas <chkoar>`.
diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py
index f77c74cf6..a830bfe29 100644
--- a/imblearn/ensemble/_weight_boosting.py
+++ b/imblearn/ensemble/_weight_boosting.py
@@ -155,7 +155,7 @@ def fit(self, X, y, sample_weight=None):
     def _validate_estimator(self):
         """Check the estimator and the n_estimator attribute, set the
         `base_estimator_` attribute."""
-        super(RUSBoostClassifier, self)._validate_estimator()
+        super()._validate_estimator()
 
         self.base_sampler_ = RandomUnderSampler(
             sampling_strategy=self.sampling_strategy,

From e7b4356cedcfb5421ec787a46a45337212f138e5 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 11 Jun 2019 23:51:35 +0200
Subject: [PATCH 6/6] PEP8

---
 imblearn/ensemble/tests/test_weight_boosting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py
index e95671e5b..5c89ba5f5 100644
--- a/imblearn/ensemble/tests/test_weight_boosting.py
+++ b/imblearn/ensemble/tests/test_weight_boosting.py
@@ -33,7 +33,7 @@ def test_balanced_random_forest_error(imbalanced_dataset, boosting_params,
 @pytest.mark.parametrize('algorithm', ['SAMME', 'SAMME.R'])
 def test_rusboost(imbalanced_dataset, algorithm):
     X, y = imbalanced_dataset
-    X_train, X_test, y_train, y_test = train_test_split(X, y,
+    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                         stratify=y,
                                                         random_state=1)
     classes = np.unique(y)
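
The net effect of the series on user code can be sketched as follows. The
snippet below is illustrative and not part of the patches: the dataset
parameters are arbitrary, and only ``RUSBoostClassifier``, its
``base_estimator`` parameter, and the fitted ``base_estimator_`` attribute come
from the code touched above. With the new default each boosting round fits a
decision stump, while a fully grown tree can still be requested explicitly::

    from sklearn.datasets import make_classification
    from sklearn.tree import DecisionTreeClassifier

    from imblearn.ensemble import RUSBoostClassifier

    # Small imbalanced toy problem; the parameter values are arbitrary.
    X, y = make_classification(n_samples=1000, n_classes=3, n_informative=4,
                               weights=[0.01, 0.05, 0.94], random_state=0)

    # New default after this series: the weak learner is a decision stump
    # (DecisionTreeClassifier(max_depth=1)), as in the original RUSBoost paper.
    stump_rusboost = RUSBoostClassifier(random_state=0).fit(X, y)
    print(stump_rusboost.base_estimator_.max_depth)  # prints 1

    # The previous behaviour (fully grown trees) can still be obtained by
    # passing the base estimator explicitly.
    deep_rusboost = RUSBoostClassifier(
        base_estimator=DecisionTreeClassifier(), random_state=0).fit(X, y)
    print(deep_rusboost.base_estimator_.max_depth)  # prints None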