From 4dc8404e89eac12e7f48de2b3f258eac34d1a911 Mon Sep 17 00:00:00 2001 From: chkoar Date: Wed, 13 Feb 2019 17:54:41 +0200 Subject: [PATCH 01/15] Explicit fails on not supported targets --- imblearn/ensemble/_bagging.py | 2 ++ imblearn/ensemble/_easy_ensemble.py | 2 ++ imblearn/utils/_validation.py | 13 +++++++++---- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index 3bcce7fd4..c15b0ea22 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -17,6 +17,7 @@ from ..under_sampling.base import BaseUnderSampler from ..utils import Substitution from ..utils._docstring import _random_state_docstring +from ..utils._validation import _is_multilabel_or_multioutput @Substitution( @@ -240,6 +241,7 @@ def fit(self, X, y): self : object Returns self. """ + _is_multilabel_or_multioutput(y) # RandomUnderSampler is not supporting sample_weight. We need to pass # None. return self._fit(X, y, self.max_samples, sample_weight=None) diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index 97420a778..f9bb3d20d 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -19,6 +19,7 @@ from ..under_sampling.base import BaseUnderSampler from ..utils import Substitution from ..utils._docstring import _random_state_docstring +from ..utils._validation import _is_multilabel_or_multioutput from ..pipeline import Pipeline MAX_INT = np.iinfo(np.int32).max @@ -289,6 +290,7 @@ def fit(self, X, y): self : object Returns self. """ + _is_multilabel_or_multioutput(y) # RandomUnderSampler is not supporting sample_weight. We need to pass # None. return self._fit(X, y, self.max_samples, sample_weight=None) diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index eaa523e15..e526c3985 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -87,15 +87,20 @@ def check_target_type(y, indicate_one_vs_all=False): """ type_y = type_of_target(y) if type_y == 'multilabel-indicator': - if np.any(y.sum(axis=1) > 1): - raise ValueError( - "When 'y' corresponds to '{}', 'y' should encode the " - "multiclass (a single 1 by row).".format(type_y)) + _is_multilabel_or_multioutput(y) y = y.argmax(axis=1) return (y, type_y == 'multilabel-indicator') if indicate_one_vs_all else y +def _is_multilabel_or_multioutput(y): + if np.any(y.sum(axis=1) > 1): + raise ValueError( + "Imbalanced-learn currently supports binary, multiclass and " + "binrarized encoded multiclasss targets. Multilabel and " + "mutlioutput targets is not supported yet.") + + def _sampling_strategy_all(y, sampling_type): """Returns sampling target by targeting all classes.""" target_stats = _count_class_sample(y) From 5284c778277d03774afb7c305d65cc82ab9a3e25 Mon Sep 17 00:00:00 2001 From: chkoar Date: Wed, 13 Feb 2019 20:17:35 +0200 Subject: [PATCH 02/15] Refactor iteration --- imblearn/ensemble/_bagging.py | 5 ++--- imblearn/ensemble/_easy_ensemble.py | 3 +-- imblearn/ensemble/_weight_boosting.py | 3 ++- imblearn/utils/_validation.py | 14 +++++--------- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index c15b0ea22..fa865fabd 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -15,9 +15,8 @@ from ..pipeline import Pipeline from ..under_sampling import RandomUnderSampler from ..under_sampling.base import BaseUnderSampler -from ..utils import Substitution +from ..utils import Substitution, check_target_type from ..utils._docstring import _random_state_docstring -from ..utils._validation import _is_multilabel_or_multioutput @Substitution( @@ -241,7 +240,7 @@ def fit(self, X, y): self : object Returns self. """ - _is_multilabel_or_multioutput(y) + check_target_type(y) # RandomUnderSampler is not supporting sample_weight. We need to pass # None. return self._fit(X, y, self.max_samples, sample_weight=None) diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index f9bb3d20d..bdc763151 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -17,9 +17,8 @@ from .base import BaseEnsembleSampler from ..under_sampling import RandomUnderSampler from ..under_sampling.base import BaseUnderSampler -from ..utils import Substitution +from ..utils import Substitution, check_target_type from ..utils._docstring import _random_state_docstring -from ..utils._validation import _is_multilabel_or_multioutput from ..pipeline import Pipeline MAX_INT = np.iinfo(np.int32).max diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index 0c2618202..5888c5eb8 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -12,7 +12,7 @@ from ..under_sampling.base import BaseUnderSampler from ..under_sampling import RandomUnderSampler from ..pipeline import make_pipeline -from ..utils import Substitution +from ..utils import Substitution, check_target_type from ..utils._docstring import _random_state_docstring @@ -146,6 +146,7 @@ def fit(self, X, y, sample_weight=None): Returns self. """ + check_target_type(y) self.samplers_ = [] self.pipelines_ = [] super(RUSBoostClassifier, self).fit(X, y, sample_weight) diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index e526c3985..e2b6fa89b 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -87,20 +87,16 @@ def check_target_type(y, indicate_one_vs_all=False): """ type_y = type_of_target(y) if type_y == 'multilabel-indicator': - _is_multilabel_or_multioutput(y) + if np.any(y.sum(axis=1) > 1): + raise ValueError( + "Imbalanced-learn currently supports binary, multiclass and " + "binrarized encoded multiclasss targets. Multilabel and " + "mutlioutput targets is not supported.") y = y.argmax(axis=1) return (y, type_y == 'multilabel-indicator') if indicate_one_vs_all else y -def _is_multilabel_or_multioutput(y): - if np.any(y.sum(axis=1) > 1): - raise ValueError( - "Imbalanced-learn currently supports binary, multiclass and " - "binrarized encoded multiclasss targets. Multilabel and " - "mutlioutput targets is not supported yet.") - - def _sampling_strategy_all(y, sampling_type): """Returns sampling target by targeting all classes.""" target_stats = _count_class_sample(y) From 1df4a0eaa2475f3da561e055d864692c2c645cb1 Mon Sep 17 00:00:00 2001 From: chkoar Date: Wed, 13 Feb 2019 20:20:51 +0200 Subject: [PATCH 03/15] fix misspelled word --- imblearn/utils/_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index e2b6fa89b..3c06df901 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -90,7 +90,7 @@ def check_target_type(y, indicate_one_vs_all=False): if np.any(y.sum(axis=1) > 1): raise ValueError( "Imbalanced-learn currently supports binary, multiclass and " - "binrarized encoded multiclasss targets. Multilabel and " + "binarized encoded multiclasss targets. Multilabel and " "mutlioutput targets is not supported.") y = y.argmax(axis=1) From 09c2d72bbca785f2ecc8abaf359a8adb09c731dd Mon Sep 17 00:00:00 2001 From: chkoar Date: Wed, 13 Feb 2019 20:33:48 +0200 Subject: [PATCH 04/15] fix wrong function --- imblearn/ensemble/_easy_ensemble.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index bdc763151..6f4e51b1e 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -289,7 +289,7 @@ def fit(self, X, y): self : object Returns self. """ - _is_multilabel_or_multioutput(y) + check_target_type(y) # RandomUnderSampler is not supporting sample_weight. We need to pass # None. return self._fit(X, y, self.max_samples, sample_weight=None) From a63c17153f7d25e22eabb919639555754dffdd4f Mon Sep 17 00:00:00 2001 From: chkoar Date: Wed, 13 Feb 2019 21:23:45 +0200 Subject: [PATCH 05/15] Add/refactor tests --- imblearn/ensemble/tests/test_bagging.py | 10 ++++++++++ imblearn/ensemble/tests/test_easy_ensemble.py | 8 ++++++++ imblearn/ensemble/tests/test_weight_boosting.py | 11 ++++++++++- imblearn/over_sampling/tests/test_smote_nc.py | 3 ++- imblearn/utils/estimator_checks.py | 3 ++- 5 files changed, 32 insertions(+), 3 deletions(-) diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index da7d9b793..ed3669d35 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -3,6 +3,7 @@ # Christos Aridas # License: MIT +import pytest import numpy as np from sklearn.datasets import load_iris, make_hastie_10_2 @@ -493,3 +494,12 @@ def test_max_samples_consistency(): random_state=1) bagging.fit(X, y) assert bagging._max_samples == max_samples + + +def test_bagging_multioutput_multilabel_error(): + from sklearn.datasets import make_multilabel_classification + X, y = make_multilabel_classification(n_samples=30) + model = BalancedBaggingClassifier() + msg = "Multilabel and mutlioutput targets is not supported." + with pytest.raises(ValueError, match=msg): + model.fit(X, y) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 158ac174e..b5b691b56 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -294,3 +294,11 @@ def test_easy_ensemble_classifier_grid_search(): EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()), parameters, cv=5, iid=False) grid_search.fit(X, y) + +def test_easy_ensemble_classifier_multioutput_multilabel_error(): + from sklearn.datasets import make_multilabel_classification + X, y = make_multilabel_classification(n_samples=30) + model = EasyEnsembleClassifier() + msg = "Multilabel and mutlioutput targets is not supported." + with pytest.raises(ValueError, match=msg): + model.fit(X, y) \ No newline at end of file diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index a7f963434..1d8cbc639 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -23,7 +23,7 @@ def imbalanced_dataset(): [({"n_estimators": 'whatever'}, "n_estimators must be an integer"), ({"n_estimators": -100}, "n_estimators must be greater than zero")] ) -def test_balanced_random_forest_error(imbalanced_dataset, boosting_params, +def test_rusboost_error(imbalanced_dataset, boosting_params, err_msg): rusboost = RUSBoostClassifier(**boosting_params) with pytest.raises(ValueError, message=err_msg): @@ -91,3 +91,12 @@ def test_rusboost_sample_weight(imbalanced_dataset, algorithm): with pytest.raises(AssertionError): assert_array_equal(y_pred_no_sample_weight, y_pred_sample_weight) + + +def test_rusboost_multioutput_multilabel_error(): + from sklearn.datasets import make_multilabel_classification + X, y = make_multilabel_classification(n_samples=30) + model = RUSBoostClassifier() + msg = "Multilabel and mutlioutput targets is not supported." + with pytest.raises(ValueError, match=msg): + model.fit(X, y) \ No newline at end of file diff --git a/imblearn/over_sampling/tests/test_smote_nc.py b/imblearn/over_sampling/tests/test_smote_nc.py index 65598523b..480c45e26 100644 --- a/imblearn/over_sampling/tests/test_smote_nc.py +++ b/imblearn/over_sampling/tests/test_smote_nc.py @@ -131,7 +131,8 @@ def test_smotenc_check_target_type(): smote.fit_resample(X, y) rng = np.random.RandomState(42) y = rng.randint(2, size=(20, 3)) - with pytest.raises(ValueError, match="'y' should encode the multiclass"): + msg = "Multilabel and mutlioutput targets is not supported." + with pytest.raises(ValueError, match=msg): smote.fit_resample(X, y) def test_smotenc_samplers_one_label(): diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 7d08f3313..b1aab7967 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -132,7 +132,8 @@ def check_target_type(name, Estimator): # if the target is multilabel then we should raise an error rng = np.random.RandomState(42) y = rng.randint(2, size=(20, 3)) - with pytest.raises(ValueError, match="'y' should encode the multiclass"): + msg = "Multilabel and mutlioutput targets is not supported." + with pytest.raises(ValueError, match=msg): estimator.fit_resample(X, y) From d891eb4f7401023c94c3837414f222ff3e968815 Mon Sep 17 00:00:00 2001 From: chkoar Date: Thu, 14 Feb 2019 12:34:45 +0200 Subject: [PATCH 06/15] Make pep8speaks happy --- imblearn/ensemble/tests/test_easy_ensemble.py | 3 ++- imblearn/over_sampling/tests/test_smote_nc.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index b5b691b56..80cfe5866 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -295,10 +295,11 @@ def test_easy_ensemble_classifier_grid_search(): parameters, cv=5, iid=False) grid_search.fit(X, y) + def test_easy_ensemble_classifier_multioutput_multilabel_error(): from sklearn.datasets import make_multilabel_classification X, y = make_multilabel_classification(n_samples=30) model = EasyEnsembleClassifier() msg = "Multilabel and mutlioutput targets is not supported." with pytest.raises(ValueError, match=msg): - model.fit(X, y) \ No newline at end of file + model.fit(X, y) diff --git a/imblearn/over_sampling/tests/test_smote_nc.py b/imblearn/over_sampling/tests/test_smote_nc.py index 480c45e26..1c6affdb1 100644 --- a/imblearn/over_sampling/tests/test_smote_nc.py +++ b/imblearn/over_sampling/tests/test_smote_nc.py @@ -135,6 +135,7 @@ def test_smotenc_check_target_type(): with pytest.raises(ValueError, match=msg): smote.fit_resample(X, y) + def test_smotenc_samplers_one_label(): X, _, categorical_features = data_heterogneous_unordered() y = np.zeros(30) From 754c8c9447b5132a540b17b7c036966f24f22488 Mon Sep 17 00:00:00 2001 From: chkoar Date: Thu, 14 Feb 2019 15:13:08 +0200 Subject: [PATCH 07/15] Make pep8speaks happy again --- imblearn/ensemble/tests/test_easy_ensemble.py | 2 +- imblearn/ensemble/tests/test_weight_boosting.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 80cfe5866..112cd5285 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -295,7 +295,7 @@ def test_easy_ensemble_classifier_grid_search(): parameters, cv=5, iid=False) grid_search.fit(X, y) - + def test_easy_ensemble_classifier_multioutput_multilabel_error(): from sklearn.datasets import make_multilabel_classification X, y = make_multilabel_classification(n_samples=30) diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index 1d8cbc639..4a65e1a90 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -23,8 +23,7 @@ def imbalanced_dataset(): [({"n_estimators": 'whatever'}, "n_estimators must be an integer"), ({"n_estimators": -100}, "n_estimators must be greater than zero")] ) -def test_rusboost_error(imbalanced_dataset, boosting_params, - err_msg): +def test_rusboost_error(imbalanced_dataset, boosting_params, err_msg): rusboost = RUSBoostClassifier(**boosting_params) with pytest.raises(ValueError, message=err_msg): rusboost.fit(*imbalanced_dataset) @@ -99,4 +98,4 @@ def test_rusboost_multioutput_multilabel_error(): model = RUSBoostClassifier() msg = "Multilabel and mutlioutput targets is not supported." with pytest.raises(ValueError, match=msg): - model.fit(X, y) \ No newline at end of file + model.fit(X, y) From 7efd0813660fa8b08771c9b443fbea574061ea4c Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 14:15:43 +0200 Subject: [PATCH 08/15] doc fix --- imblearn/ensemble/tests/test_easy_ensemble.py | 2 +- imblearn/ensemble/tests/test_weight_boosting.py | 2 +- imblearn/over_sampling/tests/test_smote_nc.py | 2 +- imblearn/utils/estimator_checks.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 112cd5285..0adb527ff 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -300,6 +300,6 @@ def test_easy_ensemble_classifier_multioutput_multilabel_error(): from sklearn.datasets import make_multilabel_classification X, y = make_multilabel_classification(n_samples=30) model = EasyEnsembleClassifier() - msg = "Multilabel and mutlioutput targets is not supported." + msg = "Multilabel and multioutput targets are not supported." with pytest.raises(ValueError, match=msg): model.fit(X, y) diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index 4a65e1a90..85e82a623 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -96,6 +96,6 @@ def test_rusboost_multioutput_multilabel_error(): from sklearn.datasets import make_multilabel_classification X, y = make_multilabel_classification(n_samples=30) model = RUSBoostClassifier() - msg = "Multilabel and mutlioutput targets is not supported." + msg = "Multilabel and multioutput targets are not supported." with pytest.raises(ValueError, match=msg): model.fit(X, y) diff --git a/imblearn/over_sampling/tests/test_smote_nc.py b/imblearn/over_sampling/tests/test_smote_nc.py index 1c6affdb1..e03af02d4 100644 --- a/imblearn/over_sampling/tests/test_smote_nc.py +++ b/imblearn/over_sampling/tests/test_smote_nc.py @@ -131,7 +131,7 @@ def test_smotenc_check_target_type(): smote.fit_resample(X, y) rng = np.random.RandomState(42) y = rng.randint(2, size=(20, 3)) - msg = "Multilabel and mutlioutput targets is not supported." + msg = "Multilabel and multioutput targets are not supported." with pytest.raises(ValueError, match=msg): smote.fit_resample(X, y) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index b1aab7967..40fcf7505 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -132,7 +132,7 @@ def check_target_type(name, Estimator): # if the target is multilabel then we should raise an error rng = np.random.RandomState(42) y = rng.randint(2, size=(20, 3)) - msg = "Multilabel and mutlioutput targets is not supported." + msg = "Multilabel and multioutput targets are not supported." with pytest.raises(ValueError, match=msg): estimator.fit_resample(X, y) From 8e0619bce4cddec2a94cd7523ec29076d9f3e917 Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 15:03:54 +0200 Subject: [PATCH 09/15] doc fix --- imblearn/ensemble/tests/test_bagging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index ed3669d35..f8f6cc76a 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -500,6 +500,6 @@ def test_bagging_multioutput_multilabel_error(): from sklearn.datasets import make_multilabel_classification X, y = make_multilabel_classification(n_samples=30) model = BalancedBaggingClassifier() - msg = "Multilabel and mutlioutput targets is not supported." + msg = "Multilabel and multioutput targets are not supported." with pytest.raises(ValueError, match=msg): model.fit(X, y) From 792723708a17e5eddc4027daf2f8fcbb7844d2bf Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 15:43:22 +0200 Subject: [PATCH 10/15] docfix --- imblearn/utils/_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index 3c06df901..3a63eda58 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -91,7 +91,7 @@ def check_target_type(y, indicate_one_vs_all=False): raise ValueError( "Imbalanced-learn currently supports binary, multiclass and " "binarized encoded multiclasss targets. Multilabel and " - "mutlioutput targets is not supported.") + "multioutput targets is not supported.") y = y.argmax(axis=1) return (y, type_y == 'multilabel-indicator') if indicate_one_vs_all else y From 3100cbdd1fd9120dafa8cb79fc8c53e8839c71c3 Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 15:55:29 +0200 Subject: [PATCH 11/15] doc fix --- imblearn/utils/_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/utils/_validation.py b/imblearn/utils/_validation.py index 3a63eda58..c800bdfd3 100644 --- a/imblearn/utils/_validation.py +++ b/imblearn/utils/_validation.py @@ -91,7 +91,7 @@ def check_target_type(y, indicate_one_vs_all=False): raise ValueError( "Imbalanced-learn currently supports binary, multiclass and " "binarized encoded multiclasss targets. Multilabel and " - "multioutput targets is not supported.") + "multioutput targets are not supported.") y = y.argmax(axis=1) return (y, type_y == 'multilabel-indicator') if indicate_one_vs_all else y From 6baf06adfbe02cfeaa9341caf56ec95b01e94558 Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 16:06:33 +0200 Subject: [PATCH 12/15] Add a common test for failing on multilabel/multioutput targets --- imblearn/ensemble/tests/test_bagging.py | 8 -------- imblearn/ensemble/tests/test_easy_ensemble.py | 8 -------- imblearn/ensemble/tests/test_weight_boosting.py | 8 -------- imblearn/utils/estimator_checks.py | 15 ++++++++++++++- 4 files changed, 14 insertions(+), 25 deletions(-) diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index f8f6cc76a..2a73c133c 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -495,11 +495,3 @@ def test_max_samples_consistency(): bagging.fit(X, y) assert bagging._max_samples == max_samples - -def test_bagging_multioutput_multilabel_error(): - from sklearn.datasets import make_multilabel_classification - X, y = make_multilabel_classification(n_samples=30) - model = BalancedBaggingClassifier() - msg = "Multilabel and multioutput targets are not supported." - with pytest.raises(ValueError, match=msg): - model.fit(X, y) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 0adb527ff..011f846be 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -295,11 +295,3 @@ def test_easy_ensemble_classifier_grid_search(): parameters, cv=5, iid=False) grid_search.fit(X, y) - -def test_easy_ensemble_classifier_multioutput_multilabel_error(): - from sklearn.datasets import make_multilabel_classification - X, y = make_multilabel_classification(n_samples=30) - model = EasyEnsembleClassifier() - msg = "Multilabel and multioutput targets are not supported." - with pytest.raises(ValueError, match=msg): - model.fit(X, y) diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index 85e82a623..0cbc84327 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -91,11 +91,3 @@ def test_rusboost_sample_weight(imbalanced_dataset, algorithm): with pytest.raises(AssertionError): assert_array_equal(y_pred_no_sample_weight, y_pred_sample_weight) - -def test_rusboost_multioutput_multilabel_error(): - from sklearn.datasets import make_multilabel_classification - X, y = make_multilabel_classification(n_samples=30) - model = RUSBoostClassifier() - msg = "Multilabel and multioutput targets are not supported." - with pytest.raises(ValueError, match=msg): - model.fit(X, y) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 40fcf7505..160a023c6 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -17,7 +17,7 @@ from scipy import sparse from sklearn.base import clone -from sklearn.datasets import make_classification +from sklearn.datasets import make_classification, make_multilabel_classification #noqa from sklearn.cluster import KMeans from sklearn.preprocessing import label_binarize from sklearn.utils.estimator_checks import check_estimator \ @@ -27,6 +27,7 @@ from sklearn.utils.testing import set_random_state from sklearn.utils.multiclass import type_of_target +from imblearn.base import BaseSampler from imblearn.over_sampling.base import BaseOverSampler from imblearn.under_sampling.base import BaseCleaningSampler, BaseUnderSampler from imblearn.ensemble.base import BaseEnsembleSampler @@ -80,6 +81,7 @@ def _yield_sampler_checks(name, Estimator): yield check_samplers_multiclass_ova yield check_samplers_preserve_dtype yield check_samplers_sample_indices + yield check_fail_on_multilabel_or_multioutput_targets def _yield_all_checks(name, estimator): @@ -375,3 +377,14 @@ def check_samplers_sample_indices(name, Sampler): assert hasattr(sampler, 'sample_indices_') else: assert not hasattr(sampler, 'sample_indices_') + + +def check_fail_on_multilabel_or_multioutput_targets(name, Estimator): + estimator = Estimator() + X, y = make_multilabel_classification(n_samples=30) + msg = "Multilabel and multioutput targets are not supported." + with pytest.raises(ValueError, match=msg): + if isinstance(estimator, BaseSampler): + estimator.fit_resample(X, y) + else: + estimator.fit(X, y) From bce8259196ce410884b24b07fa9c373614196d00 Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 16:08:18 +0200 Subject: [PATCH 13/15] Remove unused import --- imblearn/ensemble/tests/test_bagging.py | 1 - 1 file changed, 1 deletion(-) diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index 2a73c133c..82d4820f1 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -3,7 +3,6 @@ # Christos Aridas # License: MIT -import pytest import numpy as np from sklearn.datasets import load_iris, make_hastie_10_2 From 48370b9666ccce4439618360dc7994e0e94a7d2d Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 16:19:01 +0200 Subject: [PATCH 14/15] Make pep8speaks happy --- imblearn/ensemble/tests/test_bagging.py | 1 - imblearn/ensemble/tests/test_easy_ensemble.py | 1 - imblearn/ensemble/tests/test_weight_boosting.py | 1 - imblearn/utils/estimator_checks.py | 2 +- 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index 82d4820f1..da7d9b793 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -493,4 +493,3 @@ def test_max_samples_consistency(): random_state=1) bagging.fit(X, y) assert bagging._max_samples == max_samples - diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index 011f846be..158ac174e 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -294,4 +294,3 @@ def test_easy_ensemble_classifier_grid_search(): EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()), parameters, cv=5, iid=False) grid_search.fit(X, y) - diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index 0cbc84327..f3cd85a0b 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -90,4 +90,3 @@ def test_rusboost_sample_weight(imbalanced_dataset, algorithm): with pytest.raises(AssertionError): assert_array_equal(y_pred_no_sample_weight, y_pred_sample_weight) - diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 160a023c6..590b6ee06 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -17,7 +17,7 @@ from scipy import sparse from sklearn.base import clone -from sklearn.datasets import make_classification, make_multilabel_classification #noqa +from sklearn.datasets import make_classification, make_multilabel_classification # noqa from sklearn.cluster import KMeans from sklearn.preprocessing import label_binarize from sklearn.utils.estimator_checks import check_estimator \ From 08c08305fe37fbd9da86515ec8f6c162946b9408 Mon Sep 17 00:00:00 2001 From: chkoar Date: Fri, 15 Feb 2019 20:01:18 +0200 Subject: [PATCH 15/15] Refactor tests --- imblearn/utils/estimator_checks.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 590b6ee06..5fe2e2a88 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -81,7 +81,10 @@ def _yield_sampler_checks(name, Estimator): yield check_samplers_multiclass_ova yield check_samplers_preserve_dtype yield check_samplers_sample_indices - yield check_fail_on_multilabel_or_multioutput_targets + + +def _yield_classifier_checks(name, Estimator): + yield check_classifier_on_multilabel_or_multioutput_targets def _yield_all_checks(name, estimator): @@ -89,6 +92,9 @@ def _yield_all_checks(name, estimator): if hasattr(estimator, 'fit_resample'): for check in _yield_sampler_checks(name, estimator): yield check + if hasattr(estimator, 'predict'): + for check in _yield_classifier_checks(name, estimator): + yield check def check_estimator(Estimator, run_sampler_tests=True): @@ -379,12 +385,9 @@ def check_samplers_sample_indices(name, Sampler): assert not hasattr(sampler, 'sample_indices_') -def check_fail_on_multilabel_or_multioutput_targets(name, Estimator): +def check_classifier_on_multilabel_or_multioutput_targets(name, Estimator): estimator = Estimator() X, y = make_multilabel_classification(n_samples=30) msg = "Multilabel and multioutput targets are not supported." with pytest.raises(ValueError, match=msg): - if isinstance(estimator, BaseSampler): - estimator.fit_resample(X, y) - else: - estimator.fit(X, y) + estimator.fit(X, y)