diff --git a/doc/developers_utils.rst b/doc/developers_utils.rst index c0e577934..ed4f34767 100644 --- a/doc/developers_utils.rst +++ b/doc/developers_utils.rst @@ -101,3 +101,45 @@ On the top of all the functionality provided by scikit-learn. Imbalance-learn provides :func:`deprecate_parameter`: which is used to deprecate a sampler's parameter (attribute) by another one. +Testing utilities +================= +Currently, imbalanced-learn provides a warning management utility. This feature +is going to be merged into pytest and will be removed once a pytest release +includes it. + +If using Python 2.7 or above, you may use this function as a +context manager:: + + >>> import warnings + >>> from imblearn.utils.testing import warns + >>> with warns(RuntimeWarning): + ... warnings.warn("my runtime warning", RuntimeWarning) + + >>> with warns(RuntimeWarning): + ... pass + Traceback (most recent call last): + ... + Failed: DID NOT WARN. No warnings of type ...RuntimeWarning... was emitted... + + >>> with warns(RuntimeWarning): + ... warnings.warn(UserWarning) + Traceback (most recent call last): + ... + Failed: DID NOT WARN. No warnings of type ...RuntimeWarning... was emitted... + +In the context manager form you may use the keyword argument ``match`` to assert +that the warning message matches a text or regex:: + + >>> import warnings + >>> from imblearn.utils.testing import warns + >>> with warns(UserWarning, match='must be 0 or None'): + ... warnings.warn("value must be 0 or None", UserWarning) + + >>> with warns(UserWarning, match=r'must be \d+$'): + ... warnings.warn("value must be 42", UserWarning) + + >>> with warns(UserWarning, match=r'must be \d+$'): + ... warnings.warn("this is not here", UserWarning) + Traceback (most recent call last): + ... 
+ AssertionError: 'must be \d+$' pattern not found in ['this is not here'] diff --git a/imblearn/combine/tests/test_smote_enn.py b/imblearn/combine/tests/test_smote_enn.py index 284185de6..67c7cca90 100644 --- a/imblearn/combine/tests/test_smote_enn.py +++ b/imblearn/combine/tests/test_smote_enn.py @@ -6,8 +6,9 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_allclose, assert_array_equal -from sklearn.utils.testing import assert_raises_regex from imblearn.combine import SMOTEENN from imblearn.under_sampling import EditedNearestNeighbours @@ -113,8 +114,8 @@ def test_error_wrong_object(): smote = 'rnd' enn = 'rnd' smt = SMOTEENN(smote=smote, random_state=RND_SEED) - assert_raises_regex(ValueError, "smote needs to be a SMOTE", - smt.fit_sample, X, Y) + with raises(ValueError, match="smote needs to be a SMOTE"): + smt.fit_sample(X, Y) smt = SMOTEENN(enn=enn, random_state=RND_SEED) - assert_raises_regex(ValueError, "enn needs to be an ", - smt.fit_sample, X, Y) + with raises(ValueError, match="enn needs to be an "): + smt.fit_sample(X, Y) diff --git a/imblearn/combine/tests/test_smote_tomek.py b/imblearn/combine/tests/test_smote_tomek.py index 20f4b53d1..3da2243dc 100644 --- a/imblearn/combine/tests/test_smote_tomek.py +++ b/imblearn/combine/tests/test_smote_tomek.py @@ -6,8 +6,9 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_allclose, assert_array_equal -from sklearn.utils.testing import assert_raises_regex from imblearn.combine import SMOTETomek from imblearn.over_sampling import SMOTE @@ -156,8 +157,8 @@ def test_error_wrong_object(): smote = 'rnd' tomek = 'rnd' smt = SMOTETomek(smote=smote, random_state=RND_SEED) - assert_raises_regex(ValueError, "smote needs to be a SMOTE", - smt.fit_sample, X, Y) + with raises(ValueError, match="smote needs to be a SMOTE"): + smt.fit_sample(X, Y) smt = 
SMOTETomek(tomek=tomek, random_state=RND_SEED) - assert_raises_regex(ValueError, "tomek needs to be a TomekLinks", - smt.fit_sample, X, Y) + with raises(ValueError, match="tomek needs to be a TomekLinks"): + smt.fit_sample(X, Y) diff --git a/imblearn/datasets/tests/test_imbalance.py b/imblearn/datasets/tests/test_imbalance.py index c427a658d..b54700d20 100644 --- a/imblearn/datasets/tests/test_imbalance.py +++ b/imblearn/datasets/tests/test_imbalance.py @@ -10,10 +10,11 @@ import numpy as np +from pytest import raises + from sklearn.datasets import load_iris -from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_warns_message +from imblearn.utils.testing import warns from imblearn.datasets import make_imbalance data = load_iris() @@ -24,28 +25,28 @@ def test_make_imbalance_error(): # we are reusing part of utils.check_ratio, however this is not cover in # the common tests so we will repeat it here ratio = {0: -100, 1: 50, 2: 50} - assert_raises_regex(ValueError, "in a class cannot be negative", - make_imbalance, X, Y, ratio) + with raises(ValueError, match="in a class cannot be negative"): + make_imbalance(X, Y, ratio) ratio = {0: 10, 1: 70} - assert_raises_regex(ValueError, "should be less or equal to the original", - make_imbalance, X, Y, ratio) + with raises(ValueError, match="should be less or equal to the original"): + make_imbalance(X, Y, ratio) y_ = np.zeros((X.shape[0], )) ratio = {0: 10} - assert_raises_regex(ValueError, "needs to have more than 1 class.", - make_imbalance, X, y_, ratio) + with raises(ValueError, match="needs to have more than 1 class."): + make_imbalance(X, y_, ratio) ratio = 'random-string' - assert_raises_regex(ValueError, "has to be a dictionary or a function", - make_imbalance, X, Y, ratio) + with raises(ValueError, match="has to be a dictionary or a function"): + make_imbalance(X, Y, ratio) # FIXME: to be removed in 0.4 due to deprecation def test_make_imbalance_float(): - X_, y_ = 
assert_warns_message(DeprecationWarning, - "'min_c_' is deprecated in 0.2", - make_imbalance, X, Y, ratio=0.5, min_c_=1) - X_, y_ = assert_warns_message(DeprecationWarning, - "'ratio' being a float is deprecated", - make_imbalance, X, Y, ratio=0.5, min_c_=1) + with warns(DeprecationWarning, match="deprecated in 0.2"): + X_, y_ = make_imbalance(X, Y, ratio=0.5, min_c_=1) + + with warns(DeprecationWarning, match="'ratio' being a float"): + X_, y_ = make_imbalance(X, Y, ratio=0.5, min_c_=1) + assert Counter(y_) == {0: 50, 1: 25, 2: 50} # resample without using min_c_ X_, y_ = make_imbalance(X_, y_, ratio=0.25, min_c_=None) diff --git a/imblearn/datasets/tests/test_zenodo.py b/imblearn/datasets/tests/test_zenodo.py index 2a78e2c14..0977ed1a2 100644 --- a/imblearn/datasets/tests/test_zenodo.py +++ b/imblearn/datasets/tests/test_zenodo.py @@ -8,7 +8,8 @@ from imblearn.datasets import fetch_datasets from sklearn.utils.testing import SkipTest, assert_allclose -from sklearn.utils.testing import assert_raises_regex + +from pytest import raises DATASET_SHAPE = {'ecoli': (336, 7), 'optical_digits': (5620, 64), @@ -84,11 +85,11 @@ def test_fetch_filter(): def test_fetch_error(): - assert_raises_regex(ValueError, 'is not a dataset available.', - fetch_datasets, filter_data=tuple(['rnd'])) - assert_raises_regex(ValueError, 'dataset with the ID=', - fetch_datasets, filter_data=tuple([-1])) - assert_raises_regex(ValueError, 'dataset with the ID=', - fetch_datasets, filter_data=tuple([100])) - assert_raises_regex(ValueError, 'value in the tuple', - fetch_datasets, filter_data=tuple([1.00])) + with raises(ValueError, match='is not a dataset available.'): + fetch_datasets(filter_data=tuple(['rnd'])) + with raises(ValueError, match='dataset with the ID='): + fetch_datasets(filter_data=tuple([-1])) + with raises(ValueError, match='dataset with the ID='): + fetch_datasets(filter_data=tuple([100])) + with raises(ValueError, match='value in the tuple'): + 
fetch_datasets(filter_data=tuple([1.00])) diff --git a/imblearn/ensemble/tests/test_balance_cascade.py b/imblearn/ensemble/tests/test_balance_cascade.py index dc6279c9e..a01e561d3 100644 --- a/imblearn/ensemble/tests/test_balance_cascade.py +++ b/imblearn/ensemble/tests/test_balance_cascade.py @@ -6,12 +6,15 @@ from __future__ import print_function import numpy as np -from sklearn.utils.testing import assert_array_equal, assert_raises -from sklearn.utils.testing import assert_raises_regex + +from pytest import raises + +from sklearn.utils.testing import assert_array_equal from sklearn.ensemble import RandomForestClassifier from imblearn.ensemble import BalanceCascade + RND_SEED = 0 X = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], [1.25192108, -0.22367336], [0.53366841, -0.30312976], @@ -299,7 +302,8 @@ def test_fit_sample_auto_linear_svm(): def test_init_wrong_classifier(): classifier = 'rnd' bc = BalanceCascade(classifier=classifier) - assert_raises(NotImplementedError, bc.fit_sample, X, Y) + with raises(NotImplementedError): + bc.fit_sample(X, Y) def test_fit_sample_auto_early_stop(): @@ -362,5 +366,5 @@ def test_give_classifier_wrong_obj(): classifier = 2 bc = BalanceCascade(ratio=ratio, random_state=RND_SEED, return_indices=True, estimator=classifier) - assert_raises_regex(ValueError, "Invalid parameter `estimator`", - bc.fit_sample, X, Y) + with raises(ValueError, match="Invalid parameter `estimator`"): + bc.fit_sample(X, Y) diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py index 41e368c05..76ac30c48 100644 --- a/imblearn/metrics/tests/test_classification.py +++ b/imblearn/metrics/tests/test_classification.py @@ -10,6 +10,8 @@ import numpy as np +from pytest import approx, raises + from sklearn import datasets from sklearn import svm @@ -17,9 +19,8 @@ from sklearn.utils.fixes import np_version from sklearn.utils.validation import check_random_state from sklearn.utils.testing import 
assert_allclose, assert_array_equal -from sklearn.utils.testing import assert_no_warnings, assert_raises -from sklearn.utils.testing import assert_warns_message, ignore_warnings -from sklearn.utils.testing import assert_raise_message +from sklearn.utils.testing import assert_no_warnings +from sklearn.utils.testing import ignore_warnings from sklearn.metrics import accuracy_score, average_precision_score from sklearn.metrics import brier_score_loss, cohen_kappa_score from sklearn.metrics import jaccard_similarity_score, precision_score @@ -32,7 +33,8 @@ from imblearn.metrics import make_index_balanced_accuracy from imblearn.metrics import classification_report_imbalanced -from pytest import approx +from imblearn.utils.testing import warns + RND_SEED = 42 R_TOL = 1e-2 @@ -177,7 +179,8 @@ def test_sensitivity_specificity_error_multilabels(): y_true_bin = label_binarize(y_true, classes=np.arange(5)) y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) - assert_raises(ValueError, sensitivity_score, y_true_bin, y_pred_bin) + with raises(ValueError): + sensitivity_score(y_true_bin, y_pred_bin) @ignore_warnings @@ -185,32 +188,21 @@ def test_sensitivity_specificity_support_errors(): y_true, y_pred, _ = make_prediction(binary=True) # Bad pos_label - assert_raises( - ValueError, - sensitivity_specificity_support, - y_true, - y_pred, - pos_label=2, - average='binary') + with raises(ValueError): + sensitivity_specificity_support(y_true, y_pred, pos_label=2, + average='binary') # Bad average option - assert_raises( - ValueError, - sensitivity_specificity_support, [0, 1, 2], [1, 2, 0], - average='mega') + with raises(ValueError): + sensitivity_specificity_support([0, 1, 2], [1, 2, 0], average='mega') def test_sensitivity_specificity_unused_pos_label(): # but average != 'binary'; even if data is binary - assert_warns_message( - UserWarning, - "Note that pos_label (set to 2) is " - "ignored when average != 'binary' (got 'macro'). 
You " - "may use labels=[pos_label] to specify a single " - "positive class.", - sensitivity_specificity_support, [1, 2, 1], [1, 2, 2], - pos_label=2, - average='macro') + with warns(UserWarning, match=r"labels=\[pos_label\] to specify a single"): + sensitivity_specificity_support([1, 2, 1], [1, 2, 2], + pos_label=2, + average='macro') def test_geometric_mean_support_binary(): @@ -405,10 +397,8 @@ def test_classification_report_imbalanced_multiclass_with_unicode_label(): u'0.15 0.44 0.19 31 red\xa2 0.42 0.90 0.55 0.57 0.63 ' u'0.37 20 avg / total 0.51 0.53 0.80 0.47 0.62 0.41 75') if np_version[:3] < (1, 7, 0): - expected_message = ("NumPy < 1.7.0 does not implement" - " searchsorted on unicode data correctly.") - assert_raise_message(RuntimeError, expected_message, - classification_report_imbalanced, y_true, y_pred) + with raises(RuntimeError, match="NumPy < 1.7.0"): + classification_report_imbalanced(y_true, y_pred) else: report = classification_report_imbalanced(y_true, y_pred) assert _format_report(report) == expected_report @@ -459,16 +449,20 @@ def test_iba_error_y_score_prob(): aps = make_index_balanced_accuracy(alpha=0.5, squared=True)( average_precision_score) - assert_raises(AttributeError, aps, y_true, y_pred) + with raises(AttributeError): + aps(y_true, y_pred) brier = make_index_balanced_accuracy(alpha=0.5, squared=True)( brier_score_loss) - assert_raises(AttributeError, brier, y_true, y_pred) + with raises(AttributeError): + brier(y_true, y_pred) kappa = make_index_balanced_accuracy(alpha=0.5, squared=True)( cohen_kappa_score) - assert_raises(AttributeError, kappa, y_true, y_pred) + with raises(AttributeError): + kappa(y_true, y_pred) ras = make_index_balanced_accuracy(alpha=0.5, squared=True)( roc_auc_score) - assert_raises(AttributeError, ras, y_true, y_pred) + with raises(AttributeError): + ras(y_true, y_pred) diff --git a/imblearn/over_sampling/tests/test_adasyn.py b/imblearn/over_sampling/tests/test_adasyn.py index c4b8823a6..c550d567f 100644 --- 
a/imblearn/over_sampling/tests/test_adasyn.py +++ b/imblearn/over_sampling/tests/test_adasyn.py @@ -6,12 +6,14 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_allclose, assert_array_equal -from sklearn.utils.testing import assert_raises_regex from sklearn.neighbors import NearestNeighbors from imblearn.over_sampling import ADASYN + RND_SEED = 0 X = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], [1.25192108, -0.22367336], [0.53366841, -0.30312976], @@ -141,5 +143,5 @@ def test_ada_fit_sample_nn_obj(): def test_ada_wrong_nn_obj(): nn = 'rnd' ada = ADASYN(random_state=RND_SEED, n_neighbors=nn) - assert_raises_regex(ValueError, "has to be one of", - ada.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + ada.fit_sample(X, Y) diff --git a/imblearn/over_sampling/tests/test_smote.py b/imblearn/over_sampling/tests/test_smote.py index 90dfcc836..449ce9307 100644 --- a/imblearn/over_sampling/tests/test_smote.py +++ b/imblearn/over_sampling/tests/test_smote.py @@ -6,13 +6,15 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_allclose, assert_array_equal -from sklearn.utils.testing import assert_raises_regex from sklearn.neighbors import NearestNeighbors from sklearn.svm import SVC from imblearn.over_sampling import SMOTE + RND_SEED = 0 X = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], [1.25192108, -0.22367336], [0.53366841, -0.30312976], @@ -31,8 +33,8 @@ def test_smote_wrong_kind(): kind = 'rnd' smote = SMOTE(kind=kind, random_state=RND_SEED) - assert_raises_regex(ValueError, "Unknown kind for SMOTE", - smote.fit_sample, X, Y) + with raises(ValueError, match="Unknown kind for SMOTE"): + smote.fit_sample(X, Y) def test_sample_regular(): @@ -203,19 +205,19 @@ def test_wrong_nn(): nn_k = NearestNeighbors(n_neighbors=6) smote = SMOTE( random_state=RND_SEED, kind=kind, 
k_neighbors=nn_k, m_neighbors=nn_m) - assert_raises_regex(ValueError, "has to be one of", - smote.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + smote.fit_sample(X, Y) nn_k = 'rnd' nn_m = NearestNeighbors(n_neighbors=10) smote = SMOTE( random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m) - assert_raises_regex(ValueError, "has to be one of", - smote.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + smote.fit_sample(X, Y) kind = 'regular' nn_k = 'rnd' smote = SMOTE(random_state=RND_SEED, kind=kind, k_neighbors=nn_k) - assert_raises_regex(ValueError, "has to be one of", - smote.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + smote.fit_sample(X, Y) def test_sample_regular_with_nn_svm(): @@ -250,5 +252,5 @@ def test_sample_regular_wrong_svm(): smote = SMOTE( random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm) - assert_raises_regex(ValueError, "has to be one of", - smote.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + smote.fit_sample(X, Y) diff --git a/imblearn/tests/test_exceptions.py b/imblearn/tests/test_exceptions.py index 9ce907f47..91ca4576a 100644 --- a/imblearn/tests/test_exceptions.py +++ b/imblearn/tests/test_exceptions.py @@ -3,13 +3,12 @@ # Christos Aridas # License: MIT - -from sklearn.utils.testing import assert_raises_regex +from pytest import raises from imblearn.exceptions import raise_isinstance_error def test_raise_isinstance_error(): var = 10.0 - assert_raises_regex(ValueError, "has to be one of", - raise_isinstance_error, 'var', [int], var) + with raises(ValueError, match="has to be one of"): + raise_isinstance_error('var', [int], var) diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index 8c581781f..046fee903 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -11,9 +11,8 @@ import time import numpy as np -from sklearn.utils.testing import assert_raises 
-from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_raise_message +from pytest import raises + from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose @@ -33,6 +32,7 @@ from imblearn.under_sampling import (RandomUnderSampler, EditedNearestNeighbours as ENN) + JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", "the pizza burger beer copyright", @@ -176,13 +176,13 @@ def transform(self, X, y=None): def test_pipeline_init(): # Test the various init parameters of the pipeline. - assert_raises(TypeError, Pipeline) + with raises(TypeError): + Pipeline() # Check that we can't instantiate pipelines with objects without fit # method - assert_raises_regex(TypeError, - 'Last step of Pipeline should implement fit. ' - '.*NoFit.*', - Pipeline, [('clf', NoFit())]) + error_regex = 'Last step of Pipeline should implement fit. .*NoFit.*' + with raises(TypeError, match=error_regex): + Pipeline([('clf', NoFit())]) # Smoke test with only an estimator clf = NoTrans() pipe = Pipeline([('svc', clf)]) @@ -204,9 +204,9 @@ def test_pipeline_init(): # Check that we can't instantiate with non-transformers on the way # Note that NoTrans implements fit, but not transform - assert_raises_regex(TypeError, - 'implement fit and transform or sample', - Pipeline, [('t', NoTrans()), ('svc', clf)]) + error_regex = 'implement fit and transform or sample' + with raises(TypeError, match=error_regex): + Pipeline([('t', NoTrans()), ('svc', clf)]) # Check that params are set pipe.set_params(svc__C=0.1) @@ -215,7 +215,8 @@ def test_pipeline_init(): repr(pipe) # Check that params are not set when naming them wrong - assert_raises(ValueError, pipe.set_params, anova__C=0.1) + with raises(ValueError): + pipe.set_params(anova__C=0.1) # Test clone pipe2 = clone(pipe) @@ -265,11 +266,8 @@ def test_pipeline_fit_params(): assert pipe.named_steps['transf'].a is None 
assert pipe.named_steps['transf'].b is None # invalid parameters should raise an error message - assert_raise_message( - TypeError, - "fit() got an unexpected keyword argument 'bad'", - pipe.fit, None, None, clf__bad=True - ) + with raises(TypeError, match="unexpected keyword argument"): + pipe.fit(None, None, clf__bad=True) def test_pipeline_sample_weight_supported(): @@ -290,32 +288,19 @@ def test_pipeline_sample_weight_unsupported(): pipe.fit(X, y=None) assert pipe.score(X) == 3 assert pipe.score(X, sample_weight=None) == 3 - assert_raise_message( - TypeError, - "score() got an unexpected keyword argument 'sample_weight'", - pipe.score, X, sample_weight=np.array([2, 3]) - ) + with raises(TypeError, match="unexpected keyword argument"): + pipe.score(X, sample_weight=np.array([2, 3])) def test_pipeline_raise_set_params_error(): # Test pipeline raises set params error message for nested models. pipe = Pipeline([('cls', LinearRegression())]) - - # expected error message - error_msg = ('Invalid parameter %s for estimator %s. 
' - 'Check the list of available parameters ' - 'with `estimator.get_params().keys()`.') - - assert_raise_message(ValueError, - error_msg % ('fake', 'Pipeline'), - pipe.set_params, - fake='nope') + with raises(ValueError, match="Invalid parameter"): + pipe.set_params(fake='nope') # nested model check - assert_raise_message(ValueError, - error_msg % ("fake", pipe), - pipe.set_params, - fake__estimator='nope') + with raises(ValueError, match="Invalid parameter"): + pipe.set_params(fake__estimator='nope') def test_pipeline_methods_pca_svm(): @@ -397,9 +382,9 @@ def test_fit_predict_on_pipeline_without_fit_predict(): scaler = StandardScaler() pca = PCA(svd_solver='full') pipe = Pipeline([('scaler', scaler), ('pca', pca)]) - assert_raises_regex(AttributeError, - "'PCA' object has no attribute 'fit_predict'", - getattr, pipe, 'fit_predict') + error_regex = "'PCA' object has no attribute 'fit_predict'" + with raises(AttributeError, match=error_regex): + getattr(pipe, 'fit_predict') def test_fit_predict_with_intermediate_fit_params(): @@ -471,8 +456,10 @@ def test_set_pipeline_steps(): # With invalid data pipeline.set_params(steps=[('junk', ())]) - assert_raises(TypeError, pipeline.fit, [[1]], [1]) - assert_raises(TypeError, pipeline.fit_transform, [[1]], [1]) + with raises(TypeError): + pipeline.fit([[1]], [1]) + with raises(TypeError): + pipeline.fit_transform([[1]], [1]) def test_set_pipeline_step_none(): @@ -534,9 +521,8 @@ def make(): assert_array_equal([[exp]], pipeline.fit(X, y).transform(X)) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) - assert_raise_message(AttributeError, - "'NoneType' object has no attribute 'predict'", - getattr, pipeline, 'predict') + with raises(AttributeError, match="has no attribute 'predict'"): + getattr(pipeline, 'predict') # Check None step at construction time exp = 2 * 5 @@ -595,10 +581,12 @@ def test_classes_property(): reg = make_pipeline(SelectKBest(k=1), 
LinearRegression()) reg.fit(X, y) - assert_raises(AttributeError, getattr, reg, "classes_") + with raises(AttributeError): + getattr(reg, "classes_") clf = make_pipeline(SelectKBest(k=1), LogisticRegression(random_state=0)) - assert_raises(AttributeError, getattr, clf, "classes_") + with raises(AttributeError): + getattr(clf, "classes_") clf.fit(X, y) assert_array_equal(clf.classes_, np.unique(y)) @@ -613,9 +601,10 @@ def test_pipeline_wrong_memory(): memory = 1 cached_pipe = Pipeline([('transf', DummyTransf()), ('svc', SVC())], memory=memory) - assert_raises_regex(ValueError, "'memory' should either be a string or a" - " joblib.Memory instance, got 'memory=1' instead.", - cached_pipe.fit, X, y) + error_regex = ("'memory' should either be a string or a joblib.Memory" + " instance, got 'memory=1' instead.") + with raises(ValueError, match=error_regex): + cached_pipe.fit(X, y) def test_pipeline_memory_transformer(): @@ -1018,9 +1007,8 @@ def test_pipeline_with_step_that_implements_both_sample_and_transform(): random_state=0) clf = LogisticRegression() - assert_raises(TypeError, Pipeline, [('step', FitTransformSample()), - ('logistic', clf)]) - # assert_raises(TypeError, lambda x: [][0]) + with raises(TypeError): + Pipeline([('step', FitTransformSample()), ('logistic', clf)]) def test_pipeline_with_step_that_it_is_pipeline(): @@ -1041,7 +1029,8 @@ def test_pipeline_with_step_that_it_is_pipeline(): rus = RandomUnderSampler(random_state=0) filter1 = SelectKBest(f_classif, k=2) pipe1 = Pipeline([('rus', rus), ('anova', filter1)]) - assert_raises(TypeError, Pipeline, [('pipe1', pipe1), ('logistic', clf)]) + with raises(TypeError): + Pipeline([('pipe1', pipe1), ('logistic', clf)]) def test_pipeline_fit_then_sample_with_sampler_last_estimator(): diff --git a/imblearn/under_sampling/prototype_generation/tests/test_cluster_centroids.py b/imblearn/under_sampling/prototype_generation/tests/test_cluster_centroids.py index a42cb2e19..160e0df69 100644 --- 
a/imblearn/under_sampling/prototype_generation/tests/test_cluster_centroids.py +++ b/imblearn/under_sampling/prototype_generation/tests/test_cluster_centroids.py @@ -4,9 +4,11 @@ from collections import Counter import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_raises_regex + from sklearn.cluster import KMeans from imblearn.under_sampling import ClusterCentroids @@ -79,5 +81,5 @@ def test_fit_sample_wrong_object(): cluster = 'rnd' cc = ClusterCentroids( ratio=ratio, random_state=RND_SEED, estimator=cluster) - assert_raises_regex(ValueError, "has to be a KMeans clustering", - cc.fit_sample, X, Y) + with raises(ValueError, match="has to be a KMeans clustering"): + cc.fit_sample(X, Y) diff --git a/imblearn/under_sampling/prototype_selection/tests/test_allknn.py b/imblearn/under_sampling/prototype_selection/tests/test_allknn.py index 8208104c3..59207dcaf 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_allknn.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_allknn.py @@ -6,13 +6,15 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_allclose, assert_array_equal -from sklearn.utils.testing import assert_raises from sklearn.neighbors import NearestNeighbors from sklearn.datasets import make_classification from imblearn.under_sampling import AllKNN + RND_SEED = 0 X = np.array([[-0.12840393, 0.66446571], [1.32319756, -0.13181616], [0.04296502, -0.37981873], [0.83631853, 0.18569783], @@ -171,4 +173,5 @@ def test_allknn_fit_sample_with_nn_object(): def test_alknn_not_good_object(): nn = 'rnd' allknn = AllKNN(n_neighbors=nn, random_state=RND_SEED, kind_sel='mode') - assert_raises(ValueError, allknn.fit_sample, X, Y) + with raises(ValueError): + allknn.fit_sample(X, Y) diff --git 
a/imblearn/under_sampling/prototype_selection/tests/test_condensed_nearest_neighbour.py b/imblearn/under_sampling/prototype_selection/tests/test_condensed_nearest_neighbour.py index 360422c70..28917c38b 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_condensed_nearest_neighbour.py @@ -7,7 +7,7 @@ import numpy as np from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_raises_regex +from pytest import raises from sklearn.neighbors import KNeighborsClassifier @@ -87,5 +87,5 @@ def test_cnn_fit_sample_with_object(): def test_cnn_fit_sample_with_wrong_object(): knn = 'rnd' cnn = CondensedNearestNeighbour(random_state=RND_SEED, n_neighbors=knn) - assert_raises_regex(ValueError, "has to be a int or an ", - cnn.fit_sample, X, Y) + with raises(ValueError, match="has to be a int or an "): + cnn.fit_sample(X, Y) diff --git a/imblearn/under_sampling/prototype_selection/tests/test_edited_nearest_neighbours.py b/imblearn/under_sampling/prototype_selection/tests/test_edited_nearest_neighbours.py index 6266840d1..c06895eab 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_edited_nearest_neighbours.py @@ -6,8 +6,9 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_raises_regex from sklearn.neighbors import NearestNeighbors @@ -102,5 +103,5 @@ def test_enn_not_good_object(): nn = 'rnd' enn = EditedNearestNeighbours( n_neighbors=nn, random_state=RND_SEED, kind_sel='mode') - assert_raises_regex(ValueError, "has to be one of", - enn.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + enn.fit_sample(X, Y) diff --git 
a/imblearn/under_sampling/prototype_selection/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/prototype_selection/tests/test_instance_hardness_threshold.py index 528ec8a4a..021077a09 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_instance_hardness_threshold.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_instance_hardness_threshold.py @@ -6,13 +6,14 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_raises_regex from sklearn.ensemble import GradientBoostingClassifier from imblearn.under_sampling import InstanceHardnessThreshold + RND_SEED = 0 X = np.array([[-0.3879569, 0.6894251], [-0.09322739, 1.28177189], [-0.77740357, 0.74097941], [0.91542919, -0.65453327], @@ -31,7 +32,8 @@ def test_iht_wrong_estimator(): est = 'rnd' iht = InstanceHardnessThreshold( estimator=est, ratio=ratio, random_state=RND_SEED) - assert_raises(NotImplementedError, iht.fit_sample, X, Y) + with raises(NotImplementedError): + iht.fit_sample(X, Y) def test_iht_init(): @@ -272,5 +274,5 @@ def test_iht_fit_sample_wrong_class_obj(): from sklearn.cluster import KMeans est = KMeans() iht = InstanceHardnessThreshold(estimator=est, random_state=RND_SEED) - assert_raises_regex(ValueError, "Invalid parameter `estimator`", - iht.fit_sample, X, Y) + with raises(ValueError, match="Invalid parameter `estimator`"): + iht.fit_sample(X, Y) diff --git a/imblearn/under_sampling/prototype_selection/tests/test_nearmiss.py b/imblearn/under_sampling/prototype_selection/tests/test_nearmiss.py index 9424e94be..a50fca349 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_nearmiss.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_nearmiss.py @@ -6,12 +6,15 @@ from __future__ import print_function import numpy as np -from sklearn.utils.testing import 
assert_array_equal, assert_warns -from sklearn.utils.testing import assert_raises_regex +from pytest import raises + +from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import NearestNeighbors from imblearn.under_sampling import NearMiss +from imblearn.utils.testing import warns + RND_SEED = 0 X = np.array([[1.17737838, -0.2002118], [0.4960075, 0.86130762], @@ -36,14 +39,15 @@ # FIXME remove at the end of the deprecation 0.4 def test_nearmiss_deprecation(): nm = NearMiss(ver3_samp_ngh=3, version=3) - assert_warns(DeprecationWarning, nm.fit_sample, X, Y) + with warns(DeprecationWarning, match="deprecated from 0.2"): + nm.fit_sample(X, Y) def test_nearmiss_wrong_version(): version = 1000 nm = NearMiss(version=version, random_state=RND_SEED) - assert_raises_regex(ValueError, "must be 1, 2 or 3", - nm.fit_sample, X, Y) + with raises(ValueError, match="must be 1, 2 or 3"): + nm.fit_sample(X, Y) def test_nm_wrong_nn_obj(): @@ -53,15 +57,15 @@ def test_nm_wrong_nn_obj(): version=VERSION_NEARMISS, return_indices=True, n_neighbors=nn) - assert_raises_regex(ValueError, "has to be one of", - nm.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + nm.fit_sample(X, Y) nn3 = 'rnd' nn = NearestNeighbors(n_neighbors=3) nm3 = NearMiss(ratio=ratio, random_state=RND_SEED, version=3, return_indices=True, n_neighbors=nn, n_neighbors_ver3=nn3) - assert_raises_regex(ValueError, "has to be one of", - nm3.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + nm3.fit_sample(X, Y) def test_nm_fit_sample_auto(): diff --git a/imblearn/under_sampling/prototype_selection/tests/test_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/prototype_selection/tests/test_neighbourhood_cleaning_rule.py index 220326abf..fb21f43db 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_neighbourhood_cleaning_rule.py @@ -3,10 +3,10 @@ # 
Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np -from sklearn.utils.testing import assert_array_equal, assert_raises_regex +from pytest import raises + +from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -26,15 +26,15 @@ def test_ncr_error(): threshold_cleaning = -10 - assert_raises_regex(ValueError, "'threshold_cleaning' is a value between" - " 0 and 1.", NeighbourhoodCleaningRule( - threshold_cleaning=threshold_cleaning).fit_sample, - X, Y) + with raises(ValueError, match=("'threshold_cleaning' is a value between" + " 0 and 1")): + NeighbourhoodCleaningRule( + threshold_cleaning=threshold_cleaning).fit_sample(X, Y) threshold_cleaning = 10 - assert_raises_regex(ValueError, "'threshold_cleaning' is a value between" - " 0 and 1.", NeighbourhoodCleaningRule( - threshold_cleaning=threshold_cleaning).fit_sample, - X, Y) + with raises(ValueError, match=("'threshold_cleaning' is a value between" + " 0 and 1")): + NeighbourhoodCleaningRule( + threshold_cleaning=threshold_cleaning).fit_sample(X, Y) def test_ncr_fit_sample(): @@ -124,5 +124,5 @@ def test_ncr_wrong_nn_obj(): nn = 'rnd' ncr = NeighbourhoodCleaningRule( return_indices=True, random_state=RND_SEED, n_neighbors=nn) - assert_raises_regex(ValueError, "has to be one of", - ncr.fit_sample, X, Y) + with raises(ValueError, match="has to be one of"): + ncr.fit_sample(X, Y) diff --git a/imblearn/under_sampling/prototype_selection/tests/test_one_sided_selection.py b/imblearn/under_sampling/prototype_selection/tests/test_one_sided_selection.py index 2ae1eadd4..e5faf886a 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_one_sided_selection.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_one_sided_selection.py @@ -6,8 +6,9 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import 
assert_raises_regex from sklearn.neighbors import KNeighborsClassifier @@ -89,5 +90,5 @@ def test_oss_with_object(): def test_oss_with_wrong_object(): knn = 'rnd' oss = OneSidedSelection(random_state=RND_SEED, n_neighbors=knn) - assert_raises_regex(ValueError, "has to be a int", - oss.fit_sample, X, Y) + with raises(ValueError, match="has to be a int"): + oss.fit_sample(X, Y) diff --git a/imblearn/under_sampling/prototype_selection/tests/test_repeated_edited_nearest_neighbours.py b/imblearn/under_sampling/prototype_selection/tests/test_repeated_edited_nearest_neighbours.py index af0ff9c2e..dba4fb26a 100644 --- a/imblearn/under_sampling/prototype_selection/tests/test_repeated_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/prototype_selection/tests/test_repeated_edited_nearest_neighbours.py @@ -6,13 +6,15 @@ from __future__ import print_function import numpy as np +from pytest import raises + from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_raises from sklearn.neighbors import NearestNeighbors from imblearn.under_sampling import RepeatedEditedNearestNeighbours + RND_SEED = 0 X = np.array([[-0.12840393, 0.66446571], [1.32319756, -0.13181616], [0.04296502, -0.37981873], [0.83631853, 0.18569783], @@ -53,7 +55,8 @@ def test_renn_iter_wrong(): max_iter = -1 renn = RepeatedEditedNearestNeighbours( max_iter=max_iter, random_state=RND_SEED) - assert_raises(ValueError, renn.fit_sample, X, Y) + with raises(ValueError): + renn.fit_sample(X, Y) def test_renn_fit_sample(): @@ -177,4 +180,5 @@ def test_renn_not_good_object(): nn = 'rnd' renn = RepeatedEditedNearestNeighbours( n_neighbors=nn, random_state=RND_SEED, kind_sel='mode') - assert_raises(ValueError, renn.fit_sample, X, Y) + with raises(ValueError): + renn.fit_sample(X, Y) diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 9f7dd48a0..45a5c0b4e 100644 --- a/imblearn/utils/estimator_checks.py +++ 
b/imblearn/utils/estimator_checks.py @@ -12,13 +12,14 @@ from collections import Counter import numpy as np +from pytest import raises from sklearn.datasets import make_classification from sklearn.utils.estimator_checks import _yield_all_checks \ as sklearn_yield_all_checks, check_estimator \ as sklearn_check_estimator, check_parameters_default_constructible from sklearn.exceptions import NotFittedError -from sklearn.utils.testing import assert_warns, assert_raises_regex + from sklearn.utils.testing import set_random_state from imblearn.base import SamplerMixin @@ -26,6 +27,8 @@ from imblearn.under_sampling.base import BaseCleaningSampler, BaseUnderSampler from imblearn.ensemble.base import BaseEnsembleSampler +from imblearn.utils.testing import warns + def _yield_sampler_checks(name, Estimator): yield check_target_type @@ -75,15 +78,8 @@ def check_target_type(name, Estimator): y = np.linspace(0, 1, 20) estimator = Estimator() set_random_state(estimator) - assert_warns(UserWarning, estimator.fit, X, y) - - -def check_multiclass_warning(name, Estimator): - X = np.random.random((20, 2)) - y = np.array([0] * 3 + [1] * 2 + [2] * 15) - estimator = Estimator() - set_random_state(estimator) - assert_warns(UserWarning, estimator.fit, X, y) + with warns(UserWarning, match='should be of types'): + estimator.fit(X, y) def multioutput_estimator_convert_y_2d(name, y): @@ -170,8 +166,8 @@ def check_samplers_no_fit_error(name, Sampler): sampler = Sampler() X = np.random.random((20, 2)) y = np.array([1] * 5 + [0] * 15) - assert_raises_regex(NotFittedError, "instance is not fitted yet.", - sampler.sample, X, y) + with raises(NotFittedError, match="instance is not fitted yet."): + sampler.sample(X, y) def check_samplers_X_consistancy_sample(name, Sampler): @@ -181,8 +177,8 @@ def check_samplers_X_consistancy_sample(name, Sampler): sampler.fit(X, y) X_different = np.random.random((40, 2)) y_different = y = np.array([1] * 25 + [0] * 15) - assert_raises_regex(RuntimeError, "X and y 
need to be same array earlier", - sampler.sample, X_different, y_different) + with raises(RuntimeError, match="X and y need to be same array earlier"): + sampler.sample(X_different, y_different) def check_samplers_fit(name, Sampler): diff --git a/imblearn/utils/testing.py b/imblearn/utils/testing.py index d6190c1c7..6d5a980d4 100644 --- a/imblearn/utils/testing.py +++ b/imblearn/utils/testing.py @@ -6,13 +6,17 @@ import inspect import pkgutil +from contextlib import contextmanager +from re import compile from operator import itemgetter +from pytest import warns as _warns + +from sklearn.base import BaseEstimator -import imblearn from imblearn.base import SamplerMixin +import imblearn -from sklearn.base import BaseEstimator # meta-estimators need another estimator to be instantiated. META_ESTIMATORS = [] @@ -120,3 +124,47 @@ def is_abstract(c): # itemgetter is used to ensure the sort does not extend to the 2nd item of # the tuple return sorted(set(estimators), key=itemgetter(0)) + + +@contextmanager +def warns(expected_warning, match=None): + """Assert that a warning is raised with an optional matching pattern + + Assert that a code block/function call warns ``expected_warning`` + and raise a failure exception otherwise. It can be used within a context + manager ``with``. + + Parameters + ---------- + expected_warning : Warning + Warning type. + + match : regex str or None, optional + The pattern to be matched. By default, no check is done. + + Returns + ------- + None + + Examples + -------- + + >>> import warnings + >>> from imblearn.utils.testing import warns + >>> with warns(UserWarning, match=r'must be \d+$'): + ... 
warnings.warn("value must be 42", UserWarning) + + """ + with _warns(expected_warning) as record: + yield + + if match is not None: + for each in record: + if compile(match).search(str(each.message)) is not None: + break + else: + msg = "'{}' pattern not found in {}".format( + match, '{}'.format([str(r.message) for r in record])) + assert False, msg + else: + pass diff --git a/imblearn/utils/tests/test_deprecation.py b/imblearn/utils/tests/test_deprecation.py index 2fbc903a7..c6cd0e0d8 100644 --- a/imblearn/utils/tests/test_deprecation.py +++ b/imblearn/utils/tests/test_deprecation.py @@ -3,9 +3,8 @@ # Authors: Guillaume Lemaitre # License: MIT -from sklearn.utils.testing import assert_warns_message - from imblearn.utils.deprecation import deprecate_parameter +from imblearn.utils.testing import warns class Sampler(object): @@ -15,7 +14,7 @@ def __init__(self): def test_deprecate_parameter(): - assert_warns_message(DeprecationWarning, "is deprecated from", - deprecate_parameter, Sampler(), '0.2', 'a') - assert_warns_message(DeprecationWarning, "Use 'b' instead.", - deprecate_parameter, Sampler(), '0.2', 'a', 'b') + with warns(DeprecationWarning, match="is deprecated from"): + deprecate_parameter(Sampler(), '0.2', 'a') + with warns(DeprecationWarning, match="Use 'b' instead."): + deprecate_parameter(Sampler(), '0.2', 'a', 'b') diff --git a/imblearn/utils/tests/test_estimator_checks.py b/imblearn/utils/tests/test_estimator_checks.py index f423929f5..73a49a2da 100644 --- a/imblearn/utils/tests/test_estimator_checks.py +++ b/imblearn/utils/tests/test_estimator_checks.py @@ -1,11 +1,13 @@ """Estimator tests - adapted from scikit-learn""" +import sys + import scipy.sparse as sp import numpy as np -import sys +from pytest import raises + from sklearn.externals.six.moves import cStringIO as StringIO from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.testing import assert_raises_regex from sklearn.utils.validation import check_X_y, check_array from 
imblearn.utils.estimator_checks import check_estimator @@ -98,35 +100,40 @@ def test_check_estimator(): # check that we have a set_params and can clone msg = "it does not implement a 'get_params' methods" - assert_raises_regex(TypeError, msg, check_estimator, object) + with raises(TypeError, match=msg): + check_estimator(object) + # check that we have a fit method msg = "object has no attribute 'fit'" - assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator) + with raises(AttributeError, match=msg): + check_estimator(BaseEstimator) # check that fit does input validation msg = "TypeError not raised" - assert_raises_regex(AssertionError, msg, check_estimator, - BaseBadClassifier) + with raises(AssertionError, match=msg): + check_estimator(BaseBadClassifier) # check that predict does input validation (doesn't accept dicts in input) msg = "Estimator doesn't check for NaN and inf in predict" - assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict) + with raises(AssertionError, match=msg): + check_estimator(NoCheckinPredict) # check that estimator state does not change # at transform/predict/predict_proba time msg = 'Estimator changes __dict__ during predict' - assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict) + with raises(AssertionError, match=msg): + check_estimator(ChangesDict) # check that `fit` only changes attributes that # are private (start with an _ or end with a _). msg = ('Estimator changes public attribute\(s\) during the fit method.' ' Estimators are only allowed to change attributes started' ' or ended with _, but wrong_attribute changed') - assert_raises_regex(AssertionError, msg, - check_estimator, ChangesWrongAttribute) + with raises(AssertionError, match=msg): + check_estimator(ChangesWrongAttribute) # check that `fit` doesn't add any public attribute msg = ('Estimator adds public attribute\(s\) during the fit method.' 
' Estimators are only allowed to add private attributes' ' either started with _ or ended' ' with _ but wrong_attribute added') - assert_raises_regex(AssertionError, msg, - check_estimator, SetsWrongAttribute) + with raises(AssertionError, match=msg): + check_estimator(SetsWrongAttribute) # check for sparse matrix input handling name = NoSparseClassifier.__name__ msg = ("Estimator " + name + " doesn't seem to fail gracefully on" diff --git a/imblearn/utils/tests/test_testing.py b/imblearn/utils/tests/test_testing.py index d116018ec..1c1c1d3ec 100644 --- a/imblearn/utils/tests/test_testing.py +++ b/imblearn/utils/tests/test_testing.py @@ -3,11 +3,13 @@ # Christos Aridas # License: MIT -from sklearn.utils.testing import assert_raises_regex +from pytest import raises from imblearn.base import SamplerMixin from imblearn.utils.testing import all_estimators +from imblearn.utils.testing import warns + def test_all_estimators(): # check if the filtering is working with a list or a single string @@ -21,5 +23,28 @@ def test_all_estimators(): # check that an error is raised when the type is unknown type_filter = 'rnd' - assert_raises_regex(ValueError, "Parameter type_filter must be 'sampler'", - all_estimators, type_filter=type_filter) + with raises(ValueError, match="Parameter type_filter must be 'sampler'"): + all_estimators(type_filter=type_filter) + + +def test_warns(): + import warnings + + with warns(UserWarning, match=r'must be \d+$'): + warnings.warn("value must be 42", UserWarning) + + with raises(AssertionError, match='pattern not found'): + with warns(UserWarning, match=r'must be \d+$'): + warnings.warn("this is not here", UserWarning) + + with warns(UserWarning, match=r'aaa'): + warnings.warn("cccccccccc", UserWarning) + warnings.warn("bbbbbbbbbb", UserWarning) + warnings.warn("aaaaaaaaaa", UserWarning) + + a, b, c = ('aaa', 'bbbbbbbbbb', 'cccccccccc') + expected_msg = "'{}' pattern not found in \['{}', '{}'\]".format(a, b, c) + with raises(AssertionError, 
match=expected_msg): + with warns(UserWarning, match=r'aaa'): + warnings.warn("bbbbbbbbbb", UserWarning) + warnings.warn("cccccccccc", UserWarning) diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py index 62522f532..08b2f5145 100644 --- a/imblearn/utils/tests/test_validation.py +++ b/imblearn/utils/tests/test_validation.py @@ -5,13 +5,13 @@ from collections import Counter +from pytest import raises import numpy as np from sklearn.neighbors.base import KNeighborsMixin from sklearn.neighbors import NearestNeighbors -from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_warns_message +from imblearn.utils.testing import warns from imblearn.utils import check_neighbors_object from imblearn.utils import check_ratio @@ -29,20 +29,21 @@ def test_check_neighbors_object(): estimator = NearestNeighbors(n_neighbors) assert estimator is check_neighbors_object(name, estimator) n_neighbors = 'rnd' - assert_raises_regex(ValueError, "has to be one of", - check_neighbors_object, name, n_neighbors) + with raises(ValueError, match="has to be one of"): + check_neighbors_object(name, n_neighbors) def test_check_ratio_error(): - assert_raises_regex(ValueError, "'sampling_type' should be one of", - check_ratio, 'auto', np.array([1, 2, 3]), - 'rnd') - assert_raises_regex(ValueError, "The target 'y' needs to have more than 1" - " class.", check_ratio, 'auto', np.ones((10, )), - 'over-sampling') - assert_raises_regex(ValueError, "When 'ratio' is a string, it needs to be" - " one of", check_ratio, 'rnd', np.array([1, 2, 3]), - 'over-sampling') + with raises(ValueError, match="'sampling_type' should be one of"): + check_ratio('auto', np.array([1, 2, 3]), 'rnd') + + error_regex = "The target 'y' needs to have more than 1 class." 
+ with raises(ValueError, match=error_regex): + check_ratio('auto', np.ones((10, )), 'over-sampling') + + error_regex = "When 'ratio' is a string, it needs to be one of" + with raises(ValueError, match=error_regex): + check_ratio('rnd', np.array([1, 2, 3]), 'over-sampling') def test_ratio_all_over_sampling(): @@ -58,9 +59,9 @@ def test_ratio_all_under_sampling(): def test_ratio_majority_over_sampling(): - assert_raises_regex(ValueError, "'ratio'='majority' cannot be used with" - " over-sampler.", check_ratio, 'majority', - np.array([1, 2, 3]), 'over-sampling') + error_regex = "'ratio'='majority' cannot be used with over-sampler." + with raises(ValueError, match=error_regex): + check_ratio('majority', np.array([1, 2, 3]), 'over-sampling') def test_ratio_majority_under_sampling(): @@ -90,30 +91,33 @@ def test_ratio_minority_over_sampling(): def test_ratio_minority_under_sampling(): - assert_raises_regex(ValueError, "'ratio'='minority' cannot be used with" - " under-sampler.", check_ratio, 'minority', - np.array([1, 2, 3]), 'under-sampling') + error_regex = "'ratio'='minority' cannot be used with under-sampler." + with raises(ValueError, match=error_regex): + check_ratio('minority', np.array([1, 2, 3]), 'under-sampling') def test_ratio_dict_error(): y = np.array([1] * 50 + [2] * 100 + [3] * 25) ratio = {1: -100, 2: 50, 3: 25} - assert_raises_regex(ValueError, "in a class cannot be negative.", - check_ratio, ratio, y, 'under-sampling') + with raises(ValueError, match="in a class cannot be negative."): + check_ratio(ratio, y, 'under-sampling') ratio = {10: 10} - assert_raises_regex(ValueError, "are not present in the data.", - check_ratio, ratio, y, 'over-sampling') + with raises(ValueError, match="are not present in the data."): + check_ratio(ratio, y, 'over-sampling') ratio = {1: 45, 2: 100, 3: 70} - assert_raises_regex(ValueError, "With over-sampling methods, the number" - " of samples in a class should be greater or equal" - " to the original number of samples. 
Originally," - " there is 50 samples and 45 samples are asked.", - check_ratio, ratio, y, 'over-sampling') - assert_raises_regex(ValueError, "With under-sampling methods, the number" - " of samples in a class should be less or equal" - " to the original number of samples. Originally," - " there is 25 samples and 70 samples are asked.", - check_ratio, ratio, y, 'under-sampling') + error_regex = ("With over-sampling methods, the number of samples in a" + " class should be greater or equal to the original number" + " of samples. Originally, there is 50 samples and 45" + " samples are asked.") + with raises(ValueError, match=error_regex): + check_ratio(ratio, y, 'over-sampling') + + error_regex = ("With under-sampling methods, the number of samples in a" + " class should be less or equal to the original number of" + " samples. Originally, there is 25 samples and 70 samples" + " are asked.") + with raises(ValueError, match=error_regex): + check_ratio(ratio, y, 'under-sampling') def test_ratio_dict_over_sampling(): @@ -122,10 +126,11 @@ def test_ratio_dict_over_sampling(): ratio_ = check_ratio(ratio, y, 'over-sampling') assert ratio_ == {1: 20, 2: 0, 3: 45} ratio = {1: 70, 2: 140, 3: 70} - assert_warns_message(UserWarning, "After over-sampling, the number of" - " samples (140) in class 2 will be larger than the" - " number of samples in the majority class (class #2" - " -> 100)", check_ratio, ratio, y, 'over-sampling') + expected_msg = ("After over-sampling, the number of samples \(140\) in" + " class 2 will be larger than the number of samples in the" + " majority class \(class #2 -> 100\)") + with warns(UserWarning, expected_msg): + check_ratio(ratio, y, 'over-sampling') def test_ratio_dict_under_sampling(): @@ -138,11 +143,12 @@ def test_ratio_dict_under_sampling(): def test_ratio_float_error(): y = np.array([1] * 50 + [2] * 100 + [3] * 25) ratio = -10 - assert_raises_regex(ValueError, "When 'ratio' is a float, it should in the" - " range", check_ratio, ratio, y, 
'under-sampling') + error_regex = "When 'ratio' is a float, it should in the range" + with raises(ValueError, match=error_regex): + check_ratio(ratio, y, 'under-sampling') ratio = 10 - assert_raises_regex(ValueError, "When 'ratio' is a float, it should in the" - " range", check_ratio, ratio, y, 'under-sampling') + with raises(ValueError, match=error_regex): + check_ratio(ratio, y, 'under-sampling') def test_ratio_float_over_sampling():