Skip to content

[MRG] Migrate raising errors from nose to pytest #321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Aug 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions doc/developers_utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,45 @@ On the top of all the functionality provided by scikit-learn. Imbalance-learn
provides :func:`deprecate_parameter`: which is used to deprecate a sampler's
parameter (attribute) by another one.

Testing utilities
=================
Currently, imbalanced-learn provides a warning management utility. This feature
is going to be merged into pytest and will be removed once a pytest release
includes it.

If using Python 2.7 or above, you may use this function as a
context manager::

>>> import warnings
>>> from imblearn.utils.testing import warns
>>> with warns(RuntimeWarning):
... warnings.warn("my runtime warning", RuntimeWarning)

>>> with warns(RuntimeWarning):
... pass
Traceback (most recent call last):
...
Failed: DID NOT WARN. No warnings of type ...RuntimeWarning... was emitted...

>>> with warns(RuntimeWarning):
... warnings.warn(UserWarning)
Traceback (most recent call last):
...
Failed: DID NOT WARN. No warnings of type ...RuntimeWarning... was emitted...

In the context manager form you may use the keyword argument ``match`` to assert
that the exception matches a text or regex::

>>> import warnings
>>> from imblearn.utils.testing import warns
>>> with warns(UserWarning, match='must be 0 or None'):
... warnings.warn("value must be 0 or None", UserWarning)

>>> with warns(UserWarning, match=r'must be \d+$'):
... warnings.warn("value must be 42", UserWarning)

>>> with warns(UserWarning, match=r'must be \d+$'):
... warnings.warn("this is not here", UserWarning)
Traceback (most recent call last):
...
AssertionError: 'must be \d+$' pattern not found in ['this is not here']
11 changes: 6 additions & 5 deletions imblearn/combine/tests/test_smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from __future__ import print_function

import numpy as np
from pytest import raises

from sklearn.utils.testing import assert_allclose, assert_array_equal
from sklearn.utils.testing import assert_raises_regex

from imblearn.combine import SMOTEENN
from imblearn.under_sampling import EditedNearestNeighbours
Expand Down Expand Up @@ -113,8 +114,8 @@ def test_error_wrong_object():
smote = 'rnd'
enn = 'rnd'
smt = SMOTEENN(smote=smote, random_state=RND_SEED)
assert_raises_regex(ValueError, "smote needs to be a SMOTE",
smt.fit_sample, X, Y)
with raises(ValueError, match="smote needs to be a SMOTE"):
smt.fit_sample(X, Y)
smt = SMOTEENN(enn=enn, random_state=RND_SEED)
assert_raises_regex(ValueError, "enn needs to be an ",
smt.fit_sample, X, Y)
with raises(ValueError, match="enn needs to be an "):
smt.fit_sample(X, Y)
11 changes: 6 additions & 5 deletions imblearn/combine/tests/test_smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from __future__ import print_function

import numpy as np
from pytest import raises

from sklearn.utils.testing import assert_allclose, assert_array_equal
from sklearn.utils.testing import assert_raises_regex

from imblearn.combine import SMOTETomek
from imblearn.over_sampling import SMOTE
Expand Down Expand Up @@ -156,8 +157,8 @@ def test_error_wrong_object():
smote = 'rnd'
tomek = 'rnd'
smt = SMOTETomek(smote=smote, random_state=RND_SEED)
assert_raises_regex(ValueError, "smote needs to be a SMOTE",
smt.fit_sample, X, Y)
with raises(ValueError, match="smote needs to be a SMOTE"):
smt.fit_sample(X, Y)
smt = SMOTETomek(tomek=tomek, random_state=RND_SEED)
assert_raises_regex(ValueError, "tomek needs to be a TomekLinks",
smt.fit_sample, X, Y)
with raises(ValueError, match="tomek needs to be a TomekLinks"):
smt.fit_sample(X, Y)
33 changes: 17 additions & 16 deletions imblearn/datasets/tests/test_imbalance.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@

import numpy as np

from pytest import raises

from sklearn.datasets import load_iris
from sklearn.utils.testing import assert_raises_regex
from sklearn.utils.testing import assert_warns_message

from imblearn.utils.testing import warns
from imblearn.datasets import make_imbalance

data = load_iris()
Expand All @@ -24,28 +25,28 @@ def test_make_imbalance_error():
# we are reusing part of utils.check_ratio, however this is not cover in
# the common tests so we will repeat it here
ratio = {0: -100, 1: 50, 2: 50}
assert_raises_regex(ValueError, "in a class cannot be negative",
make_imbalance, X, Y, ratio)
with raises(ValueError, match="in a class cannot be negative"):
make_imbalance(X, Y, ratio)
ratio = {0: 10, 1: 70}
assert_raises_regex(ValueError, "should be less or equal to the original",
make_imbalance, X, Y, ratio)
with raises(ValueError, match="should be less or equal to the original"):
make_imbalance(X, Y, ratio)
y_ = np.zeros((X.shape[0], ))
ratio = {0: 10}
assert_raises_regex(ValueError, "needs to have more than 1 class.",
make_imbalance, X, y_, ratio)
with raises(ValueError, match="needs to have more than 1 class."):
make_imbalance(X, y_, ratio)
ratio = 'random-string'
assert_raises_regex(ValueError, "has to be a dictionary or a function",
make_imbalance, X, Y, ratio)
with raises(ValueError, match="has to be a dictionary or a function"):
make_imbalance(X, Y, ratio)


# FIXME: to be removed in 0.4 due to deprecation
def test_make_imbalance_float():
X_, y_ = assert_warns_message(DeprecationWarning,
"'min_c_' is deprecated in 0.2",
make_imbalance, X, Y, ratio=0.5, min_c_=1)
X_, y_ = assert_warns_message(DeprecationWarning,
"'ratio' being a float is deprecated",
make_imbalance, X, Y, ratio=0.5, min_c_=1)
with warns(DeprecationWarning, match="deprecated in 0.2"):
X_, y_ = make_imbalance(X, Y, ratio=0.5, min_c_=1)

with warns(DeprecationWarning, match="'ratio' being a float"):
X_, y_ = make_imbalance(X, Y, ratio=0.5, min_c_=1)

assert Counter(y_) == {0: 50, 1: 25, 2: 50}
# resample without using min_c_
X_, y_ = make_imbalance(X_, y_, ratio=0.25, min_c_=None)
Expand Down
19 changes: 10 additions & 9 deletions imblearn/datasets/tests/test_zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from imblearn.datasets import fetch_datasets
from sklearn.utils.testing import SkipTest, assert_allclose
from sklearn.utils.testing import assert_raises_regex

from pytest import raises

DATASET_SHAPE = {'ecoli': (336, 7),
'optical_digits': (5620, 64),
Expand Down Expand Up @@ -84,11 +85,11 @@ def test_fetch_filter():


def test_fetch_error():
    """Check that invalid ``filter_data`` entries raise ``ValueError``.

    Covers an unknown dataset name, out-of-range integer IDs (both below
    and above the valid range), and a non-int/non-str tuple element.
    """
    with raises(ValueError, match='is not a dataset available.'):
        fetch_datasets(filter_data=tuple(['rnd']))
    with raises(ValueError, match='dataset with the ID='):
        fetch_datasets(filter_data=tuple([-1]))
    with raises(ValueError, match='dataset with the ID='):
        fetch_datasets(filter_data=tuple([100]))
    with raises(ValueError, match='value in the tuple'):
        fetch_datasets(filter_data=tuple([1.00]))
14 changes: 9 additions & 5 deletions imblearn/ensemble/tests/test_balance_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
from __future__ import print_function

import numpy as np
from sklearn.utils.testing import assert_array_equal, assert_raises
from sklearn.utils.testing import assert_raises_regex

from pytest import raises

from sklearn.utils.testing import assert_array_equal
from sklearn.ensemble import RandomForestClassifier

from imblearn.ensemble import BalanceCascade


RND_SEED = 0
X = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
[1.25192108, -0.22367336], [0.53366841, -0.30312976],
Expand Down Expand Up @@ -299,7 +302,8 @@ def test_fit_sample_auto_linear_svm():
def test_init_wrong_classifier():
    """A non-estimator ``classifier`` must raise ``NotImplementedError``."""
    classifier = 'rnd'
    bc = BalanceCascade(classifier=classifier)
    with raises(NotImplementedError):
        bc.fit_sample(X, Y)


def test_fit_sample_auto_early_stop():
Expand Down Expand Up @@ -362,5 +366,5 @@ def test_give_classifier_wrong_obj():
classifier = 2
bc = BalanceCascade(ratio=ratio, random_state=RND_SEED,
return_indices=True, estimator=classifier)
assert_raises_regex(ValueError, "Invalid parameter `estimator`",
bc.fit_sample, X, Y)
with raises(ValueError, match="Invalid parameter `estimator`"):
bc.fit_sample(X, Y)
60 changes: 27 additions & 33 deletions imblearn/metrics/tests/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,17 @@

import numpy as np

from pytest import approx, raises

from sklearn import datasets
from sklearn import svm

from sklearn.preprocessing import label_binarize
from sklearn.utils.fixes import np_version
from sklearn.utils.validation import check_random_state
from sklearn.utils.testing import assert_allclose, assert_array_equal
from sklearn.utils.testing import assert_no_warnings, assert_raises
from sklearn.utils.testing import assert_warns_message, ignore_warnings
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.testing import assert_no_warnings
from sklearn.utils.testing import ignore_warnings
from sklearn.metrics import accuracy_score, average_precision_score
from sklearn.metrics import brier_score_loss, cohen_kappa_score
from sklearn.metrics import jaccard_similarity_score, precision_score
Expand All @@ -32,7 +33,8 @@
from imblearn.metrics import make_index_balanced_accuracy
from imblearn.metrics import classification_report_imbalanced

from pytest import approx
from imblearn.utils.testing import warns


RND_SEED = 42
R_TOL = 1e-2
Expand Down Expand Up @@ -177,40 +179,30 @@ def test_sensitivity_specificity_error_multilabels():
y_true_bin = label_binarize(y_true, classes=np.arange(5))
y_pred_bin = label_binarize(y_pred, classes=np.arange(5))

assert_raises(ValueError, sensitivity_score, y_true_bin, y_pred_bin)
with raises(ValueError):
sensitivity_score(y_true_bin, y_pred_bin)


@ignore_warnings
def test_sensitivity_specificity_support_errors():
    """Invalid ``pos_label``/``average`` arguments must raise ``ValueError``."""
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad pos_label: 2 is not a label of the binary problem
    with raises(ValueError):
        sensitivity_specificity_support(y_true, y_pred, pos_label=2,
                                        average='binary')

    # Bad average option: 'mega' is not a supported averaging mode
    with raises(ValueError):
        sensitivity_specificity_support([0, 1, 2], [1, 2, 0], average='mega')


def test_sensitivity_specificity_unused_pos_label():
    """``pos_label`` is ignored (with a ``UserWarning``) when ``average``
    is not ``'binary'``, even if the data is binary.
    """
    # ``match`` must be passed as a keyword and as a raw string, since the
    # pattern is a regex containing escaped brackets.
    with warns(UserWarning, match=r"use labels=\[pos_label\] to specify a single"):
        sensitivity_specificity_support([1, 2, 1], [1, 2, 2],
                                        pos_label=2,
                                        average='macro')


def test_geometric_mean_support_binary():
Expand Down Expand Up @@ -405,10 +397,8 @@ def test_classification_report_imbalanced_multiclass_with_unicode_label():
u'0.15 0.44 0.19 31 red\xa2 0.42 0.90 0.55 0.57 0.63 '
u'0.37 20 avg / total 0.51 0.53 0.80 0.47 0.62 0.41 75')
if np_version[:3] < (1, 7, 0):
expected_message = ("NumPy < 1.7.0 does not implement"
" searchsorted on unicode data correctly.")
assert_raise_message(RuntimeError, expected_message,
classification_report_imbalanced, y_true, y_pred)
with raises(RuntimeError, match="NumPy < 1.7.0"):
classification_report_imbalanced(y_true, y_pred)
else:
report = classification_report_imbalanced(y_true, y_pred)
assert _format_report(report) == expected_report
Expand Down Expand Up @@ -459,16 +449,20 @@ def test_iba_error_y_score_prob():

aps = make_index_balanced_accuracy(alpha=0.5, squared=True)(
average_precision_score)
assert_raises(AttributeError, aps, y_true, y_pred)
with raises(AttributeError):
aps(y_true, y_pred)

brier = make_index_balanced_accuracy(alpha=0.5, squared=True)(
brier_score_loss)
assert_raises(AttributeError, brier, y_true, y_pred)
with raises(AttributeError):
brier(y_true, y_pred)

kappa = make_index_balanced_accuracy(alpha=0.5, squared=True)(
cohen_kappa_score)
assert_raises(AttributeError, kappa, y_true, y_pred)
with raises(AttributeError):
kappa(y_true, y_pred)

ras = make_index_balanced_accuracy(alpha=0.5, squared=True)(
roc_auc_score)
assert_raises(AttributeError, ras, y_true, y_pred)
with raises(AttributeError):
ras(y_true, y_pred)
8 changes: 5 additions & 3 deletions imblearn/over_sampling/tests/test_adasyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from __future__ import print_function

import numpy as np
from pytest import raises

from sklearn.utils.testing import assert_allclose, assert_array_equal
from sklearn.utils.testing import assert_raises_regex
from sklearn.neighbors import NearestNeighbors

from imblearn.over_sampling import ADASYN


RND_SEED = 0
X = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
[1.25192108, -0.22367336], [0.53366841, -0.30312976],
Expand Down Expand Up @@ -141,5 +143,5 @@ def test_ada_fit_sample_nn_obj():
def test_ada_wrong_nn_obj():
    """An invalid ``n_neighbors`` argument must raise ``ValueError``."""
    nn = 'rnd'
    ada = ADASYN(random_state=RND_SEED, n_neighbors=nn)
    with raises(ValueError, match="has to be one of"):
        ada.fit_sample(X, Y)
Loading