
TST: refactor and pytest style #470

Merged
merged 5 commits on Sep 7, 2018

Changes from all commits

doc/whats_new/v0.0.4.rst (3 additions, 0 deletions)
@@ -104,6 +104,9 @@ Maintenance
 - Catch deprecation warning in testing.
   :issue:`441` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Refactor and impose `pytest` style tests.
+  :issue:`470` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Documentation
 .............
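
The "pytest style" imposed by this PR comes down to two idioms that recur in the diffs below: collapsing near-duplicate test bodies into a single function driven by @pytest.mark.parametrize, and calling pytest.raises(..., match=...) instead of a bare raises imported from pytest. A minimal standalone sketch of the pattern (the divide function and test names are illustrative, not taken from the package):

import pytest


def divide(a, b):
    # toy function under test; raises on an invalid argument
    if b == 0:
        raise ValueError("b must be non-zero")
    return a / b


@pytest.mark.parametrize(
    "a, b, expected",
    [(4, 2, 2), (9, 3, 3)]
)
def test_divide(a, b, expected):
    assert divide(a, b) == expected


def test_divide_error():
    # match is a regular expression searched in the exception message
    with pytest.raises(ValueError, match="must be non-zero"):
        divide(1, 0)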

imblearn/combine/tests/test_smote_enn.py (9 additions, 11 deletions)
@@ -3,10 +3,8 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
+import pytest
 import numpy as np
-from pytest import raises
 
 from sklearn.utils.testing import assert_allclose, assert_array_equal

@@ -100,12 +98,12 @@ def test_validate_estimator_default():
     assert_array_equal(y_resampled, y_gt)
 
 
-def test_error_wrong_object():
-    smote = 'rnd'
-    enn = 'rnd'
-    smt = SMOTEENN(smote=smote, random_state=RND_SEED)
-    with raises(ValueError, match="smote needs to be a SMOTE"):
-        smt.fit_resample(X, Y)
-    smt = SMOTEENN(enn=enn, random_state=RND_SEED)
-    with raises(ValueError, match="enn needs to be an "):
+@pytest.mark.parametrize(
+    "smote_params, err_msg",
+    [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
+     ({'enn': 'rnd'}, "enn needs to be an ")]
+)
+def test_error_wrong_object(smote_params, err_msg):
+    smt = SMOTEENN(**smote_params)
+    with pytest.raises(ValueError, match=err_msg):
         smt.fit_resample(X, Y)

imblearn/combine/tests/test_smote_tomek.py (9 additions, 11 deletions)
@@ -3,10 +3,8 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
+import pytest
 import numpy as np
-from pytest import raises
 
 from sklearn.utils.testing import assert_allclose, assert_array_equal

@@ -106,12 +104,12 @@ def test_validate_estimator_default():
     assert_array_equal(y_resampled, y_gt)
 
 
-def test_error_wrong_object():
-    smote = 'rnd'
-    tomek = 'rnd'
-    smt = SMOTETomek(smote=smote, random_state=RND_SEED)
-    with raises(ValueError, match="smote needs to be a SMOTE"):
-        smt.fit_resample(X, Y)
-    smt = SMOTETomek(tomek=tomek, random_state=RND_SEED)
-    with raises(ValueError, match="tomek needs to be a TomekLinks"):
+@pytest.mark.parametrize(
+    "smote_params, err_msg",
+    [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
+     ({'tomek': 'rnd'}, "tomek needs to be a TomekLinks")]
+)
+def test_error_wrong_object(smote_params, err_msg):
+    smt = SMOTETomek(**smote_params)
+    with pytest.raises(ValueError, match=err_msg):
         smt.fit_resample(X, Y)
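
A second idiom shared by the two combine test modules above: each parametrized case supplies a kwargs dict that is unpacked into the estimator constructor, so a single test body covers every invalid-argument combination. A rough standalone sketch (the Sampler class below is a made-up stand-in for SMOTEENN/SMOTETomek, which in the real code raise at fit_resample time):

import pytest


class Sampler:
    # stand-in that only validates its inputs
    def __init__(self, smote=None, enn=None):
        if isinstance(smote, str):
            raise ValueError("smote needs to be a SMOTE object")
        if isinstance(enn, str):
            raise ValueError("enn needs to be an EditedNearestNeighbours object")


@pytest.mark.parametrize(
    "params, err_msg",
    [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
     ({'enn': 'rnd'}, "enn needs to be an ")]
)
def test_wrong_object(params, err_msg):
    with pytest.raises(ValueError, match=err_msg):
        Sampler(**params)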

imblearn/datasets/tests/test_imbalance.py (44 additions, 42 deletions)
@@ -3,64 +3,66 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
 from collections import Counter
 
 import pytest
 import numpy as np
 
-from pytest import raises
-
 from sklearn.datasets import load_iris
 
 from imblearn.datasets import make_imbalance
 
-data = load_iris()
-X, Y = data.data, data.target
+
+@pytest.fixture
+def iris():
+    return load_iris(return_X_y=True)
 
 
-def test_make_imbalanced_backcompat():
+def test_make_imbalanced_backcompat(iris):
     # check an error is raised when we don't pass sampling_strategy and ratio
-    with raises(TypeError, match="missing 1 required positional argument"):
-        make_imbalance(X, Y)
+    with pytest.raises(TypeError, match="missing 1 required positional argument"):
+        make_imbalance(*iris)
 
 
-def test_make_imbalance_error():
+@pytest.mark.parametrize(
+    "sampling_strategy, err_msg",
+    [({0: -100, 1: 50, 2: 50}, "in a class cannot be negative"),
+     ({0: 10, 1: 70}, "should be less or equal to the original"),
+     ('random-string', "has to be a dictionary or a function")]
+)
+def test_make_imbalance_error(iris, sampling_strategy, err_msg):
     # we are reusing part of utils.check_sampling_strategy, however this is not
     # covered in the common tests so we will repeat it here
-    sampling_strategy = {0: -100, 1: 50, 2: 50}
-    with raises(ValueError, match="in a class cannot be negative"):
-        make_imbalance(X, Y, sampling_strategy)
-    sampling_strategy = {0: 10, 1: 70}
-    with raises(ValueError, match="should be less or equal to the original"):
-        make_imbalance(X, Y, sampling_strategy)
-    y_ = np.zeros((X.shape[0], ))
-    sampling_strategy = {0: 10}
-    with raises(ValueError, match="needs to have more than 1 class."):
-        make_imbalance(X, y_, sampling_strategy)
-    sampling_strategy = 'random-string'
-    with raises(ValueError, match="has to be a dictionary or a function"):
-        make_imbalance(X, Y, sampling_strategy)
-
-
-def test_make_imbalance_dict():
-    sampling_strategy = {0: 10, 1: 20, 2: 30}
-    X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy)
-    assert Counter(y_) == sampling_strategy
-
-    sampling_strategy = {0: 10, 1: 20}
-    X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy)
-    assert Counter(y_) == {0: 10, 1: 20, 2: 50}
+    X, y = iris
+    with pytest.raises(ValueError, match=err_msg):
+        make_imbalance(X, y, sampling_strategy)
+
+
+def test_make_imbalance_error_single_class(iris):
+    X, y = iris
+    y = np.zeros_like(y)
+    with pytest.raises(ValueError, match="needs to have more than 1 class."):
+        make_imbalance(X, y, {0: 10})
+
+
+@pytest.mark.parametrize(
+    "sampling_strategy, expected_counts",
+    [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}),
+     ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})]
+)
+def test_make_imbalance_dict(iris, sampling_strategy, expected_counts):
+    X, y = iris
+    _, y_ = make_imbalance(X, y, sampling_strategy=sampling_strategy)
+    assert Counter(y_) == expected_counts
 
 
 @pytest.mark.filterwarnings("ignore:'ratio' has been deprecated in 0.4")
-def test_make_imbalance_ratio():
-    # check that using 'ratio' is working
-    sampling_strategy = {0: 10, 1: 20, 2: 30}
-    X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy)
-    assert Counter(y_) == sampling_strategy
-
-    sampling_strategy = {0: 10, 1: 20}
-    X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy)
-    assert Counter(y_) == {0: 10, 1: 20, 2: 50}
+@pytest.mark.parametrize(
+    "sampling_strategy, expected_counts",
+    [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}),
+     ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})]
+)
+def test_make_imbalance_dict_ratio(iris, sampling_strategy, expected_counts):
+    X, y = iris
+    _, y_ = make_imbalance(X, y, ratio=sampling_strategy)
+    assert Counter(y_) == expected_counts
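
For readers less familiar with fixtures: pytest injects a fixture into a test by matching the parameter name against a function decorated with @pytest.fixture, so each test above that declares an iris argument receives a freshly evaluated (X, y) tuple instead of reading module-level globals. A short sketch of the mechanism (the test name is illustrative):

import pytest
from sklearn.datasets import load_iris


@pytest.fixture
def iris():
    # re-evaluated for every test that lists `iris` as a parameter
    return load_iris(return_X_y=True)


def test_feature_and_target_sizes(iris):
    X, y = iris
    assert X.shape[0] == y.shape[0]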

imblearn/datasets/tests/test_zenodo.py (14 additions, 13 deletions)
@@ -6,10 +6,10 @@
 # Christos Aridas
 # License: MIT
 
-from imblearn.datasets import fetch_datasets
-from sklearn.utils.testing import SkipTest, assert_allclose
+import pytest
 
-from pytest import raises
+from imblearn.datasets import fetch_datasets
+from sklearn.utils.testing import SkipTest
 
 DATASET_SHAPE = {
     'ecoli': (336, 7),
@@ -79,19 +79,20 @@ def test_fetch_filter():
     assert DATASET_SHAPE['ecoli'] == X1.shape
     assert X1.shape == X2.shape
 
-    assert_allclose(X1.sum(), X2.sum())
+    assert X1.sum() == pytest.approx(X2.sum())
 
     y1, y2 = datasets1['ecoli'].target, datasets2['ecoli'].target
     assert (X1.shape[0], ) == y1.shape
     assert (X1.shape[0], ) == y2.shape
 
 
-def test_fetch_error():
-    with raises(ValueError, match='is not a dataset available.'):
-        fetch_datasets(filter_data=tuple(['rnd']))
-    with raises(ValueError, match='dataset with the ID='):
-        fetch_datasets(filter_data=tuple([-1]))
-    with raises(ValueError, match='dataset with the ID='):
-        fetch_datasets(filter_data=tuple([100]))
-    with raises(ValueError, match='value in the tuple'):
-        fetch_datasets(filter_data=tuple([1.00]))
+@pytest.mark.parametrize(
+    "filter_data, err_msg",
+    [(('rnf',), "is not a dataset available"),
+     ((-1,), "dataset with the ID="),
+     ((100,), "dataset with the ID="),
+     ((1.00,), "value in the tuple")]
+)
+def test_fetch_error(filter_data, err_msg):
+    with pytest.raises(ValueError, match=err_msg):
+        fetch_datasets(filter_data=filter_data)
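
The assert X1.sum() == pytest.approx(X2.sum()) line above is the pytest-native replacement for assert_allclose: pytest.approx compares with a relative tolerance of 1e-6 by default, and rel/abs can be overridden. A tiny illustrative example:

import pytest


def test_float_sum_is_close():
    total = sum([0.1] * 10)  # 0.9999999999999999, not exactly 1.0
    assert total == pytest.approx(1.0)             # default rel=1e-6
    assert total == pytest.approx(1.0, rel=1e-12)  # still passes; error is ~1e-16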

imblearn/ensemble/tests/test_balance_cascade.py (0 additions, 2 deletions)
@@ -3,8 +3,6 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
 import numpy as np
 
 from pytest import raises

imblearn/keras/tests/test_generator.py (24 additions, 16 deletions)
@@ -18,9 +18,13 @@
 from imblearn.keras import BalancedBatchGenerator
 from imblearn.keras import balanced_batch_generator
 
-iris = load_iris()
-X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
-y = to_categorical(y, 3)
+
+@pytest.fixture
+def data():
+    iris = load_iris()
+    X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+    y = to_categorical(y, 3)
+    return X, y
 
 
 def _build_keras_model(n_classes, n_features):
@@ -31,19 +35,20 @@ def _build_keras_model(n_classes, n_features):
     return model
 
 
-def test_balanced_batch_generator_class_no_return_indices():
+def test_balanced_batch_generator_class_no_return_indices(data):
     with pytest.raises(ValueError, match='needs to return the indices'):
-        BalancedBatchGenerator(X, y, sampler=ClusterCentroids(), batch_size=10)
+        BalancedBatchGenerator(*data, sampler=ClusterCentroids(), batch_size=10)
 
 
 @pytest.mark.parametrize(
     "sampler, sample_weight",
     [(None, None),
      (RandomOverSampler(), None),
      (NearMiss(), None),
-     (None, np.random.uniform(size=(y.shape[0])))]
+     (None, np.random.uniform(size=120))]
 )
-def test_balanced_batch_generator_class(sampler, sample_weight):
+def test_balanced_batch_generator_class(data, sampler, sample_weight):
+    X, y = data
     model = _build_keras_model(y.shape[1], X.shape[1])
     training_generator = BalancedBatchGenerator(X, y,
                                                  sample_weight=sample_weight,
@@ -55,33 +60,35 @@ def test_balanced_batch_generator_class(sampler, sample_weight):


 @pytest.mark.parametrize("keep_sparse", [True, False])
-def test_balanced_batch_generator_class_sparse(keep_sparse):
+def test_balanced_batch_generator_class_sparse(data, keep_sparse):
+    X, y = data
     training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y,
                                                  batch_size=10,
                                                  keep_sparse=keep_sparse,
                                                  random_state=42)
     for idx in range(len(training_generator)):
-        X_batch, y_batch = training_generator.__getitem__(idx)
+        X_batch, _ = training_generator.__getitem__(idx)
         if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
             assert not sparse.issparse(X_batch)
 
 
-def test_balanced_batch_generator_function_no_return_indices():
+def test_balanced_batch_generator_function_no_return_indices(data):
     with pytest.raises(ValueError, match='needs to return the indices'):
         balanced_batch_generator(
-            X, y, sampler=ClusterCentroids(), batch_size=10, random_state=42)
+            *data, sampler=ClusterCentroids(), batch_size=10, random_state=42)
 
 
 @pytest.mark.parametrize(
     "sampler, sample_weight",
     [(None, None),
      (RandomOverSampler(), None),
      (NearMiss(), None),
-     (None, np.random.uniform(size=(y.shape[0])))]
+     (None, np.random.uniform(size=120))]
 )
-def test_balanced_batch_generator_function(sampler, sample_weight):
+def test_balanced_batch_generator_function(data, sampler, sample_weight):
+    X, y = data
     model = _build_keras_model(y.shape[1], X.shape[1])
     training_generator, steps_per_epoch = balanced_batch_generator(
         X, y, sample_weight=sample_weight, sampler=sampler, batch_size=10,
@@ -92,12 +99,13 @@ def test_balanced_batch_generator_function(sampler, sample_weight):


 @pytest.mark.parametrize("keep_sparse", [True, False])
-def test_balanced_batch_generator_function_sparse(keep_sparse):
+def test_balanced_batch_generator_function_sparse(data, keep_sparse):
+    X, y = data
     training_generator, steps_per_epoch = balanced_batch_generator(
         sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
         random_state=42)
-    for idx in range(steps_per_epoch):
-        X_batch, y_batch = next(training_generator)
+    for _ in range(steps_per_epoch):
+        X_batch, _ = next(training_generator)
         if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
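
One detail worth noting in the parametrize lists above: the sample-weight case changed from np.random.uniform(size=(y.shape[0])) to size=120 because y is now produced by the data fixture, while @pytest.mark.parametrize arguments are evaluated at collection time, before any fixture runs; 120 is simply the 30 + 50 + 40 samples requested from make_imbalance. A small standalone sketch of that constraint (the fixture contents here are illustrative):

import numpy as np
import pytest


@pytest.fixture
def data():
    # 120 samples, mirroring the 30 + 50 + 40 class counts used above
    X = np.arange(240, dtype=float).reshape(120, 2)
    y = np.zeros(120)
    return X, y


@pytest.mark.parametrize(
    # parametrize arguments are evaluated at collection time, before any
    # fixture runs, so a fixture value cannot be used here; hence the
    # hard-coded length instead of y.shape[0]
    "sample_weight", [None, np.random.uniform(size=120)]
)
def test_weight_length_matches_data(data, sample_weight):
    X, _ = data
    if sample_weight is not None:
        assert sample_weight.shape[0] == X.shape[0]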