diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst index 913bf3634..ff62c255a 100644 --- a/doc/whats_new/v0.0.4.rst +++ b/doc/whats_new/v0.0.4.rst @@ -104,6 +104,9 @@ Maintenance - Catch deprecation warning in testing. :issue:`441` by :user:`Guillaume Lemaitre `. +- Refactor and impose `pytest` style tests. + :issue:`470` by :user:`Guillaume Lemaitre `. + Documentation ............. diff --git a/imblearn/combine/tests/test_smote_enn.py b/imblearn/combine/tests/test_smote_enn.py index 39a16f10e..a40b77579 100644 --- a/imblearn/combine/tests/test_smote_enn.py +++ b/imblearn/combine/tests/test_smote_enn.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_allclose, assert_array_equal @@ -100,12 +98,12 @@ def test_validate_estimator_default(): assert_array_equal(y_resampled, y_gt) -def test_error_wrong_object(): - smote = 'rnd' - enn = 'rnd' - smt = SMOTEENN(smote=smote, random_state=RND_SEED) - with raises(ValueError, match="smote needs to be a SMOTE"): - smt.fit_resample(X, Y) - smt = SMOTEENN(enn=enn, random_state=RND_SEED) - with raises(ValueError, match="enn needs to be an "): +@pytest.mark.parametrize( + "smote_params, err_msg", + [({'smote': 'rnd'}, "smote needs to be a SMOTE"), + ({'enn': 'rnd'}, "enn needs to be an ")] +) +def test_error_wrong_object(smote_params, err_msg): + smt = SMOTEENN(**smote_params) + with pytest.raises(ValueError, match=err_msg): smt.fit_resample(X, Y) diff --git a/imblearn/combine/tests/test_smote_tomek.py b/imblearn/combine/tests/test_smote_tomek.py index 2221bc463..b2c2ad999 100644 --- a/imblearn/combine/tests/test_smote_tomek.py +++ b/imblearn/combine/tests/test_smote_tomek.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_allclose, assert_array_equal @@ -106,12 +104,12 @@ def test_validate_estimator_default(): assert_array_equal(y_resampled, y_gt) -def test_error_wrong_object(): - smote = 'rnd' - tomek = 'rnd' - smt = SMOTETomek(smote=smote, random_state=RND_SEED) - with raises(ValueError, match="smote needs to be a SMOTE"): - smt.fit_resample(X, Y) - smt = SMOTETomek(tomek=tomek, random_state=RND_SEED) - with raises(ValueError, match="tomek needs to be a TomekLinks"): +@pytest.mark.parametrize( + "smote_params, err_msg", + [({'smote': 'rnd'}, "smote needs to be a SMOTE"), + ({'tomek': 'rnd'}, "tomek needs to be a TomekLinks")] +) +def test_error_wrong_object(smote_params, err_msg): + smt = SMOTETomek(**smote_params) + with pytest.raises(ValueError, match=err_msg): smt.fit_resample(X, Y) diff --git a/imblearn/datasets/tests/test_imbalance.py b/imblearn/datasets/tests/test_imbalance.py index 6776a6daf..18cd98e79 100644 --- a/imblearn/datasets/tests/test_imbalance.py +++ b/imblearn/datasets/tests/test_imbalance.py @@ -3,64 +3,66 @@ # Christos Aridas # License: MIT -from __future__ import print_function - from collections import Counter import pytest import numpy as np -from pytest import raises - from sklearn.datasets import load_iris from imblearn.datasets import make_imbalance -data = load_iris() -X, Y = data.data, data.target +@pytest.fixture +def iris(): + return load_iris(return_X_y=True) -def test_make_imbalanced_backcompat(): + +def test_make_imbalanced_backcompat(iris): # check an error is raised with we don't pass sampling_strategy and ratio - with 
raises(TypeError, match="missing 1 required positional argument"): - make_imbalance(X, Y) + with pytest.raises(TypeError, match="missing 1 required positional argument"): + make_imbalance(*iris) -def test_make_imbalance_error(): +@pytest.mark.parametrize( + "sampling_strategy, err_msg", + [({0: -100, 1: 50, 2: 50}, "in a class cannot be negative"), + ({0: 10, 1: 70}, "should be less or equal to the original"), + ('random-string', "has to be a dictionary or a function")] +) +def test_make_imbalance_error(iris, sampling_strategy, err_msg): # we are reusing part of utils.check_sampling_strategy, however this is not # cover in the common tests so we will repeat it here - sampling_strategy = {0: -100, 1: 50, 2: 50} - with raises(ValueError, match="in a class cannot be negative"): - make_imbalance(X, Y, sampling_strategy) - sampling_strategy = {0: 10, 1: 70} - with raises(ValueError, match="should be less or equal to the original"): - make_imbalance(X, Y, sampling_strategy) - y_ = np.zeros((X.shape[0], )) - sampling_strategy = {0: 10} - with raises(ValueError, match="needs to have more than 1 class."): - make_imbalance(X, y_, sampling_strategy) - sampling_strategy = 'random-string' - with raises(ValueError, match="has to be a dictionary or a function"): - make_imbalance(X, Y, sampling_strategy) - - -def test_make_imbalance_dict(): - sampling_strategy = {0: 10, 1: 20, 2: 30} - X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy) - assert Counter(y_) == sampling_strategy - - sampling_strategy = {0: 10, 1: 20} - X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy) - assert Counter(y_) == {0: 10, 1: 20, 2: 50} + X, y = iris + with pytest.raises(ValueError, match=err_msg): + make_imbalance(X, y, sampling_strategy) + + +def test_make_imbalance_error_single_class(iris): + X, y = iris + y = np.zeros_like(y) + with pytest.raises(ValueError, match="needs to have more than 1 class."): + make_imbalance(X, y, {0: 10}) + + +@pytest.mark.parametrize( + "sampling_strategy, expected_counts", + [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}), + ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})] +) +def test_make_imbalance_dict(iris, sampling_strategy, expected_counts): + X, y = iris + _, y_ = make_imbalance(X, y, sampling_strategy=sampling_strategy) + assert Counter(y_) == expected_counts @pytest.mark.filterwarnings("ignore:'ratio' has been deprecated in 0.4") -def test_make_imbalance_ratio(): - # check that using 'ratio' is working - sampling_strategy = {0: 10, 1: 20, 2: 30} - X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy) - assert Counter(y_) == sampling_strategy - - sampling_strategy = {0: 10, 1: 20} - X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy) - assert Counter(y_) == {0: 10, 1: 20, 2: 50} +@pytest.mark.parametrize( + "sampling_strategy, expected_counts", + [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}), + ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})] +) +def test_make_imbalance_dict_ratio(iris, sampling_strategy, expected_counts): + X, y = iris + _, y_ = make_imbalance(X, y, ratio=sampling_strategy) + assert Counter(y_) == expected_counts diff --git a/imblearn/datasets/tests/test_zenodo.py b/imblearn/datasets/tests/test_zenodo.py index 7d95784ec..28d0a06cc 100644 --- a/imblearn/datasets/tests/test_zenodo.py +++ b/imblearn/datasets/tests/test_zenodo.py @@ -6,10 +6,10 @@ # Christos Aridas # License: MIT -from imblearn.datasets import fetch_datasets -from sklearn.utils.testing import SkipTest, assert_allclose +import pytest -from pytest import raises +from imblearn.datasets 
import fetch_datasets +from sklearn.utils.testing import SkipTest DATASET_SHAPE = { 'ecoli': (336, 7), @@ -79,19 +79,20 @@ def test_fetch_filter(): assert DATASET_SHAPE['ecoli'] == X1.shape assert X1.shape == X2.shape - assert_allclose(X1.sum(), X2.sum()) + assert X1.sum() == pytest.approx(X2.sum()) y1, y2 = datasets1['ecoli'].target, datasets2['ecoli'].target assert (X1.shape[0], ) == y1.shape assert (X1.shape[0], ) == y2.shape -def test_fetch_error(): - with raises(ValueError, match='is not a dataset available.'): - fetch_datasets(filter_data=tuple(['rnd'])) - with raises(ValueError, match='dataset with the ID='): - fetch_datasets(filter_data=tuple([-1])) - with raises(ValueError, match='dataset with the ID='): - fetch_datasets(filter_data=tuple([100])) - with raises(ValueError, match='value in the tuple'): - fetch_datasets(filter_data=tuple([1.00])) +@pytest.mark.parametrize( + "filter_data, err_msg", + [(('rnf',), "is not a dataset available"), + ((-1,), "dataset with the ID="), + ((100,), "dataset with the ID="), + ((1.00,), "value in the tuple")] +) +def test_fetch_error(filter_data, err_msg): + with pytest.raises(ValueError, match=err_msg): + fetch_datasets(filter_data=filter_data) diff --git a/imblearn/ensemble/tests/test_balance_cascade.py b/imblearn/ensemble/tests/test_balance_cascade.py index 334c6cd20..4c712d5eb 100644 --- a/imblearn/ensemble/tests/test_balance_cascade.py +++ b/imblearn/ensemble/tests/test_balance_cascade.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py index 4565652f9..f541bfd1a 100644 --- a/imblearn/keras/tests/test_generator.py +++ b/imblearn/keras/tests/test_generator.py @@ -18,9 +18,13 @@ from imblearn.keras import BalancedBatchGenerator from imblearn.keras import balanced_batch_generator -iris = load_iris() -X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40}) -y = to_categorical(y, 3) + +@pytest.fixture +def data(): + iris = load_iris() + X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40}) + y = to_categorical(y, 3) + return X, y def _build_keras_model(n_classes, n_features): @@ -31,9 +35,9 @@ def _build_keras_model(n_classes, n_features): return model -def test_balanced_batch_generator_class_no_return_indices(): +def test_balanced_batch_generator_class_no_return_indices(data): with pytest.raises(ValueError, match='needs to return the indices'): - BalancedBatchGenerator(X, y, sampler=ClusterCentroids(), batch_size=10) + BalancedBatchGenerator(*data, sampler=ClusterCentroids(), batch_size=10) @pytest.mark.parametrize( @@ -41,9 +45,10 @@ def test_balanced_batch_generator_class_no_return_indices(): [(None, None), (RandomOverSampler(), None), (NearMiss(), None), - (None, np.random.uniform(size=(y.shape[0])))] + (None, np.random.uniform(size=120))] ) -def test_balanced_batch_generator_class(sampler, sample_weight): +def test_balanced_batch_generator_class(data, sampler, sample_weight): + X, y = data model = _build_keras_model(y.shape[1], X.shape[1]) training_generator = BalancedBatchGenerator(X, y, sample_weight=sample_weight, @@ -55,23 +60,24 @@ def test_balanced_batch_generator_class(sampler, sample_weight): @pytest.mark.parametrize("keep_sparse", [True, False]) -def test_balanced_batch_generator_class_sparse(keep_sparse): +def test_balanced_batch_generator_class_sparse(data, keep_sparse): + X, y = data training_generator = 
BalancedBatchGenerator(sparse.csr_matrix(X), y, batch_size=10, keep_sparse=keep_sparse, random_state=42) for idx in range(len(training_generator)): - X_batch, y_batch = training_generator.__getitem__(idx) + X_batch, _ = training_generator.__getitem__(idx) if keep_sparse: assert sparse.issparse(X_batch) else: assert not sparse.issparse(X_batch) -def test_balanced_batch_generator_function_no_return_indices(): +def test_balanced_batch_generator_function_no_return_indices(data): with pytest.raises(ValueError, match='needs to return the indices'): balanced_batch_generator( - X, y, sampler=ClusterCentroids(), batch_size=10, random_state=42) + *data, sampler=ClusterCentroids(), batch_size=10, random_state=42) @pytest.mark.parametrize( @@ -79,9 +85,10 @@ def test_balanced_batch_generator_function_no_return_indices(): [(None, None), (RandomOverSampler(), None), (NearMiss(), None), - (None, np.random.uniform(size=(y.shape[0])))] + (None, np.random.uniform(size=120))] ) -def test_balanced_batch_generator_function(sampler, sample_weight): +def test_balanced_batch_generator_function(data, sampler, sample_weight): + X, y = data model = _build_keras_model(y.shape[1], X.shape[1]) training_generator, steps_per_epoch = balanced_batch_generator( X, y, sample_weight=sample_weight, sampler=sampler, batch_size=10, @@ -92,12 +99,13 @@ def test_balanced_batch_generator_function(sampler, sample_weight): @pytest.mark.parametrize("keep_sparse", [True, False]) -def test_balanced_batch_generator_function_sparse(keep_sparse): +def test_balanced_batch_generator_function_sparse(data, keep_sparse): + X, y = data training_generator, steps_per_epoch = balanced_batch_generator( sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10, random_state=42) - for idx in range(steps_per_epoch): - X_batch, y_batch = next(training_generator) + for _ in range(steps_per_epoch): + X_batch, _ = next(training_generator) if keep_sparse: assert sparse.issparse(X_batch) else: diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py index ae2585e13..664361eb7 100644 --- a/imblearn/metrics/tests/test_classification.py +++ b/imblearn/metrics/tests/test_classification.py @@ -4,14 +4,13 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division from functools import partial import numpy as np import pytest -from pytest import approx, raises from sklearn import datasets from sklearn import svm @@ -103,49 +102,40 @@ def test_sensitivity_specificity_score_binary(): # individual scoring function that can be used for grid search: in the # binary class case the score is the value of the measure for the positive # class (e.g. label == 1). This is deprecated for average != 'binary'. 
- for kwargs, my_assert in [({}, assert_no_warnings), ({ - 'average': 'binary' - }, assert_no_warnings)]: - sen = my_assert(sensitivity_score, y_true, y_pred, **kwargs) - assert_allclose(sen, 0.68, rtol=R_TOL) + for kwargs in ({}, {'average': 'binary'}): + sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs) + assert sen == pytest.approx(0.68, rel=R_TOL) - spe = my_assert(specificity_score, y_true, y_pred, **kwargs) - assert_allclose(spe, 0.88, rtol=R_TOL) + spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs) + assert spe == pytest.approx(0.88, rel=R_TOL) @pytest.mark.filterwarnings("ignore:Specificity is ill-defined") -def test_sensitivity_specificity_f_binary_single_class(): +@pytest.mark.parametrize( + "y_pred, expected_sensitivity, expected_specificity", + [(([1, 1], [1, 1]), 1.0, 0.0), + (([-1, -1], [-1, -1]), 0.0, 0.0)] +) +def test_sensitivity_specificity_f_binary_single_class( + y_pred, expected_sensitivity, expected_specificity): # Such a case may occur with non-stratified cross-validation - assert sensitivity_score([1, 1], [1, 1]) == 1. - assert specificity_score([1, 1], [1, 1]) == 0. + assert sensitivity_score(*y_pred) == expected_sensitivity + assert specificity_score(*y_pred) == expected_specificity - assert sensitivity_score([-1, -1], [-1, -1]) == 0. - assert specificity_score([-1, -1], [-1, -1]) == 0. - -def test_sensitivity_specificity_extra_labels(): +@pytest.mark.parametrize( + "average, expected_specificty", + [(None, [1., 0.67, 1., 1., 1.]), + ('macro', np.mean([1., 0.67, 1., 1., 1.])), + ('micro', 15 / 16)] +) +def test_sensitivity_specificity_extra_labels(average, expected_specificty): y_true = [1, 3, 3, 2] y_pred = [1, 1, 3, 2] - # No average: zeros in array - actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average=None) - assert_allclose([1., 0.67, 1., 1., 1.], actual, rtol=R_TOL) - - # Macro average is changed - actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average='macro') - assert_allclose(np.mean([1., 0.67, 1., 1., 1.]), actual, rtol=R_TOL) - - # Check for micro - actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average='micro') - assert_allclose(15. 
/ 16., actual, rtol=R_TOL) - - # Check for weighted actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average='macro') - assert_allclose(np.mean([1., 0.67, 1., 1., 1.]), actual, rtol=R_TOL) + y_true, y_pred, labels=[0, 1, 2, 3, 4], average=average) + assert_allclose(expected_specificty, actual, rtol=R_TOL) def test_sensitivity_specificity_ignored_labels(): @@ -176,7 +166,7 @@ def test_sensitivity_specificity_error_multilabels(): y_true_bin = label_binarize(y_true, classes=np.arange(5)) y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) - with raises(ValueError): + with pytest.raises(ValueError): sensitivity_score(y_true_bin, y_pred_bin) @@ -184,12 +174,12 @@ def test_sensitivity_specificity_support_errors(): y_true, y_pred, _ = make_prediction(binary=True) # Bad pos_label - with raises(ValueError): + with pytest.raises(ValueError): sensitivity_specificity_support( y_true, y_pred, pos_label=2, average='binary') # Bad average option - with raises(ValueError): + with pytest.raises(ValueError): sensitivity_specificity_support([0, 1, 2], [1, 2, 0], average='mega') @@ -210,105 +200,59 @@ def test_geometric_mean_support_binary(): @pytest.mark.filterwarnings("ignore:Recall is ill-defined") -def test_geometric_mean_multiclass(): - y_true = [0, 0, 1, 1] - y_pred = [0, 0, 1, 1] - assert_allclose(geometric_mean_score(y_true, y_pred), 1.0, rtol=R_TOL) - - y_true = [0, 0, 0, 0] - y_pred = [1, 1, 1, 1] - assert_allclose(geometric_mean_score(y_true, y_pred), 0.0, rtol=R_TOL) - - cor = 0.001 - y_true = [0, 0, 0, 0] - y_pred = [0, 0, 0, 0] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), 1.0, rtol=R_TOL) - - y_true = [0, 0, 0, 0] - y_pred = [1, 1, 1, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), cor, rtol=R_TOL) - - y_true = [0, 0, 1, 1] - y_pred = [0, 1, 1, 0] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), 0.5, rtol=R_TOL) - - y_true = [0, 1, 2, 0, 1, 2] - y_pred = [0, 2, 1, 0, 0, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), - (1 * cor * cor) ** (1.0 / 3.0), - rtol=R_TOL) - - y_true = [0, 1, 2, 3, 4, 5] - y_pred = [0, 1, 2, 3, 4, 5] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), 1, rtol=R_TOL) - - y_true = [0, 1, 1, 1, 1, 0] - y_pred = [0, 0, 1, 1, 1, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), - (0.5 * 0.75) ** 0.5, - rtol=R_TOL) - - y_true = [0, 1, 2, 0, 1, 2] - y_pred = [0, 2, 1, 0, 0, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, average='macro'), - 0.47140452079103168, - rtol=R_TOL) - assert_allclose( - geometric_mean_score(y_true, y_pred, average='micro'), - 0.47140452079103168, - rtol=R_TOL) - assert_allclose( - geometric_mean_score(y_true, y_pred, average='weighted'), - 0.47140452079103168, - rtol=R_TOL) - assert_allclose( - geometric_mean_score(y_true, y_pred, average=None), - [0.8660254, 0.0, 0.0], - rtol=R_TOL) +@pytest.mark.parametrize( + "y_true, y_pred, correction, expected_gmean", + [([0, 0, 1, 1], [0, 0, 1, 1], 0.0, 1.0), + ([0, 0, 0, 0], [1, 1, 1, 1], 0.0, 0.0), + ([0, 0, 0, 0], [0, 0, 0, 0], 0.001, 1.0), + ([0, 0, 0, 0], [1, 1, 1, 1], 0.001, 0.001), + ([0, 0, 1, 1], [0, 1, 1, 0], 0.001, 0.5), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 0.001, (0.001 ** 2) ** (1 / 3)), + ([0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], 0.001, 1), + ([0, 1, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1], 0.001, (0.5 * 0.75) ** 0.5)] +) +def test_geometric_mean_multiclass(y_true, y_pred, correction, 
expected_gmean): + gmean = geometric_mean_score(y_true, y_pred, correction=correction) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) - y_true = [0, 1, 2, 0, 1, 2] - y_pred = [0, 1, 1, 0, 0, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, labels=[0, 1]), - 0.70710678118654752, - rtol=R_TOL) - assert_allclose( - geometric_mean_score( - y_true, y_pred, labels=[0, 1], sample_weight=[1, 2, 1, 1, 2, 1]), - 0.70710678118654752, - rtol=R_TOL) - assert_allclose( - geometric_mean_score( - y_true, - y_pred, - labels=[0, 1], - sample_weight=[1, 2, 1, 1, 2, 1], - average='weighted'), - 0.3333333333, - rtol=R_TOL) +@pytest.mark.filterwarnings("ignore:Recall is ill-defined") +@pytest.mark.parametrize( + "y_true, y_pred, average, expected_gmean", + [([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 'macro', 0.471), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 'micro', 0.471), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 'weighted', 0.471), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], None, [0.8660254, 0.0, 0.0])] +) +def test_geometric_mean_average(y_true, y_pred, average, expected_gmean): + gmean = geometric_mean_score(y_true, y_pred, average=average) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) + + +@pytest.mark.parametrize( + "y_true, y_pred, sample_weight, average, expected_gmean", + [([0, 1, 2, 0, 1, 2], [0, 1, 1, 0, 0, 1], None, 'multiclass', 0.707), + ([0, 1, 2, 0, 1, 2], [0, 1, 1, 0, 0, 1], [1, 2, 1, 1, 2, 1], 'multiclass', 0.707), + ([0, 1, 2, 0, 1, 2], [0, 1, 1, 0, 0, 1], [1, 2, 1, 1, 2, 1], 'weighted', 0.333)] +) +def test_geometric_mean_sample_weight(y_true, y_pred, sample_weight, average, + expected_gmean): + gmean = geometric_mean_score(y_true, y_pred, labels=[0, 1], + sample_weight=sample_weight, + average=average) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) + + +@pytest.mark.parametrize( + "average, expected_gmean", + [('multiclass', 0.41), (None, [0.85, 0.29, 0.7]), + ('macro', 0.68), ('weighted', 0.65)] +) +def test_geometric_mean_score_prediction(average, expected_gmean): y_true, y_pred, _ = make_prediction(binary=False) - geo_mean = geometric_mean_score(y_true, y_pred) - assert_allclose(geo_mean, 0.41, rtol=R_TOL) - - # Compute the geometric mean for each of the classes - geo_mean = geometric_mean_score(y_true, y_pred, average=None) - assert_allclose(geo_mean, [0.85, 0.29, 0.7], rtol=R_TOL) - - # average tests - geo_mean = geometric_mean_score(y_true, y_pred, average='macro') - assert_allclose(geo_mean, 0.68, rtol=R_TOL) - - geo_mean = geometric_mean_score(y_true, y_pred, average='weighted') - assert_allclose(geo_mean, 0.65, rtol=R_TOL) + gmean = geometric_mean_score(y_true, y_pred, average=average) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) def test_iba_geo_mean_binary(): @@ -412,7 +356,7 @@ def test_classification_report_imbalanced_multiclass_with_unicode_label(): u'red¢ 0.42 0.90 0.55 0.57 0.70 0.51 20 avg / total ' u'0.51 0.53 0.80 0.47 0.58 0.40 75') if np_version[:3] < (1, 7, 0): - with raises(RuntimeError, match="NumPy < 1.7.0"): + with pytest.raises(RuntimeError, match="NumPy < 1.7.0"): classification_report_imbalanced(y_true, y_pred) else: report = classification_report_imbalanced(y_true, y_pred) @@ -435,46 +379,28 @@ def test_classification_report_imbalanced_multiclass_with_long_string_label(): assert _format_report(report) == expected_report -def test_iba_sklearn_metrics(): +@pytest.mark.parametrize( + "score, expected_score", + [(accuracy_score, 0.54756), (jaccard_similarity_score, 0.54756), + (precision_score, 
0.65025), (recall_score, 0.41616)] +) +def test_iba_sklearn_metrics(score, expected_score): y_true, y_pred, _ = make_prediction(binary=True) - acc = make_index_balanced_accuracy(alpha=0.5, squared=True)(accuracy_score) - score = acc(y_true, y_pred) - assert score == approx(0.54756) - - jss = make_index_balanced_accuracy( - alpha=0.5, squared=True)(jaccard_similarity_score) - score = jss(y_true, y_pred) - assert score == approx(0.54756) + score_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)(score) + score = score_iba(y_true, y_pred) + assert score == pytest.approx(expected_score) - pre = make_index_balanced_accuracy( - alpha=0.5, squared=True)(precision_score) - score = pre(y_true, y_pred) - assert score == approx(0.65025) - rec = make_index_balanced_accuracy(alpha=0.5, squared=True)(recall_score) - score = rec(y_true, y_pred) - assert score == approx(0.41616000000000009) - - -def test_iba_error_y_score_prob(): +@pytest.mark.parametrize( + "score_loss", + [average_precision_score, brier_score_loss, + cohen_kappa_score, roc_auc_score] +) +def test_iba_error_y_score_prob_error(score_loss): y_true, y_pred, _ = make_prediction(binary=True) aps = make_index_balanced_accuracy( - alpha=0.5, squared=True)(average_precision_score) - with raises(AttributeError): + alpha=0.5, squared=True)(score_loss) + with pytest.raises(AttributeError): aps(y_true, y_pred) - - brier = make_index_balanced_accuracy( - alpha=0.5, squared=True)(brier_score_loss) - with raises(AttributeError): - brier(y_true, y_pred) - - kappa = make_index_balanced_accuracy( - alpha=0.5, squared=True)(cohen_kappa_score) - with raises(AttributeError): - kappa(y_true, y_pred) - - ras = make_index_balanced_accuracy(alpha=0.5, squared=True)(roc_auc_score) - with raises(AttributeError): - ras(y_true, y_pred) diff --git a/imblearn/metrics/tests/test_score_objects.py b/imblearn/metrics/tests/test_score_objects.py index 3e9dd3e20..b1612a965 100644 --- a/imblearn/metrics/tests/test_score_objects.py +++ b/imblearn/metrics/tests/test_score_objects.py @@ -19,127 +19,46 @@ R_TOL = 1e-2 -@pytest.mark.filterwarnings("ignore:Liblinear failed to converge") -def test_imblearn_classification_scorers(): +@pytest.fixture +def data(): X, y = make_blobs(random_state=0, centers=2) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - clf = LinearSVC(random_state=0) - clf.fit(X_train, y_train) - - # sensitivity scorer - scorer = make_scorer(sensitivity_score, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(sensitivity_score, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(sensitivity_score, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(sensitivity_score, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, 
rtol=R_TOL) - - # specificity scorer - scorer = make_scorer(specificity_score, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(specificity_score, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(specificity_score, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(specificity_score, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.95, rtol=R_TOL) - - # geometric_mean scorer - scorer = make_scorer(geometric_mean_score, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer( - geometric_mean_score, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) + return train_test_split(X, y, random_state=0) - scorer = make_scorer(geometric_mean_score, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - scorer = make_scorer(geometric_mean_score, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) +@pytest.mark.filterwarnings("ignore:Liblinear failed to converge") +@pytest.mark.parametrize( + "score, expected_score", + [(sensitivity_score, 0.92), + (specificity_score, 0.92), + (geometric_mean_score, 0.92), + (make_index_balanced_accuracy()(geometric_mean_score), 0.85)] +) +@pytest.mark.parametrize("average",['macro', 'weighted', 'micro']) +def test_scorer_common_average(data, score, expected_score, average): + X_train, X_test, y_train, _ = data - # make a iba metric before a scorer - geo_mean_iba = make_index_balanced_accuracy()(geometric_mean_score) - scorer = make_scorer(geo_mean_iba, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) + scorer = make_scorer(score, pos_label=None, average=average) + grid = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]}, + scoring=scorer, cv=3, iid=False) grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.85, rtol=R_TOL) - scorer = make_scorer(geo_mean_iba, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), 
param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.85, rtol=R_TOL) + assert grid.best_score_ == pytest.approx(expected_score, rel=R_TOL) - scorer = make_scorer(geo_mean_iba, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.85, rtol=R_TOL) - scorer = make_scorer(geo_mean_iba, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.84, rtol=R_TOL) +@pytest.mark.filterwarnings("ignore:Liblinear failed to converge") +@pytest.mark.parametrize( + "score, average, expected_score", + [(sensitivity_score, 'binary', 0.92), + (specificity_score, 'binary', 0.95), + (geometric_mean_score, 'multiclass', 0.92), + (make_index_balanced_accuracy()(geometric_mean_score), 'multiclass', 0.84)] +) +def test_scorer_default_average(data, score, average, expected_score): + X_train, X_test, y_train, _ = data + + scorer = make_scorer(score, pos_label=1, average=average) + grid = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]}, + scoring=scorer, cv=3, iid=False) + grid.fit(X_train, y_train).predict(X_test) + + assert grid.best_score_ == pytest.approx(expected_score, rel=R_TOL) diff --git a/imblearn/over_sampling/tests/test_adasyn.py b/imblearn/over_sampling/tests/test_adasyn.py index 94223f9bc..6e0d238e0 100644 --- a/imblearn/over_sampling/tests/test_adasyn.py +++ b/imblearn/over_sampling/tests/test_adasyn.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_allclose, assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -62,13 +60,6 @@ def test_ada_fit_resample(): assert_array_equal(y_resampled, y_gt) -def test_ada_fit_sampling_strategy_error(): - sampling_strategy = {0: 9, 1: 12} - ada = ADASYN(sampling_strategy=sampling_strategy, random_state=RND_SEED) - with raises(ValueError, match="No samples will be generated."): - ada.fit_resample(X, Y) - - def test_ada_fit_resample_nn_obj(): nn = NearestNeighbors(n_neighbors=6) ada = ADASYN(random_state=RND_SEED, n_neighbors=nn) @@ -95,8 +86,12 @@ def test_ada_fit_resample_nn_obj(): assert_array_equal(y_resampled, y_gt) -def test_ada_wrong_nn_obj(): - nn = 'rnd' - ada = ADASYN(random_state=RND_SEED, n_neighbors=nn) - with raises(ValueError, match="has to be one of"): - ada.fit_resample(X, Y) +@pytest.mark.parametrize( + "adasyn_params, err_msg", + [({"sampling_strategy": {0: 9, 1: 12}}, "No samples will be generated."), + ({"n_neighbors": 'rnd'}, "has to be one of")] +) +def test_adasyn_error(adasyn_params, err_msg): + adasyn = ADASYN(**adasyn_params) + with pytest.raises(ValueError, match=err_msg): + adasyn.fit_resample(X, Y) diff --git a/imblearn/over_sampling/tests/test_random_over_sampler.py b/imblearn/over_sampling/tests/test_random_over_sampler.py index 484f6110a..6838edefb 100644 --- a/imblearn/over_sampling/tests/test_random_over_sampler.py +++ b/imblearn/over_sampling/tests/test_random_over_sampler.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - from collections import Counter import numpy as np diff --git 
a/imblearn/over_sampling/tests/test_smote.py b/imblearn/over_sampling/tests/test_smote.py index 674eb7021..6964ab79f 100644 --- a/imblearn/over_sampling/tests/test_smote.py +++ b/imblearn/over_sampling/tests/test_smote.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np import pytest @@ -31,13 +29,6 @@ R_TOL = 1e-4 -def test_smote_wrong_kind(): - kind = 'rnd' - smote = SMOTE(kind=kind, random_state=RND_SEED) - with pytest.raises(ValueError, match="Unknown kind for SMOTE"): - smote.fit_resample(X, Y) - - def test_sample_regular(): smote = SMOTE(random_state=RND_SEED) X_resampled, y_resampled = smote.fit_resample(X, Y) @@ -236,24 +227,24 @@ def test_sample_regular_with_nn(): @pytest.mark.filterwarnings('ignore:"kind" is deprecated in 0.4 and will be') @pytest.mark.filterwarnings('ignore:"m_neighbors" is deprecated in 0.4 and') -def test_wrong_nn(): - kind = 'borderline1' - nn_m = 'rnd' - nn_k = NearestNeighbors(n_neighbors=6) - smote = SMOTE( - random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m) - with pytest.raises(ValueError, match="has to be one of"): - smote.fit_resample(X, Y) - nn_k = 'rnd' - nn_m = NearestNeighbors(n_neighbors=10) - smote = SMOTE( - random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m) - with pytest.raises(ValueError, match="has to be one of"): - smote.fit_resample(X, Y) - kind = 'regular' - nn_k = 'rnd' - smote = SMOTE(random_state=RND_SEED, kind=kind, k_neighbors=nn_k) - with pytest.raises(ValueError, match="has to be one of"): +@pytest.mark.filterwarnings('ignore:"svm_estimator" is deprecated in 0.4 and') +@pytest.mark.parametrize( + "smote_params, err_msg", + [({"kind": "rnd"}, "Unknown kind for SMOTE"), + ({"kind": "borderline1", + "k_neighbors": NearestNeighbors(n_neighbors=6), + "m_neighbors": 'rnd'}, "has to be one of"), + ({"k_neighbors": 'rnd', + "m_neighbors": NearestNeighbors(n_neighbors=10)}, "has to be one of"), + ({"kind": "regular", + "k_neighbors": 'rnd'}, "has to be one of"), + ({"kind": "svm", + "k_neighbors": NearestNeighbors(n_neighbors=6), + "svm_estimator": 'rnd'}, "has to be one of")] +) +def test_smote_error_passing_estimator(smote_params, err_msg): + smote = SMOTE(**smote_params) + with pytest.raises(ValueError, match=err_msg): smote.fit_resample(X, Y) @@ -298,19 +289,6 @@ def test_sample_with_nn_svm(): assert_array_equal(y_resampled, y_gt) -@pytest.mark.filterwarnings('ignore:"kind" is deprecated in 0.4 and will be') -@pytest.mark.filterwarnings('ignore:"svm_estimator" is deprecated in 0.4 and') -def test_sample_regular_wrong_svm(): - kind = 'svm' - nn_k = NearestNeighbors(n_neighbors=6) - svm = 'rnd' - smote = SMOTE( - random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm) - - with pytest.raises(ValueError, match="has to be one of"): - smote.fit_resample(X, Y) - - def test_borderline_smote_wrong_kind(): bsmote = BorderlineSMOTE(kind='rand') with pytest.raises(ValueError, match='The possible "kind" of algorithm'): diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index b22d17615..373f18c76 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -15,12 +15,17 @@ tf = pytest.importorskip('tensorflow') -@pytest.mark.parametrize("sampler", [None, NearMiss(), RandomOverSampler()]) -def test_balanced_batch_generator(sampler): +@pytest.fixture +def data(): X, y = load_iris(return_X_y=True) X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 
40}) X = X.astype(np.float32) + return X, y + +@pytest.mark.parametrize("sampler", [None, NearMiss(), RandomOverSampler()]) +def test_balanced_batch_generator(data, sampler): + X, y = data batch_size = 10 training_generator, steps_per_epoch = balanced_batch_generator( X, y, sample_weight=None, sampler=sampler, @@ -74,10 +79,8 @@ def accuracy(y_true, y_pred): @pytest.mark.parametrize("keep_sparse", [True, False]) -def test_balanced_batch_generator_function_sparse(keep_sparse): - X, y = load_iris(return_X_y=True) - X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) - X = X.astype(np.float32) +def test_balanced_batch_generator_function_sparse(data, keep_sparse): + X, y = data training_generator, steps_per_epoch = balanced_batch_generator( sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10, diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py index 190d8f9e5..f2274ecf9 100644 --- a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py +++ b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py @@ -6,7 +6,7 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division import numpy as np from scipy import sparse diff --git a/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py index cda6d5549..87d959731 100644 --- a/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py +++ b/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py @@ -1,11 +1,9 @@ """Test the module cluster centroids.""" -from __future__ import print_function - from collections import Counter +import pytest import numpy as np from scipy import sparse -from pytest import raises from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal @@ -23,13 +21,14 @@ R_TOL = 1e-4 -def test_fit_resample_check_voting(): +@pytest.mark.parametrize( + "X, expected_voting", + [(X, 'soft'), (sparse.csr_matrix(X), 'hard')] +) +def test_fit_resample_check_voting(X, expected_voting): cc = ClusterCentroids(random_state=RND_SEED) cc.fit_resample(X, Y) - assert cc.voting_ == 'soft' - cc = ClusterCentroids(random_state=RND_SEED) - cc.fit_resample(sparse.csr_matrix(X), Y) - assert cc.voting_ == 'hard' + assert cc.voting_ == expected_voting def test_fit_resample_auto(): @@ -111,20 +110,12 @@ def test_fit_hard_voting(): assert np.any(np.all(x == X, axis=1)) -def test_fit_resample_error(): - sampling_strategy = 'auto' - cluster = 'rnd' - cc = ClusterCentroids( - sampling_strategy=sampling_strategy, - random_state=RND_SEED, - estimator=cluster) - with raises(ValueError, match="has to be a KMeans clustering"): - cc.fit_resample(X, Y) - - voting = 'unknown' - cc = ClusterCentroids( - sampling_strategy=sampling_strategy, - voting=voting, - random_state=RND_SEED) - with raises(ValueError, match="needs to be one of"): +@pytest.mark.parametrize( + "cluster_centroids_params, err_msg", + [({"estimator": "rnd"}, "has to be a KMeans clustering"), + ({"voting": "unknown"}, "needs to be one of")] +) +def test_fit_resample_error(cluster_centroids_params, err_msg): + cc = ClusterCentroids(**cluster_centroids_params) + with pytest.raises(ValueError, match=err_msg): cc.fit_resample(X, Y) diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py 
b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py index 716802d25..f9fb5e67a 100644 --- a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py @@ -4,7 +4,7 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division from collections import Counter diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py index 39de438e7..bec2be700 100644 --- a/imblearn/under_sampling/_prototype_selection/_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/_tomek_links.py @@ -5,7 +5,7 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division import numpy as np from sklearn.neighbors import NearestNeighbors diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py b/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py index 4ea4d5977..d970716d8 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py index fba3c0937..7e0e14e27 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py @@ -3,12 +3,10 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np -from sklearn.utils.testing import assert_array_equal from pytest import raises +from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import KNeighborsClassifier from imblearn.under_sampling import CondensedNearestNeighbour diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py index a5f85df9c..65012cff9 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py index 8a4bd5d71..b213e5b82 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py b/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py index b84021113..c5e08c529 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py +++ 
b/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -28,33 +26,17 @@ VERSION_NEARMISS = (1, 2, 3) -def test_nearmiss_wrong_version(): - version = 1000 - nm = NearMiss(version=version) - with raises(ValueError, match="must be 1, 2 or 3"): - nm.fit_resample(X, Y) - - -def test_nm_wrong_nn_obj(): - sampling_strategy = 'auto' - nn = 'rnd' - nm = NearMiss( - sampling_strategy=sampling_strategy, - version=VERSION_NEARMISS, - return_indices=True, - n_neighbors=nn) - with raises(ValueError, match="has to be one of"): +@pytest.mark.parametrize( + "nearmiss_params, err_msg", + [({"version": 1000}, "must be 1, 2 or 3"), + ({"version": 1, "n_neighbors": 'rnd'}, "has to be one of"), + ({"version": 3, "n_neighbors": NearestNeighbors(n_neighbors=3), + "n_neighbors_ver3": "rnd"}, "has to be one of")] +) +def test_nearmiss_error(nearmiss_params, err_msg): + nm = NearMiss(**nearmiss_params) + with pytest.raises(ValueError, match=err_msg): nm.fit_resample(X, Y) - nn3 = 'rnd' - nn = NearestNeighbors(n_neighbors=3) - nm3 = NearMiss( - sampling_strategy=sampling_strategy, - version=3, - return_indices=True, - n_neighbors=nn, - n_neighbors_ver3=nn3) - with raises(ValueError, match="has to be one of"): - nm3.fit_resample(X, Y) def test_nm_fit_resample_auto(): diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py index c2b3a0143..c40f202d7 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py @@ -3,8 +3,8 @@ # Christos Aridas # License: MIT +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -24,21 +24,16 @@ Y = np.array([1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 0, 0, 2, 1, 2]) -def test_ncr_error(): - threshold_cleaning = -10 - with raises( - ValueError, - match=("'threshold_cleaning' is a value between" - " 0 and 1")): - NeighbourhoodCleaningRule( - threshold_cleaning=threshold_cleaning).fit_resample(X, Y) - threshold_cleaning = 10 - with raises( - ValueError, - match=("'threshold_cleaning' is a value between" - " 0 and 1")): - NeighbourhoodCleaningRule( - threshold_cleaning=threshold_cleaning).fit_resample(X, Y) +@pytest.mark.parametrize( + "ncr_params, err_msg", + [({"threshold_cleaning": -10}, "value between 0 and 1"), + ({"threshold_cleaning": 10}, "value between 0 and 1"), + ({"n_neighbors": 'rnd'}, "has to be one of")] +) +def test_ncr_error(ncr_params, err_msg): + ncr = NeighbourhoodCleaningRule(**ncr_params) + with pytest.raises(ValueError, match=err_msg): + ncr.fit_resample(X, Y) def test_ncr_fit_resample(): @@ -106,13 +101,6 @@ def test_ncr_fit_resample_nn_obj(): assert_array_equal(idx_under, idx_gt) -def test_ncr_wrong_nn_obj(): - nn = 'rnd' - ncr = NeighbourhoodCleaningRule(return_indices=True, n_neighbors=nn) - with raises(ValueError, match="has to be one of"): - ncr.fit_resample(X, Y) - - def test_deprecation_random_state(): ncr = NeighbourhoodCleaningRule(random_state=0) with warns( diff --git 
a/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py index 2e8c1af2c..29bd947ee 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py @@ -3,13 +3,10 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises from sklearn.utils.testing import assert_array_equal - from sklearn.neighbors import KNeighborsClassifier from imblearn.under_sampling import OneSidedSelection diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py index eecb23b64..0fc01bcde 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - from collections import Counter import numpy as np diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py index 10cf1e1c3..683c2b82f 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py index 22aaa156c..eb800c06e 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from sklearn.utils.testing import assert_array_equal diff --git a/imblearn/utils/tests/test_docstring.py b/imblearn/utils/tests/test_docstring.py index 0b7966fc3..ec8305029 100644 --- a/imblearn/utils/tests/test_docstring.py +++ b/imblearn/utils/tests/test_docstring.py @@ -15,8 +15,7 @@ xxx yyy - """.rstrip() - + """ def func(param_1, param_2): """A function. @@ -38,7 +37,7 @@ def func(param_1, param_2): xxx yyy - """.rstrip() + """ class cls: @@ -60,4 +59,4 @@ def __init__(self, param_1, param_2): (cls, cls_docstring)]) def test_docstring_inject(obj, obj_docstring): obj_injected_docstring = Substitution(param_1='xxx', param_2='yyy')(obj) - obj_injected_docstring.__doc__ == obj_docstring + assert obj_injected_docstring.__doc__ == obj_docstring
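
Note: the recurring change in this diff is collapsing several copy-pasted error checks into one parametrized test driven by `pytest.raises(..., match=...)`. A minimal, self-contained sketch of that pattern follows; the toy `X`/`Y` arrays are placeholders for the module-level arrays each test file already defines, and the error messages are the ones asserted in the SMOTEENN hunk above.

    import numpy as np
    import pytest

    from imblearn.combine import SMOTEENN

    # Placeholder data: a small two-class problem standing in for the
    # module-level X, Y arrays used by the real test module.
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(40, 2))
    Y = np.array([0] * 10 + [1] * 30)


    @pytest.mark.parametrize(
        "smote_params, err_msg",
        [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
         ({'enn': 'rnd'}, "enn needs to be an ")]
    )
    def test_error_wrong_object(smote_params, err_msg):
        # Each case builds the estimator with one invalid component and checks
        # that fit_resample raises a ValueError whose message matches err_msg.
        smt = SMOTEENN(**smote_params)
        with pytest.raises(ValueError, match=err_msg):
            smt.fit_resample(X, Y)

Compared with the removed versions, a failure in one case no longer masks the remaining cases, and each case is reported under its own test id.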
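
Note: the second recurring change is turning module-level datasets into pytest fixtures, so data is rebuilt per test instead of being shared module state. A sketch under the same assumptions as the `test_imbalance.py` hunk above:

    from collections import Counter

    import pytest
    from sklearn.datasets import load_iris

    from imblearn.datasets import make_imbalance


    @pytest.fixture
    def iris():
        # The former module-level X, Y globals become a fixture, so every
        # test receives a fresh copy of the iris data.
        return load_iris(return_X_y=True)


    @pytest.mark.parametrize(
        "sampling_strategy, expected_counts",
        [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}),
         ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})]
    )
    def test_make_imbalance_dict(iris, sampling_strategy, expected_counts):
        X, y = iris
        _, y_ = make_imbalance(X, y, sampling_strategy=sampling_strategy)
        assert Counter(y_) == expected_counts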
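
Note: numeric comparisons move from `assert_allclose`/`approx` imports to explicit `pytest.approx`, and `test_score_objects.py` stacks two `parametrize` decorators to cover the cross-product of scores and averages. A small illustrative sketch (the 0.4714... value is the geometric-mean figure quoted in the hunks above; the test itself is only a demonstration of the mechanism):

    import pytest

    R_TOL = 1e-2  # relative tolerance, matching the metric test modules


    @pytest.mark.parametrize("average", ['macro', 'micro', 'weighted'])
    @pytest.mark.parametrize("expected", [0.471])
    def test_stacked_parametrize_and_approx(average, expected):
        # Stacked parametrize decorators run the cross-product of their cases
        # (3 x 1 here); test_score_objects.py uses the same mechanism to cover
        # every score/average combination.
        observed = 0.47140452079103168
        # The former assert_allclose(observed, expected, rtol=R_TOL) becomes:
        assert observed == pytest.approx(expected, rel=R_TOL)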