diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst index 913bf3634..ff62c255a 100644 --- a/doc/whats_new/v0.0.4.rst +++ b/doc/whats_new/v0.0.4.rst @@ -104,6 +104,9 @@ Maintenance - Catch deprecation warning in testing. :issue:`441` by :user:`Guillaume Lemaitre `. +- Refactor and impose `pytest` style tests. + :issue:`470` by :user:`Guillaume Lemaitre `. + Documentation ............. diff --git a/imblearn/combine/tests/test_smote_enn.py b/imblearn/combine/tests/test_smote_enn.py index 39a16f10e..a40b77579 100644 --- a/imblearn/combine/tests/test_smote_enn.py +++ b/imblearn/combine/tests/test_smote_enn.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_allclose, assert_array_equal @@ -100,12 +98,12 @@ def test_validate_estimator_default(): assert_array_equal(y_resampled, y_gt) -def test_error_wrong_object(): - smote = 'rnd' - enn = 'rnd' - smt = SMOTEENN(smote=smote, random_state=RND_SEED) - with raises(ValueError, match="smote needs to be a SMOTE"): - smt.fit_resample(X, Y) - smt = SMOTEENN(enn=enn, random_state=RND_SEED) - with raises(ValueError, match="enn needs to be an "): +@pytest.mark.parametrize( + "smote_params, err_msg", + [({'smote': 'rnd'}, "smote needs to be a SMOTE"), + ({'enn': 'rnd'}, "enn needs to be an ")] +) +def test_error_wrong_object(smote_params, err_msg): + smt = SMOTEENN(**smote_params) + with pytest.raises(ValueError, match=err_msg): smt.fit_resample(X, Y) diff --git a/imblearn/combine/tests/test_smote_tomek.py b/imblearn/combine/tests/test_smote_tomek.py index 2221bc463..b2c2ad999 100644 --- a/imblearn/combine/tests/test_smote_tomek.py +++ b/imblearn/combine/tests/test_smote_tomek.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_allclose, assert_array_equal @@ -106,12 +104,12 @@ def test_validate_estimator_default(): assert_array_equal(y_resampled, y_gt) -def test_error_wrong_object(): - smote = 'rnd' - tomek = 'rnd' - smt = SMOTETomek(smote=smote, random_state=RND_SEED) - with raises(ValueError, match="smote needs to be a SMOTE"): - smt.fit_resample(X, Y) - smt = SMOTETomek(tomek=tomek, random_state=RND_SEED) - with raises(ValueError, match="tomek needs to be a TomekLinks"): +@pytest.mark.parametrize( + "smote_params, err_msg", + [({'smote': 'rnd'}, "smote needs to be a SMOTE"), + ({'tomek': 'rnd'}, "tomek needs to be a TomekLinks")] +) +def test_error_wrong_object(smote_params, err_msg): + smt = SMOTETomek(**smote_params) + with pytest.raises(ValueError, match=err_msg): smt.fit_resample(X, Y) diff --git a/imblearn/datasets/tests/test_imbalance.py b/imblearn/datasets/tests/test_imbalance.py index 6776a6daf..18cd98e79 100644 --- a/imblearn/datasets/tests/test_imbalance.py +++ b/imblearn/datasets/tests/test_imbalance.py @@ -3,64 +3,66 @@ # Christos Aridas # License: MIT -from __future__ import print_function - from collections import Counter import pytest import numpy as np -from pytest import raises - from sklearn.datasets import load_iris from imblearn.datasets import make_imbalance -data = load_iris() -X, Y = data.data, data.target +@pytest.fixture +def iris(): + return load_iris(return_X_y=True) -def test_make_imbalanced_backcompat(): + +def test_make_imbalanced_backcompat(iris): # check an error is raised with we don't pass sampling_strategy and ratio - with 
raises(TypeError, match="missing 1 required positional argument"): - make_imbalance(X, Y) + with pytest.raises(TypeError, match="missing 1 required positional argument"): + make_imbalance(*iris) -def test_make_imbalance_error(): +@pytest.mark.parametrize( + "sampling_strategy, err_msg", + [({0: -100, 1: 50, 2: 50}, "in a class cannot be negative"), + ({0: 10, 1: 70}, "should be less or equal to the original"), + ('random-string', "has to be a dictionary or a function")] +) +def test_make_imbalance_error(iris, sampling_strategy, err_msg): # we are reusing part of utils.check_sampling_strategy, however this is not # cover in the common tests so we will repeat it here - sampling_strategy = {0: -100, 1: 50, 2: 50} - with raises(ValueError, match="in a class cannot be negative"): - make_imbalance(X, Y, sampling_strategy) - sampling_strategy = {0: 10, 1: 70} - with raises(ValueError, match="should be less or equal to the original"): - make_imbalance(X, Y, sampling_strategy) - y_ = np.zeros((X.shape[0], )) - sampling_strategy = {0: 10} - with raises(ValueError, match="needs to have more than 1 class."): - make_imbalance(X, y_, sampling_strategy) - sampling_strategy = 'random-string' - with raises(ValueError, match="has to be a dictionary or a function"): - make_imbalance(X, Y, sampling_strategy) - - -def test_make_imbalance_dict(): - sampling_strategy = {0: 10, 1: 20, 2: 30} - X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy) - assert Counter(y_) == sampling_strategy - - sampling_strategy = {0: 10, 1: 20} - X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy) - assert Counter(y_) == {0: 10, 1: 20, 2: 50} + X, y = iris + with pytest.raises(ValueError, match=err_msg): + make_imbalance(X, y, sampling_strategy) + + +def test_make_imbalance_error_single_class(iris): + X, y = iris + y = np.zeros_like(y) + with pytest.raises(ValueError, match="needs to have more than 1 class."): + make_imbalance(X, y, {0: 10}) + + +@pytest.mark.parametrize( + "sampling_strategy, expected_counts", + [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}), + ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})] +) +def test_make_imbalance_dict(iris, sampling_strategy, expected_counts): + X, y = iris + _, y_ = make_imbalance(X, y, sampling_strategy=sampling_strategy) + assert Counter(y_) == expected_counts @pytest.mark.filterwarnings("ignore:'ratio' has been deprecated in 0.4") -def test_make_imbalance_ratio(): - # check that using 'ratio' is working - sampling_strategy = {0: 10, 1: 20, 2: 30} - X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy) - assert Counter(y_) == sampling_strategy - - sampling_strategy = {0: 10, 1: 20} - X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy) - assert Counter(y_) == {0: 10, 1: 20, 2: 50} +@pytest.mark.parametrize( + "sampling_strategy, expected_counts", + [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}), + ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})] +) +def test_make_imbalance_dict_ratio(iris, sampling_strategy, expected_counts): + X, y = iris + _, y_ = make_imbalance(X, y, ratio=sampling_strategy) + assert Counter(y_) == expected_counts diff --git a/imblearn/datasets/tests/test_zenodo.py b/imblearn/datasets/tests/test_zenodo.py index 7d95784ec..28d0a06cc 100644 --- a/imblearn/datasets/tests/test_zenodo.py +++ b/imblearn/datasets/tests/test_zenodo.py @@ -6,10 +6,10 @@ # Christos Aridas # License: MIT -from imblearn.datasets import fetch_datasets -from sklearn.utils.testing import SkipTest, assert_allclose +import pytest -from pytest import raises +from imblearn.datasets 
import fetch_datasets +from sklearn.utils.testing import SkipTest DATASET_SHAPE = { 'ecoli': (336, 7), @@ -79,19 +79,20 @@ def test_fetch_filter(): assert DATASET_SHAPE['ecoli'] == X1.shape assert X1.shape == X2.shape - assert_allclose(X1.sum(), X2.sum()) + assert X1.sum() == pytest.approx(X2.sum()) y1, y2 = datasets1['ecoli'].target, datasets2['ecoli'].target assert (X1.shape[0], ) == y1.shape assert (X1.shape[0], ) == y2.shape -def test_fetch_error(): - with raises(ValueError, match='is not a dataset available.'): - fetch_datasets(filter_data=tuple(['rnd'])) - with raises(ValueError, match='dataset with the ID='): - fetch_datasets(filter_data=tuple([-1])) - with raises(ValueError, match='dataset with the ID='): - fetch_datasets(filter_data=tuple([100])) - with raises(ValueError, match='value in the tuple'): - fetch_datasets(filter_data=tuple([1.00])) +@pytest.mark.parametrize( + "filter_data, err_msg", + [(('rnf',), "is not a dataset available"), + ((-1,), "dataset with the ID="), + ((100,), "dataset with the ID="), + ((1.00,), "value in the tuple")] +) +def test_fetch_error(filter_data, err_msg): + with pytest.raises(ValueError, match=err_msg): + fetch_datasets(filter_data=filter_data) diff --git a/imblearn/ensemble/tests/test_balance_cascade.py b/imblearn/ensemble/tests/test_balance_cascade.py index 334c6cd20..4c712d5eb 100644 --- a/imblearn/ensemble/tests/test_balance_cascade.py +++ b/imblearn/ensemble/tests/test_balance_cascade.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py index 4565652f9..f541bfd1a 100644 --- a/imblearn/keras/tests/test_generator.py +++ b/imblearn/keras/tests/test_generator.py @@ -18,9 +18,13 @@ from imblearn.keras import BalancedBatchGenerator from imblearn.keras import balanced_batch_generator -iris = load_iris() -X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40}) -y = to_categorical(y, 3) + +@pytest.fixture +def data(): + iris = load_iris() + X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40}) + y = to_categorical(y, 3) + return X, y def _build_keras_model(n_classes, n_features): @@ -31,9 +35,9 @@ def _build_keras_model(n_classes, n_features): return model -def test_balanced_batch_generator_class_no_return_indices(): +def test_balanced_batch_generator_class_no_return_indices(data): with pytest.raises(ValueError, match='needs to return the indices'): - BalancedBatchGenerator(X, y, sampler=ClusterCentroids(), batch_size=10) + BalancedBatchGenerator(*data, sampler=ClusterCentroids(), batch_size=10) @pytest.mark.parametrize( @@ -41,9 +45,10 @@ def test_balanced_batch_generator_class_no_return_indices(): [(None, None), (RandomOverSampler(), None), (NearMiss(), None), - (None, np.random.uniform(size=(y.shape[0])))] + (None, np.random.uniform(size=120))] ) -def test_balanced_batch_generator_class(sampler, sample_weight): +def test_balanced_batch_generator_class(data, sampler, sample_weight): + X, y = data model = _build_keras_model(y.shape[1], X.shape[1]) training_generator = BalancedBatchGenerator(X, y, sample_weight=sample_weight, @@ -55,23 +60,24 @@ def test_balanced_batch_generator_class(sampler, sample_weight): @pytest.mark.parametrize("keep_sparse", [True, False]) -def test_balanced_batch_generator_class_sparse(keep_sparse): +def test_balanced_batch_generator_class_sparse(data, keep_sparse): + X, y = data training_generator = 
BalancedBatchGenerator(sparse.csr_matrix(X), y, batch_size=10, keep_sparse=keep_sparse, random_state=42) for idx in range(len(training_generator)): - X_batch, y_batch = training_generator.__getitem__(idx) + X_batch, _ = training_generator.__getitem__(idx) if keep_sparse: assert sparse.issparse(X_batch) else: assert not sparse.issparse(X_batch) -def test_balanced_batch_generator_function_no_return_indices(): +def test_balanced_batch_generator_function_no_return_indices(data): with pytest.raises(ValueError, match='needs to return the indices'): balanced_batch_generator( - X, y, sampler=ClusterCentroids(), batch_size=10, random_state=42) + *data, sampler=ClusterCentroids(), batch_size=10, random_state=42) @pytest.mark.parametrize( @@ -79,9 +85,10 @@ def test_balanced_batch_generator_function_no_return_indices(): [(None, None), (RandomOverSampler(), None), (NearMiss(), None), - (None, np.random.uniform(size=(y.shape[0])))] + (None, np.random.uniform(size=120))] ) -def test_balanced_batch_generator_function(sampler, sample_weight): +def test_balanced_batch_generator_function(data, sampler, sample_weight): + X, y = data model = _build_keras_model(y.shape[1], X.shape[1]) training_generator, steps_per_epoch = balanced_batch_generator( X, y, sample_weight=sample_weight, sampler=sampler, batch_size=10, @@ -92,12 +99,13 @@ def test_balanced_batch_generator_function(sampler, sample_weight): @pytest.mark.parametrize("keep_sparse", [True, False]) -def test_balanced_batch_generator_function_sparse(keep_sparse): +def test_balanced_batch_generator_function_sparse(data, keep_sparse): + X, y = data training_generator, steps_per_epoch = balanced_batch_generator( sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10, random_state=42) - for idx in range(steps_per_epoch): - X_batch, y_batch = next(training_generator) + for _ in range(steps_per_epoch): + X_batch, _ = next(training_generator) if keep_sparse: assert sparse.issparse(X_batch) else: diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py index ae2585e13..664361eb7 100644 --- a/imblearn/metrics/tests/test_classification.py +++ b/imblearn/metrics/tests/test_classification.py @@ -4,14 +4,13 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division from functools import partial import numpy as np import pytest -from pytest import approx, raises from sklearn import datasets from sklearn import svm @@ -103,49 +102,40 @@ def test_sensitivity_specificity_score_binary(): # individual scoring function that can be used for grid search: in the # binary class case the score is the value of the measure for the positive # class (e.g. label == 1). This is deprecated for average != 'binary'. 
- for kwargs, my_assert in [({}, assert_no_warnings), ({ - 'average': 'binary' - }, assert_no_warnings)]: - sen = my_assert(sensitivity_score, y_true, y_pred, **kwargs) - assert_allclose(sen, 0.68, rtol=R_TOL) + for kwargs in ({}, {'average': 'binary'}): + sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs) + assert sen == pytest.approx(0.68, rel=R_TOL) - spe = my_assert(specificity_score, y_true, y_pred, **kwargs) - assert_allclose(spe, 0.88, rtol=R_TOL) + spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs) + assert spe == pytest.approx(0.88, rel=R_TOL) @pytest.mark.filterwarnings("ignore:Specificity is ill-defined") -def test_sensitivity_specificity_f_binary_single_class(): +@pytest.mark.parametrize( + "y_pred, expected_sensitivity, expected_specificity", + [(([1, 1], [1, 1]), 1.0, 0.0), + (([-1, -1], [-1, -1]), 0.0, 0.0)] +) +def test_sensitivity_specificity_f_binary_single_class( + y_pred, expected_sensitivity, expected_specificity): # Such a case may occur with non-stratified cross-validation - assert sensitivity_score([1, 1], [1, 1]) == 1. - assert specificity_score([1, 1], [1, 1]) == 0. + assert sensitivity_score(*y_pred) == expected_sensitivity + assert specificity_score(*y_pred) == expected_specificity - assert sensitivity_score([-1, -1], [-1, -1]) == 0. - assert specificity_score([-1, -1], [-1, -1]) == 0. - -def test_sensitivity_specificity_extra_labels(): +@pytest.mark.parametrize( + "average, expected_specificty", + [(None, [1., 0.67, 1., 1., 1.]), + ('macro', np.mean([1., 0.67, 1., 1., 1.])), + ('micro', 15 / 16)] +) +def test_sensitivity_specificity_extra_labels(average, expected_specificty): y_true = [1, 3, 3, 2] y_pred = [1, 1, 3, 2] - # No average: zeros in array - actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average=None) - assert_allclose([1., 0.67, 1., 1., 1.], actual, rtol=R_TOL) - - # Macro average is changed - actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average='macro') - assert_allclose(np.mean([1., 0.67, 1., 1., 1.]), actual, rtol=R_TOL) - - # Check for micro - actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average='micro') - assert_allclose(15. 
/ 16., actual, rtol=R_TOL) - - # Check for weighted actual = specificity_score( - y_true, y_pred, labels=[0, 1, 2, 3, 4], average='macro') - assert_allclose(np.mean([1., 0.67, 1., 1., 1.]), actual, rtol=R_TOL) + y_true, y_pred, labels=[0, 1, 2, 3, 4], average=average) + assert_allclose(expected_specificty, actual, rtol=R_TOL) def test_sensitivity_specificity_ignored_labels(): @@ -176,7 +166,7 @@ def test_sensitivity_specificity_error_multilabels(): y_true_bin = label_binarize(y_true, classes=np.arange(5)) y_pred_bin = label_binarize(y_pred, classes=np.arange(5)) - with raises(ValueError): + with pytest.raises(ValueError): sensitivity_score(y_true_bin, y_pred_bin) @@ -184,12 +174,12 @@ def test_sensitivity_specificity_support_errors(): y_true, y_pred, _ = make_prediction(binary=True) # Bad pos_label - with raises(ValueError): + with pytest.raises(ValueError): sensitivity_specificity_support( y_true, y_pred, pos_label=2, average='binary') # Bad average option - with raises(ValueError): + with pytest.raises(ValueError): sensitivity_specificity_support([0, 1, 2], [1, 2, 0], average='mega') @@ -210,105 +200,59 @@ def test_geometric_mean_support_binary(): @pytest.mark.filterwarnings("ignore:Recall is ill-defined") -def test_geometric_mean_multiclass(): - y_true = [0, 0, 1, 1] - y_pred = [0, 0, 1, 1] - assert_allclose(geometric_mean_score(y_true, y_pred), 1.0, rtol=R_TOL) - - y_true = [0, 0, 0, 0] - y_pred = [1, 1, 1, 1] - assert_allclose(geometric_mean_score(y_true, y_pred), 0.0, rtol=R_TOL) - - cor = 0.001 - y_true = [0, 0, 0, 0] - y_pred = [0, 0, 0, 0] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), 1.0, rtol=R_TOL) - - y_true = [0, 0, 0, 0] - y_pred = [1, 1, 1, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), cor, rtol=R_TOL) - - y_true = [0, 0, 1, 1] - y_pred = [0, 1, 1, 0] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), 0.5, rtol=R_TOL) - - y_true = [0, 1, 2, 0, 1, 2] - y_pred = [0, 2, 1, 0, 0, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), - (1 * cor * cor) ** (1.0 / 3.0), - rtol=R_TOL) - - y_true = [0, 1, 2, 3, 4, 5] - y_pred = [0, 1, 2, 3, 4, 5] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), 1, rtol=R_TOL) - - y_true = [0, 1, 1, 1, 1, 0] - y_pred = [0, 0, 1, 1, 1, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, correction=cor), - (0.5 * 0.75) ** 0.5, - rtol=R_TOL) - - y_true = [0, 1, 2, 0, 1, 2] - y_pred = [0, 2, 1, 0, 0, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, average='macro'), - 0.47140452079103168, - rtol=R_TOL) - assert_allclose( - geometric_mean_score(y_true, y_pred, average='micro'), - 0.47140452079103168, - rtol=R_TOL) - assert_allclose( - geometric_mean_score(y_true, y_pred, average='weighted'), - 0.47140452079103168, - rtol=R_TOL) - assert_allclose( - geometric_mean_score(y_true, y_pred, average=None), - [0.8660254, 0.0, 0.0], - rtol=R_TOL) +@pytest.mark.parametrize( + "y_true, y_pred, correction, expected_gmean", + [([0, 0, 1, 1], [0, 0, 1, 1], 0.0, 1.0), + ([0, 0, 0, 0], [1, 1, 1, 1], 0.0, 0.0), + ([0, 0, 0, 0], [0, 0, 0, 0], 0.001, 1.0), + ([0, 0, 0, 0], [1, 1, 1, 1], 0.001, 0.001), + ([0, 0, 1, 1], [0, 1, 1, 0], 0.001, 0.5), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 0.001, (0.001 ** 2) ** (1 / 3)), + ([0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], 0.001, 1), + ([0, 1, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1], 0.001, (0.5 * 0.75) ** 0.5)] +) +def test_geometric_mean_multiclass(y_true, y_pred, correction, 
expected_gmean): + gmean = geometric_mean_score(y_true, y_pred, correction=correction) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) - y_true = [0, 1, 2, 0, 1, 2] - y_pred = [0, 1, 1, 0, 0, 1] - assert_allclose( - geometric_mean_score(y_true, y_pred, labels=[0, 1]), - 0.70710678118654752, - rtol=R_TOL) - assert_allclose( - geometric_mean_score( - y_true, y_pred, labels=[0, 1], sample_weight=[1, 2, 1, 1, 2, 1]), - 0.70710678118654752, - rtol=R_TOL) - assert_allclose( - geometric_mean_score( - y_true, - y_pred, - labels=[0, 1], - sample_weight=[1, 2, 1, 1, 2, 1], - average='weighted'), - 0.3333333333, - rtol=R_TOL) +@pytest.mark.filterwarnings("ignore:Recall is ill-defined") +@pytest.mark.parametrize( + "y_true, y_pred, average, expected_gmean", + [([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 'macro', 0.471), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 'micro', 0.471), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 'weighted', 0.471), + ([0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], None, [0.8660254, 0.0, 0.0])] +) +def test_geometric_mean_average(y_true, y_pred, average, expected_gmean): + gmean = geometric_mean_score(y_true, y_pred, average=average) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) + + +@pytest.mark.parametrize( + "y_true, y_pred, sample_weight, average, expected_gmean", + [([0, 1, 2, 0, 1, 2], [0, 1, 1, 0, 0, 1], None, 'multiclass', 0.707), + ([0, 1, 2, 0, 1, 2], [0, 1, 1, 0, 0, 1], [1, 2, 1, 1, 2, 1], 'multiclass', 0.707), + ([0, 1, 2, 0, 1, 2], [0, 1, 1, 0, 0, 1], [1, 2, 1, 1, 2, 1], 'weighted', 0.333)] +) +def test_geometric_mean_sample_weight(y_true, y_pred, sample_weight, average, + expected_gmean): + gmean = geometric_mean_score(y_true, y_pred, labels=[0, 1], + sample_weight=sample_weight, + average=average) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) + + +@pytest.mark.parametrize( + "average, expected_gmean", + [('multiclass', 0.41), (None, [0.85, 0.29, 0.7]), + ('macro', 0.68), ('weighted', 0.65)] +) +def test_geometric_mean_score_prediction(average, expected_gmean): y_true, y_pred, _ = make_prediction(binary=False) - geo_mean = geometric_mean_score(y_true, y_pred) - assert_allclose(geo_mean, 0.41, rtol=R_TOL) - - # Compute the geometric mean for each of the classes - geo_mean = geometric_mean_score(y_true, y_pred, average=None) - assert_allclose(geo_mean, [0.85, 0.29, 0.7], rtol=R_TOL) - - # average tests - geo_mean = geometric_mean_score(y_true, y_pred, average='macro') - assert_allclose(geo_mean, 0.68, rtol=R_TOL) - - geo_mean = geometric_mean_score(y_true, y_pred, average='weighted') - assert_allclose(geo_mean, 0.65, rtol=R_TOL) + gmean = geometric_mean_score(y_true, y_pred, average=average) + assert gmean == pytest.approx(expected_gmean, rel=R_TOL) def test_iba_geo_mean_binary(): @@ -412,7 +356,7 @@ def test_classification_report_imbalanced_multiclass_with_unicode_label(): u'red¢ 0.42 0.90 0.55 0.57 0.70 0.51 20 avg / total ' u'0.51 0.53 0.80 0.47 0.58 0.40 75') if np_version[:3] < (1, 7, 0): - with raises(RuntimeError, match="NumPy < 1.7.0"): + with pytest.raises(RuntimeError, match="NumPy < 1.7.0"): classification_report_imbalanced(y_true, y_pred) else: report = classification_report_imbalanced(y_true, y_pred) @@ -435,46 +379,28 @@ def test_classification_report_imbalanced_multiclass_with_long_string_label(): assert _format_report(report) == expected_report -def test_iba_sklearn_metrics(): +@pytest.mark.parametrize( + "score, expected_score", + [(accuracy_score, 0.54756), (jaccard_similarity_score, 0.54756), + (precision_score, 
0.65025), (recall_score, 0.41616)] +) +def test_iba_sklearn_metrics(score, expected_score): y_true, y_pred, _ = make_prediction(binary=True) - acc = make_index_balanced_accuracy(alpha=0.5, squared=True)(accuracy_score) - score = acc(y_true, y_pred) - assert score == approx(0.54756) - - jss = make_index_balanced_accuracy( - alpha=0.5, squared=True)(jaccard_similarity_score) - score = jss(y_true, y_pred) - assert score == approx(0.54756) + score_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)(score) + score = score_iba(y_true, y_pred) + assert score == pytest.approx(expected_score) - pre = make_index_balanced_accuracy( - alpha=0.5, squared=True)(precision_score) - score = pre(y_true, y_pred) - assert score == approx(0.65025) - rec = make_index_balanced_accuracy(alpha=0.5, squared=True)(recall_score) - score = rec(y_true, y_pred) - assert score == approx(0.41616000000000009) - - -def test_iba_error_y_score_prob(): +@pytest.mark.parametrize( + "score_loss", + [average_precision_score, brier_score_loss, + cohen_kappa_score, roc_auc_score] +) +def test_iba_error_y_score_prob_error(score_loss): y_true, y_pred, _ = make_prediction(binary=True) aps = make_index_balanced_accuracy( - alpha=0.5, squared=True)(average_precision_score) - with raises(AttributeError): + alpha=0.5, squared=True)(score_loss) + with pytest.raises(AttributeError): aps(y_true, y_pred) - - brier = make_index_balanced_accuracy( - alpha=0.5, squared=True)(brier_score_loss) - with raises(AttributeError): - brier(y_true, y_pred) - - kappa = make_index_balanced_accuracy( - alpha=0.5, squared=True)(cohen_kappa_score) - with raises(AttributeError): - kappa(y_true, y_pred) - - ras = make_index_balanced_accuracy(alpha=0.5, squared=True)(roc_auc_score) - with raises(AttributeError): - ras(y_true, y_pred) diff --git a/imblearn/metrics/tests/test_score_objects.py b/imblearn/metrics/tests/test_score_objects.py index 3e9dd3e20..b1612a965 100644 --- a/imblearn/metrics/tests/test_score_objects.py +++ b/imblearn/metrics/tests/test_score_objects.py @@ -19,127 +19,46 @@ R_TOL = 1e-2 -@pytest.mark.filterwarnings("ignore:Liblinear failed to converge") -def test_imblearn_classification_scorers(): +@pytest.fixture +def data(): X, y = make_blobs(random_state=0, centers=2) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - clf = LinearSVC(random_state=0) - clf.fit(X_train, y_train) - - # sensitivity scorer - scorer = make_scorer(sensitivity_score, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(sensitivity_score, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(sensitivity_score, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(sensitivity_score, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, 
rtol=R_TOL) - - # specificity scorer - scorer = make_scorer(specificity_score, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(specificity_score, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(specificity_score, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer(specificity_score, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.95, rtol=R_TOL) - - # geometric_mean scorer - scorer = make_scorer(geometric_mean_score, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - - scorer = make_scorer( - geometric_mean_score, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) + return train_test_split(X, y, random_state=0) - scorer = make_scorer(geometric_mean_score, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) - scorer = make_scorer(geometric_mean_score, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.92, rtol=R_TOL) +@pytest.mark.filterwarnings("ignore:Liblinear failed to converge") +@pytest.mark.parametrize( + "score, expected_score", + [(sensitivity_score, 0.92), + (specificity_score, 0.92), + (geometric_mean_score, 0.92), + (make_index_balanced_accuracy()(geometric_mean_score), 0.85)] +) +@pytest.mark.parametrize("average",['macro', 'weighted', 'micro']) +def test_scorer_common_average(data, score, expected_score, average): + X_train, X_test, y_train, _ = data - # make a iba metric before a scorer - geo_mean_iba = make_index_balanced_accuracy()(geometric_mean_score) - scorer = make_scorer(geo_mean_iba, pos_label=None, average='macro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) + scorer = make_scorer(score, pos_label=None, average=average) + grid = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]}, + scoring=scorer, cv=3, iid=False) grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.85, rtol=R_TOL) - scorer = make_scorer(geo_mean_iba, pos_label=None, average='weighted') - grid = GridSearchCV( - LinearSVC(random_state=0), 
param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.85, rtol=R_TOL) + assert grid.best_score_ == pytest.approx(expected_score, rel=R_TOL) - scorer = make_scorer(geo_mean_iba, pos_label=None, average='micro') - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.85, rtol=R_TOL) - scorer = make_scorer(geo_mean_iba, pos_label=1) - grid = GridSearchCV( - LinearSVC(random_state=0), param_grid={'C': [1, 10]}, scoring=scorer, - cv=3, iid=False) - grid.fit(X_train, y_train).predict(X_test) - assert_allclose(grid.best_score_, 0.84, rtol=R_TOL) +@pytest.mark.filterwarnings("ignore:Liblinear failed to converge") +@pytest.mark.parametrize( + "score, average, expected_score", + [(sensitivity_score, 'binary', 0.92), + (specificity_score, 'binary', 0.95), + (geometric_mean_score, 'multiclass', 0.92), + (make_index_balanced_accuracy()(geometric_mean_score), 'multiclass', 0.84)] +) +def test_scorer_default_average(data, score, average, expected_score): + X_train, X_test, y_train, _ = data + + scorer = make_scorer(score, pos_label=1, average=average) + grid = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]}, + scoring=scorer, cv=3, iid=False) + grid.fit(X_train, y_train).predict(X_test) + + assert grid.best_score_ == pytest.approx(expected_score, rel=R_TOL) diff --git a/imblearn/over_sampling/tests/test_adasyn.py b/imblearn/over_sampling/tests/test_adasyn.py index 94223f9bc..6e0d238e0 100644 --- a/imblearn/over_sampling/tests/test_adasyn.py +++ b/imblearn/over_sampling/tests/test_adasyn.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_allclose, assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -62,13 +60,6 @@ def test_ada_fit_resample(): assert_array_equal(y_resampled, y_gt) -def test_ada_fit_sampling_strategy_error(): - sampling_strategy = {0: 9, 1: 12} - ada = ADASYN(sampling_strategy=sampling_strategy, random_state=RND_SEED) - with raises(ValueError, match="No samples will be generated."): - ada.fit_resample(X, Y) - - def test_ada_fit_resample_nn_obj(): nn = NearestNeighbors(n_neighbors=6) ada = ADASYN(random_state=RND_SEED, n_neighbors=nn) @@ -95,8 +86,12 @@ def test_ada_fit_resample_nn_obj(): assert_array_equal(y_resampled, y_gt) -def test_ada_wrong_nn_obj(): - nn = 'rnd' - ada = ADASYN(random_state=RND_SEED, n_neighbors=nn) - with raises(ValueError, match="has to be one of"): - ada.fit_resample(X, Y) +@pytest.mark.parametrize( + "adasyn_params, err_msg", + [({"sampling_strategy": {0: 9, 1: 12}}, "No samples will be generated."), + ({"n_neighbors": 'rnd'}, "has to be one of")] +) +def test_adasyn_error(adasyn_params, err_msg): + adasyn = ADASYN(**adasyn_params) + with pytest.raises(ValueError, match=err_msg): + adasyn.fit_resample(X, Y) diff --git a/imblearn/over_sampling/tests/test_random_over_sampler.py b/imblearn/over_sampling/tests/test_random_over_sampler.py index 484f6110a..6838edefb 100644 --- a/imblearn/over_sampling/tests/test_random_over_sampler.py +++ b/imblearn/over_sampling/tests/test_random_over_sampler.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - from collections import Counter import numpy as np diff --git 
a/imblearn/over_sampling/tests/test_smote.py b/imblearn/over_sampling/tests/test_smote.py index 674eb7021..6964ab79f 100644 --- a/imblearn/over_sampling/tests/test_smote.py +++ b/imblearn/over_sampling/tests/test_smote.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np import pytest @@ -31,13 +29,6 @@ R_TOL = 1e-4 -def test_smote_wrong_kind(): - kind = 'rnd' - smote = SMOTE(kind=kind, random_state=RND_SEED) - with pytest.raises(ValueError, match="Unknown kind for SMOTE"): - smote.fit_resample(X, Y) - - def test_sample_regular(): smote = SMOTE(random_state=RND_SEED) X_resampled, y_resampled = smote.fit_resample(X, Y) @@ -236,24 +227,24 @@ def test_sample_regular_with_nn(): @pytest.mark.filterwarnings('ignore:"kind" is deprecated in 0.4 and will be') @pytest.mark.filterwarnings('ignore:"m_neighbors" is deprecated in 0.4 and') -def test_wrong_nn(): - kind = 'borderline1' - nn_m = 'rnd' - nn_k = NearestNeighbors(n_neighbors=6) - smote = SMOTE( - random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m) - with pytest.raises(ValueError, match="has to be one of"): - smote.fit_resample(X, Y) - nn_k = 'rnd' - nn_m = NearestNeighbors(n_neighbors=10) - smote = SMOTE( - random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m) - with pytest.raises(ValueError, match="has to be one of"): - smote.fit_resample(X, Y) - kind = 'regular' - nn_k = 'rnd' - smote = SMOTE(random_state=RND_SEED, kind=kind, k_neighbors=nn_k) - with pytest.raises(ValueError, match="has to be one of"): +@pytest.mark.filterwarnings('ignore:"svm_estimator" is deprecated in 0.4 and') +@pytest.mark.parametrize( + "smote_params, err_msg", + [({"kind": "rnd"}, "Unknown kind for SMOTE"), + ({"kind": "borderline1", + "k_neighbors": NearestNeighbors(n_neighbors=6), + "m_neighbors": 'rnd'}, "has to be one of"), + ({"k_neighbors": 'rnd', + "m_neighbors": NearestNeighbors(n_neighbors=10)}, "has to be one of"), + ({"kind": "regular", + "k_neighbors": 'rnd'}, "has to be one of"), + ({"kind": "svm", + "k_neighbors": NearestNeighbors(n_neighbors=6), + "svm_estimator": 'rnd'}, "has to be one of")] +) +def test_smote_error_passing_estimator(smote_params, err_msg): + smote = SMOTE(**smote_params) + with pytest.raises(ValueError, match=err_msg): smote.fit_resample(X, Y) @@ -298,19 +289,6 @@ def test_sample_with_nn_svm(): assert_array_equal(y_resampled, y_gt) -@pytest.mark.filterwarnings('ignore:"kind" is deprecated in 0.4 and will be') -@pytest.mark.filterwarnings('ignore:"svm_estimator" is deprecated in 0.4 and') -def test_sample_regular_wrong_svm(): - kind = 'svm' - nn_k = NearestNeighbors(n_neighbors=6) - svm = 'rnd' - smote = SMOTE( - random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm) - - with pytest.raises(ValueError, match="has to be one of"): - smote.fit_resample(X, Y) - - def test_borderline_smote_wrong_kind(): bsmote = BorderlineSMOTE(kind='rand') with pytest.raises(ValueError, match='The possible "kind" of algorithm'): diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index b22d17615..373f18c76 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -15,12 +15,17 @@ tf = pytest.importorskip('tensorflow') -@pytest.mark.parametrize("sampler", [None, NearMiss(), RandomOverSampler()]) -def test_balanced_batch_generator(sampler): +@pytest.fixture +def data(): X, y = load_iris(return_X_y=True) X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 
40}) X = X.astype(np.float32) + return X, y + +@pytest.mark.parametrize("sampler", [None, NearMiss(), RandomOverSampler()]) +def test_balanced_batch_generator(data, sampler): + X, y = data batch_size = 10 training_generator, steps_per_epoch = balanced_batch_generator( X, y, sample_weight=None, sampler=sampler, @@ -74,10 +79,8 @@ def accuracy(y_true, y_pred): @pytest.mark.parametrize("keep_sparse", [True, False]) -def test_balanced_batch_generator_function_sparse(keep_sparse): - X, y = load_iris(return_X_y=True) - X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) - X = X.astype(np.float32) +def test_balanced_batch_generator_function_sparse(data, keep_sparse): + X, y = data training_generator, steps_per_epoch = balanced_batch_generator( sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10, diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py index 190d8f9e5..f2274ecf9 100644 --- a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py +++ b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py @@ -6,7 +6,7 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division import numpy as np from scipy import sparse diff --git a/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py index cda6d5549..87d959731 100644 --- a/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py +++ b/imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py @@ -1,11 +1,9 @@ """Test the module cluster centroids.""" -from __future__ import print_function - from collections import Counter +import pytest import numpy as np from scipy import sparse -from pytest import raises from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal @@ -23,13 +21,14 @@ R_TOL = 1e-4 -def test_fit_resample_check_voting(): +@pytest.mark.parametrize( + "X, expected_voting", + [(X, 'soft'), (sparse.csr_matrix(X), 'hard')] +) +def test_fit_resample_check_voting(X, expected_voting): cc = ClusterCentroids(random_state=RND_SEED) cc.fit_resample(X, Y) - assert cc.voting_ == 'soft' - cc = ClusterCentroids(random_state=RND_SEED) - cc.fit_resample(sparse.csr_matrix(X), Y) - assert cc.voting_ == 'hard' + assert cc.voting_ == expected_voting def test_fit_resample_auto(): @@ -111,20 +110,12 @@ def test_fit_hard_voting(): assert np.any(np.all(x == X, axis=1)) -def test_fit_resample_error(): - sampling_strategy = 'auto' - cluster = 'rnd' - cc = ClusterCentroids( - sampling_strategy=sampling_strategy, - random_state=RND_SEED, - estimator=cluster) - with raises(ValueError, match="has to be a KMeans clustering"): - cc.fit_resample(X, Y) - - voting = 'unknown' - cc = ClusterCentroids( - sampling_strategy=sampling_strategy, - voting=voting, - random_state=RND_SEED) - with raises(ValueError, match="needs to be one of"): +@pytest.mark.parametrize( + "cluster_centroids_params, err_msg", + [({"estimator": "rnd"}, "has to be a KMeans clustering"), + ({"voting": "unknown"}, "needs to be one of")] +) +def test_fit_resample_error(cluster_centroids_params, err_msg): + cc = ClusterCentroids(**cluster_centroids_params) + with pytest.raises(ValueError, match=err_msg): cc.fit_resample(X, Y) diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py 
b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py index 716802d25..f9fb5e67a 100644 --- a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py @@ -4,7 +4,7 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division from collections import Counter diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py index 39de438e7..bec2be700 100644 --- a/imblearn/under_sampling/_prototype_selection/_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/_tomek_links.py @@ -5,7 +5,7 @@ # Christos Aridas # License: MIT -from __future__ import division, print_function +from __future__ import division import numpy as np from sklearn.neighbors import NearestNeighbors diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py b/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py index 4ea4d5977..d970716d8 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_allknn.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py index fba3c0937..7e0e14e27 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_condensed_nearest_neighbour.py @@ -3,12 +3,10 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np -from sklearn.utils.testing import assert_array_equal from pytest import raises +from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import KNeighborsClassifier from imblearn.under_sampling import CondensedNearestNeighbour diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py index a5f85df9c..65012cff9 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_edited_nearest_neighbours.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py index 8a4bd5d71..b213e5b82 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py b/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py index b84021113..c5e08c529 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py +++ 
b/imblearn/under_sampling/_prototype_selection/tests/test_nearmiss.py @@ -3,10 +3,8 @@ # Christos Aridas # License: MIT -from __future__ import print_function - +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -28,33 +26,17 @@ VERSION_NEARMISS = (1, 2, 3) -def test_nearmiss_wrong_version(): - version = 1000 - nm = NearMiss(version=version) - with raises(ValueError, match="must be 1, 2 or 3"): - nm.fit_resample(X, Y) - - -def test_nm_wrong_nn_obj(): - sampling_strategy = 'auto' - nn = 'rnd' - nm = NearMiss( - sampling_strategy=sampling_strategy, - version=VERSION_NEARMISS, - return_indices=True, - n_neighbors=nn) - with raises(ValueError, match="has to be one of"): +@pytest.mark.parametrize( + "nearmiss_params, err_msg", + [({"version": 1000}, "must be 1, 2 or 3"), + ({"version": 1, "n_neighbors": 'rnd'}, "has to be one of"), + ({"version": 3, "n_neighbors": NearestNeighbors(n_neighbors=3), + "n_neighbors_ver3": "rnd"}, "has to be one of")] +) +def test_nearmiss_error(nearmiss_params, err_msg): + nm = NearMiss(**nearmiss_params) + with pytest.raises(ValueError, match=err_msg): nm.fit_resample(X, Y) - nn3 = 'rnd' - nn = NearestNeighbors(n_neighbors=3) - nm3 = NearMiss( - sampling_strategy=sampling_strategy, - version=3, - return_indices=True, - n_neighbors=nn, - n_neighbors_ver3=nn3) - with raises(ValueError, match="has to be one of"): - nm3.fit_resample(X, Y) def test_nm_fit_resample_auto(): diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py index c2b3a0143..c40f202d7 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_neighbourhood_cleaning_rule.py @@ -3,8 +3,8 @@ # Christos Aridas # License: MIT +import pytest import numpy as np -from pytest import raises from sklearn.utils.testing import assert_array_equal from sklearn.neighbors import NearestNeighbors @@ -24,21 +24,16 @@ Y = np.array([1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 0, 0, 2, 1, 2]) -def test_ncr_error(): - threshold_cleaning = -10 - with raises( - ValueError, - match=("'threshold_cleaning' is a value between" - " 0 and 1")): - NeighbourhoodCleaningRule( - threshold_cleaning=threshold_cleaning).fit_resample(X, Y) - threshold_cleaning = 10 - with raises( - ValueError, - match=("'threshold_cleaning' is a value between" - " 0 and 1")): - NeighbourhoodCleaningRule( - threshold_cleaning=threshold_cleaning).fit_resample(X, Y) +@pytest.mark.parametrize( + "ncr_params, err_msg", + [({"threshold_cleaning": -10}, "value between 0 and 1"), + ({"threshold_cleaning": 10}, "value between 0 and 1"), + ({"n_neighbors": 'rnd'}, "has to be one of")] +) +def test_ncr_error(ncr_params, err_msg): + ncr = NeighbourhoodCleaningRule(**ncr_params) + with pytest.raises(ValueError, match=err_msg): + ncr.fit_resample(X, Y) def test_ncr_fit_resample(): @@ -106,13 +101,6 @@ def test_ncr_fit_resample_nn_obj(): assert_array_equal(idx_under, idx_gt) -def test_ncr_wrong_nn_obj(): - nn = 'rnd' - ncr = NeighbourhoodCleaningRule(return_indices=True, n_neighbors=nn) - with raises(ValueError, match="has to be one of"): - ncr.fit_resample(X, Y) - - def test_deprecation_random_state(): ncr = NeighbourhoodCleaningRule(random_state=0) with warns( diff --git 
a/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py index 2e8c1af2c..29bd947ee 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_one_sided_selection.py @@ -3,13 +3,10 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises from sklearn.utils.testing import assert_array_equal - from sklearn.neighbors import KNeighborsClassifier from imblearn.under_sampling import OneSidedSelection diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py index eecb23b64..0fc01bcde 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - from collections import Counter import numpy as np diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py index 10cf1e1c3..683c2b82f 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_repeated_edited_nearest_neighbours.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from pytest import raises diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py index 22aaa156c..eb800c06e 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py @@ -3,8 +3,6 @@ # Christos Aridas # License: MIT -from __future__ import print_function - import numpy as np from sklearn.utils.testing import assert_array_equal diff --git a/imblearn/utils/tests/test_docstring.py b/imblearn/utils/tests/test_docstring.py index 0b7966fc3..ec8305029 100644 --- a/imblearn/utils/tests/test_docstring.py +++ b/imblearn/utils/tests/test_docstring.py @@ -15,8 +15,7 @@ xxx yyy - """.rstrip() - + """ def func(param_1, param_2): """A function. @@ -38,7 +37,7 @@ def func(param_1, param_2): xxx yyy - """.rstrip() + """ class cls: @@ -60,4 +59,4 @@ def __init__(self, param_1, param_2): (cls, cls_docstring)]) def test_docstring_inject(obj, obj_docstring): obj_injected_docstring = Substitution(param_1='xxx', param_2='yyy')(obj) - obj_injected_docstring.__doc__ == obj_docstring + assert obj_injected_docstring.__doc__ == obj_docstring
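
Note: the recurring change in this diff is collapsing several copy-pasted error checks into one parametrized test driven by `pytest.raises(..., match=...)`. A minimal, self-contained sketch of that pattern follows; the toy `X`/`Y` arrays are placeholders for the module-level arrays each test file already defines, and the error messages are the ones asserted in the SMOTEENN hunk above.

    import numpy as np
    import pytest

    from imblearn.combine import SMOTEENN

    # Placeholder data: a small two-class problem standing in for the
    # module-level X, Y arrays used by the real test module.
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(40, 2))
    Y = np.array([0] * 10 + [1] * 30)


    @pytest.mark.parametrize(
        "smote_params, err_msg",
        [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
         ({'enn': 'rnd'}, "enn needs to be an ")]
    )
    def test_error_wrong_object(smote_params, err_msg):
        # Each case builds the estimator with one invalid component and checks
        # that fit_resample raises a ValueError whose message matches err_msg.
        smt = SMOTEENN(**smote_params)
        with pytest.raises(ValueError, match=err_msg):
            smt.fit_resample(X, Y)

Compared with the removed versions, a failure in one case no longer masks the remaining cases, and each case is reported under its own test id.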
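
Note: the second recurring change is turning module-level datasets into pytest fixtures, so data is rebuilt per test instead of being shared module state. A sketch under the same assumptions as the `test_imbalance.py` hunk above:

    from collections import Counter

    import pytest
    from sklearn.datasets import load_iris

    from imblearn.datasets import make_imbalance


    @pytest.fixture
    def iris():
        # The former module-level X, Y globals become a fixture, so every
        # test receives a fresh copy of the iris data.
        return load_iris(return_X_y=True)


    @pytest.mark.parametrize(
        "sampling_strategy, expected_counts",
        [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}),
         ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})]
    )
    def test_make_imbalance_dict(iris, sampling_strategy, expected_counts):
        X, y = iris
        _, y_ = make_imbalance(X, y, sampling_strategy=sampling_strategy)
        assert Counter(y_) == expected_counts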
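
Note: numeric comparisons move from `assert_allclose`/`approx` imports to explicit `pytest.approx`, and `test_score_objects.py` stacks two `parametrize` decorators to cover the cross-product of scores and averages. A small illustrative sketch (the 0.4714... value is the geometric-mean figure quoted in the hunks above; the test itself is only a demonstration of the mechanism):

    import pytest

    R_TOL = 1e-2  # relative tolerance, matching the metric test modules


    @pytest.mark.parametrize("average", ['macro', 'micro', 'weighted'])
    @pytest.mark.parametrize("expected", [0.471])
    def test_stacked_parametrize_and_approx(average, expected):
        # Stacked parametrize decorators run the cross-product of their cases
        # (3 x 1 here); test_score_objects.py uses the same mechanism to cover
        # every score/average combination.
        observed = 0.47140452079103168
        # The former assert_allclose(observed, expected, rtol=R_TOL) becomes:
        assert observed == pytest.approx(expected, rel=R_TOL)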