
TST: refactor and pytest style #470

Merged
merged 5 commits on Sep 7, 2018

Changes from all commits

doc/whats_new/v0.0.4.rst (3 additions, 0 deletions)
@@ -104,6 +104,9 @@ Maintenance
 - Catch deprecation warning in testing.
   :issue:`441` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Refactor and impose `pytest` style tests.
+  :issue:`470` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Documentation
 .............
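
The "pytest style" imposed by this PR comes down to two idioms that recur in the diffs below: collapsing near-duplicate test bodies into a single function driven by @pytest.mark.parametrize, and calling pytest.raises(..., match=...) instead of a bare raises imported from pytest. A minimal standalone sketch of the pattern (the divide function and test names are illustrative, not taken from the package):

import pytest


def divide(a, b):
    # toy function under test; raises on an invalid argument
    if b == 0:
        raise ValueError("b must be non-zero")
    return a / b


@pytest.mark.parametrize(
    "a, b, expected",
    [(4, 2, 2), (9, 3, 3)]
)
def test_divide(a, b, expected):
    assert divide(a, b) == expected


def test_divide_error():
    # match is a regular expression searched in the exception message
    with pytest.raises(ValueError, match="must be non-zero"):
        divide(1, 0)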

imblearn/combine/tests/test_smote_enn.py (9 additions, 11 deletions)
@@ -3,10 +3,8 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
+import pytest
 import numpy as np
-from pytest import raises
 
 from sklearn.utils.testing import assert_allclose, assert_array_equal

@@ -100,12 +98,12 @@ def test_validate_estimator_default():
     assert_array_equal(y_resampled, y_gt)
 
 
-def test_error_wrong_object():
-    smote = 'rnd'
-    enn = 'rnd'
-    smt = SMOTEENN(smote=smote, random_state=RND_SEED)
-    with raises(ValueError, match="smote needs to be a SMOTE"):
-        smt.fit_resample(X, Y)
-    smt = SMOTEENN(enn=enn, random_state=RND_SEED)
-    with raises(ValueError, match="enn needs to be an "):
+@pytest.mark.parametrize(
+    "smote_params, err_msg",
+    [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
+     ({'enn': 'rnd'}, "enn needs to be an ")]
+)
+def test_error_wrong_object(smote_params, err_msg):
+    smt = SMOTEENN(**smote_params)
+    with pytest.raises(ValueError, match=err_msg):
         smt.fit_resample(X, Y)

imblearn/combine/tests/test_smote_tomek.py (9 additions, 11 deletions)
@@ -3,10 +3,8 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
+import pytest
 import numpy as np
-from pytest import raises
 
 from sklearn.utils.testing import assert_allclose, assert_array_equal

@@ -106,12 +104,12 @@ def test_validate_estimator_default():
     assert_array_equal(y_resampled, y_gt)
 
 
-def test_error_wrong_object():
-    smote = 'rnd'
-    tomek = 'rnd'
-    smt = SMOTETomek(smote=smote, random_state=RND_SEED)
-    with raises(ValueError, match="smote needs to be a SMOTE"):
-        smt.fit_resample(X, Y)
-    smt = SMOTETomek(tomek=tomek, random_state=RND_SEED)
-    with raises(ValueError, match="tomek needs to be a TomekLinks"):
+@pytest.mark.parametrize(
+    "smote_params, err_msg",
+    [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
+     ({'tomek': 'rnd'}, "tomek needs to be a TomekLinks")]
+)
+def test_error_wrong_object(smote_params, err_msg):
+    smt = SMOTETomek(**smote_params)
+    with pytest.raises(ValueError, match=err_msg):
         smt.fit_resample(X, Y)
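
A second idiom shared by the two combine test modules above: each parametrized case supplies a kwargs dict that is unpacked into the estimator constructor, so a single test body covers every invalid-argument combination. A rough standalone sketch (the Sampler class below is a made-up stand-in for SMOTEENN/SMOTETomek, which in the real code raise at fit_resample time):

import pytest


class Sampler:
    # stand-in that only validates its inputs
    def __init__(self, smote=None, enn=None):
        if isinstance(smote, str):
            raise ValueError("smote needs to be a SMOTE object")
        if isinstance(enn, str):
            raise ValueError("enn needs to be an EditedNearestNeighbours object")


@pytest.mark.parametrize(
    "params, err_msg",
    [({'smote': 'rnd'}, "smote needs to be a SMOTE"),
     ({'enn': 'rnd'}, "enn needs to be an ")]
)
def test_wrong_object(params, err_msg):
    with pytest.raises(ValueError, match=err_msg):
        Sampler(**params)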

imblearn/datasets/tests/test_imbalance.py (44 additions, 42 deletions)
@@ -3,64 +3,66 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
 from collections import Counter
 
 import pytest
 import numpy as np
 
-from pytest import raises
-
 from sklearn.datasets import load_iris
 
 from imblearn.datasets import make_imbalance
 
-data = load_iris()
-X, Y = data.data, data.target
+
+@pytest.fixture
+def iris():
+    return load_iris(return_X_y=True)
 
 
-def test_make_imbalanced_backcompat():
+def test_make_imbalanced_backcompat(iris):
     # check an error is raised when we don't pass sampling_strategy and ratio
-    with raises(TypeError, match="missing 1 required positional argument"):
-        make_imbalance(X, Y)
+    with pytest.raises(TypeError, match="missing 1 required positional argument"):
+        make_imbalance(*iris)
 
 
-def test_make_imbalance_error():
+@pytest.mark.parametrize(
+    "sampling_strategy, err_msg",
+    [({0: -100, 1: 50, 2: 50}, "in a class cannot be negative"),
+     ({0: 10, 1: 70}, "should be less or equal to the original"),
+     ('random-string', "has to be a dictionary or a function")]
+)
+def test_make_imbalance_error(iris, sampling_strategy, err_msg):
     # we are reusing part of utils.check_sampling_strategy, however this is not
     # covered in the common tests so we will repeat it here
-    sampling_strategy = {0: -100, 1: 50, 2: 50}
-    with raises(ValueError, match="in a class cannot be negative"):
-        make_imbalance(X, Y, sampling_strategy)
-    sampling_strategy = {0: 10, 1: 70}
-    with raises(ValueError, match="should be less or equal to the original"):
-        make_imbalance(X, Y, sampling_strategy)
-    y_ = np.zeros((X.shape[0], ))
-    sampling_strategy = {0: 10}
-    with raises(ValueError, match="needs to have more than 1 class."):
-        make_imbalance(X, y_, sampling_strategy)
-    sampling_strategy = 'random-string'
-    with raises(ValueError, match="has to be a dictionary or a function"):
-        make_imbalance(X, Y, sampling_strategy)
-
-
-def test_make_imbalance_dict():
-    sampling_strategy = {0: 10, 1: 20, 2: 30}
-    X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy)
-    assert Counter(y_) == sampling_strategy
-
-    sampling_strategy = {0: 10, 1: 20}
-    X_, y_ = make_imbalance(X, Y, sampling_strategy=sampling_strategy)
-    assert Counter(y_) == {0: 10, 1: 20, 2: 50}
+    X, y = iris
+    with pytest.raises(ValueError, match=err_msg):
+        make_imbalance(X, y, sampling_strategy)
+
+
+def test_make_imbalance_error_single_class(iris):
+    X, y = iris
+    y = np.zeros_like(y)
+    with pytest.raises(ValueError, match="needs to have more than 1 class."):
+        make_imbalance(X, y, {0: 10})
+
+
+@pytest.mark.parametrize(
+    "sampling_strategy, expected_counts",
+    [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}),
+     ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})]
+)
+def test_make_imbalance_dict(iris, sampling_strategy, expected_counts):
+    X, y = iris
+    _, y_ = make_imbalance(X, y, sampling_strategy=sampling_strategy)
+    assert Counter(y_) == expected_counts
 
 
 @pytest.mark.filterwarnings("ignore:'ratio' has been deprecated in 0.4")
-def test_make_imbalance_ratio():
-    # check that using 'ratio' is working
-    sampling_strategy = {0: 10, 1: 20, 2: 30}
-    X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy)
-    assert Counter(y_) == sampling_strategy
-
-    sampling_strategy = {0: 10, 1: 20}
-    X_, y_ = make_imbalance(X, Y, ratio=sampling_strategy)
-    assert Counter(y_) == {0: 10, 1: 20, 2: 50}
+@pytest.mark.parametrize(
+    "sampling_strategy, expected_counts",
+    [({0: 10, 1: 20, 2: 30}, {0: 10, 1: 20, 2: 30}),
+     ({0: 10, 1: 20}, {0: 10, 1: 20, 2: 50})]
+)
+def test_make_imbalance_dict_ratio(iris, sampling_strategy, expected_counts):
+    X, y = iris
+    _, y_ = make_imbalance(X, y, ratio=sampling_strategy)
+    assert Counter(y_) == expected_counts
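
For readers less familiar with fixtures: pytest injects a fixture into a test by matching the parameter name against a function decorated with @pytest.fixture, so each test above that declares an iris argument receives a freshly evaluated (X, y) tuple instead of reading module-level globals. A short sketch of the mechanism (the test name is illustrative):

import pytest
from sklearn.datasets import load_iris


@pytest.fixture
def iris():
    # re-evaluated for every test that lists `iris` as a parameter
    return load_iris(return_X_y=True)


def test_feature_and_target_sizes(iris):
    X, y = iris
    assert X.shape[0] == y.shape[0]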

imblearn/datasets/tests/test_zenodo.py (14 additions, 13 deletions)
@@ -6,10 +6,10 @@
 # Christos Aridas
 # License: MIT
 
-from imblearn.datasets import fetch_datasets
-from sklearn.utils.testing import SkipTest, assert_allclose
+import pytest
 
-from pytest import raises
+from imblearn.datasets import fetch_datasets
+from sklearn.utils.testing import SkipTest
 
 DATASET_SHAPE = {
     'ecoli': (336, 7),
@@ -79,19 +79,20 @@ def test_fetch_filter():
     assert DATASET_SHAPE['ecoli'] == X1.shape
     assert X1.shape == X2.shape
 
-    assert_allclose(X1.sum(), X2.sum())
+    assert X1.sum() == pytest.approx(X2.sum())
 
     y1, y2 = datasets1['ecoli'].target, datasets2['ecoli'].target
     assert (X1.shape[0], ) == y1.shape
     assert (X1.shape[0], ) == y2.shape
 
 
-def test_fetch_error():
-    with raises(ValueError, match='is not a dataset available.'):
-        fetch_datasets(filter_data=tuple(['rnd']))
-    with raises(ValueError, match='dataset with the ID='):
-        fetch_datasets(filter_data=tuple([-1]))
-    with raises(ValueError, match='dataset with the ID='):
-        fetch_datasets(filter_data=tuple([100]))
-    with raises(ValueError, match='value in the tuple'):
-        fetch_datasets(filter_data=tuple([1.00]))
+@pytest.mark.parametrize(
+    "filter_data, err_msg",
+    [(('rnf',), "is not a dataset available"),
+     ((-1,), "dataset with the ID="),
+     ((100,), "dataset with the ID="),
+     ((1.00,), "value in the tuple")]
+)
+def test_fetch_error(filter_data, err_msg):
+    with pytest.raises(ValueError, match=err_msg):
+        fetch_datasets(filter_data=filter_data)
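
The assert X1.sum() == pytest.approx(X2.sum()) line above is the pytest-native replacement for assert_allclose: pytest.approx compares with a relative tolerance of 1e-6 by default, and rel/abs can be overridden. A tiny illustrative example:

import pytest


def test_float_sum_is_close():
    total = sum([0.1] * 10)  # 0.9999999999999999, not exactly 1.0
    assert total == pytest.approx(1.0)             # default rel=1e-6
    assert total == pytest.approx(1.0, rel=1e-12)  # still passes; error is ~1e-16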

imblearn/ensemble/tests/test_balance_cascade.py (0 additions, 2 deletions)
@@ -3,8 +3,6 @@
 # Christos Aridas
 # License: MIT
 
-from __future__ import print_function
-
 import numpy as np
 
 from pytest import raises

imblearn/keras/tests/test_generator.py (24 additions, 16 deletions)
@@ -18,9 +18,13 @@
 from imblearn.keras import BalancedBatchGenerator
 from imblearn.keras import balanced_batch_generator
 
-iris = load_iris()
-X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
-y = to_categorical(y, 3)
+
+@pytest.fixture
+def data():
+    iris = load_iris()
+    X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+    y = to_categorical(y, 3)
+    return X, y
 
 
 def _build_keras_model(n_classes, n_features):
@@ -31,19 +35,20 @@ def _build_keras_model(n_classes, n_features):
     return model
 
 
-def test_balanced_batch_generator_class_no_return_indices():
+def test_balanced_batch_generator_class_no_return_indices(data):
     with pytest.raises(ValueError, match='needs to return the indices'):
-        BalancedBatchGenerator(X, y, sampler=ClusterCentroids(), batch_size=10)
+        BalancedBatchGenerator(*data, sampler=ClusterCentroids(), batch_size=10)
 
 
 @pytest.mark.parametrize(
     "sampler, sample_weight",
     [(None, None),
      (RandomOverSampler(), None),
      (NearMiss(), None),
-     (None, np.random.uniform(size=(y.shape[0])))]
+     (None, np.random.uniform(size=120))]
 )
-def test_balanced_batch_generator_class(sampler, sample_weight):
+def test_balanced_batch_generator_class(data, sampler, sample_weight):
+    X, y = data
     model = _build_keras_model(y.shape[1], X.shape[1])
     training_generator = BalancedBatchGenerator(X, y,
                                                  sample_weight=sample_weight,
@@ -55,33 +60,35 @@ def test_balanced_batch_generator_class(sampler, sample_weight):


 @pytest.mark.parametrize("keep_sparse", [True, False])
-def test_balanced_batch_generator_class_sparse(keep_sparse):
+def test_balanced_batch_generator_class_sparse(data, keep_sparse):
+    X, y = data
     training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y,
                                                  batch_size=10,
                                                  keep_sparse=keep_sparse,
                                                  random_state=42)
     for idx in range(len(training_generator)):
-        X_batch, y_batch = training_generator.__getitem__(idx)
+        X_batch, _ = training_generator.__getitem__(idx)
         if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
             assert not sparse.issparse(X_batch)
 
 
-def test_balanced_batch_generator_function_no_return_indices():
+def test_balanced_batch_generator_function_no_return_indices(data):
     with pytest.raises(ValueError, match='needs to return the indices'):
         balanced_batch_generator(
-            X, y, sampler=ClusterCentroids(), batch_size=10, random_state=42)
+            *data, sampler=ClusterCentroids(), batch_size=10, random_state=42)
 
 
 @pytest.mark.parametrize(
     "sampler, sample_weight",
     [(None, None),
      (RandomOverSampler(), None),
      (NearMiss(), None),
-     (None, np.random.uniform(size=(y.shape[0])))]
+     (None, np.random.uniform(size=120))]
 )
-def test_balanced_batch_generator_function(sampler, sample_weight):
+def test_balanced_batch_generator_function(data, sampler, sample_weight):
+    X, y = data
     model = _build_keras_model(y.shape[1], X.shape[1])
     training_generator, steps_per_epoch = balanced_batch_generator(
         X, y, sample_weight=sample_weight, sampler=sampler, batch_size=10,
@@ -92,12 +99,13 @@ def test_balanced_batch_generator_function(sampler, sample_weight):


 @pytest.mark.parametrize("keep_sparse", [True, False])
-def test_balanced_batch_generator_function_sparse(keep_sparse):
+def test_balanced_batch_generator_function_sparse(data, keep_sparse):
+    X, y = data
     training_generator, steps_per_epoch = balanced_batch_generator(
         sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
         random_state=42)
-    for idx in range(steps_per_epoch):
-        X_batch, y_batch = next(training_generator)
+    for _ in range(steps_per_epoch):
+        X_batch, _ = next(training_generator)
         if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
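
One detail worth noting in the parametrize lists above: the sample-weight case changed from np.random.uniform(size=(y.shape[0])) to size=120 because y is now produced by the data fixture, while @pytest.mark.parametrize arguments are evaluated at collection time, before any fixture runs; 120 is simply the 30 + 50 + 40 samples requested from make_imbalance. A small standalone sketch of that constraint (the fixture contents here are illustrative):

import numpy as np
import pytest


@pytest.fixture
def data():
    # 120 samples, mirroring the 30 + 50 + 40 class counts used above
    X = np.arange(240, dtype=float).reshape(120, 2)
    y = np.zeros(120)
    return X, y


@pytest.mark.parametrize(
    # parametrize arguments are evaluated at collection time, before any
    # fixture runs, so a fixture value cannot be used here; hence the
    # hard-coded length instead of y.shape[0]
    "sample_weight", [None, np.random.uniform(size=120)]
)
def test_weight_length_matches_data(data, sample_weight):
    X, _ = data
    if sample_weight is not None:
        assert sample_weight.shape[0] == X.shape[0]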