Skip to content

Commit 839df67

Browse files
committed
MAINT: cleanup deprecation warning in tests and source code (#466)
1 parent c0a4208 commit 839df67

36 files changed

+260
-156
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ matrix:
3535
- env: DISTRIB="conda" PYTHON_VERSION="2.7"
3636
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.20rc"
3737
- env: DISTRIB="conda" PYTHON_VERSION="3.6"
38-
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.20rc"
38+
NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="0.20rc"
3939
- env: DISTRIB="conda" PYTHON_VERSION="3.7"
4040
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.20rc"
4141
- env: DISTRIB="conda" PYTHON_VERSION="3.7"

appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ install:
4242
- activate testenv
4343
- conda install scipy numpy -y -q
4444
- pip install --pre scikit-learn
45-
- "conda install %OPTIONAL_DEP% -y -q"
45+
- conda install %OPTIONAL_DEP% -y -q
4646
- conda install pytest pytest-cov -y -q
4747
- pip install codecov
4848
- pip install .

build_tools/circle/build_doc.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,12 @@ conda update --yes --quiet conda
8888

8989
# Configure the conda environment and put it in the path using the
9090
# provided versions
91-
conda create -n $CONDA_ENV_NAME --yes --quiet python=3
91+
conda create -n $CONDA_ENV_NAME --yes --quiet python=3.6
9292
source activate $CONDA_ENV_NAME
9393

94-
conda install --yes pip numpy scipy scikit-learn pillow matplotlib sphinx \
94+
conda install --yes pip numpy scipy pillow matplotlib sphinx \
9595
sphinx_rtd_theme numpydoc pandas keras
96+
pip install --pre scikit-learn
9697
pip install -U git+https://github.com/sphinx-gallery/sphinx-gallery.git
9798

9899
# Build and install imbalanced-learn in dev mode

build_tools/travis/install.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ if [[ "$DISTRIB" == "conda" ]]; then
4040
source activate testenv
4141
conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
4242

43-
if [[ $PYTHON_VERSION == "3.7" ]]; then
44-
conda install --yes pandas
45-
conda install --yes -c conda-forge keras
43+
if [[ $PYTHON_VERSION == "3.6" ]]; then
44+
# Tensorflow is not available in Python 3.7 yet.
45+
conda install --yes pandas keras tensorflow
4646
KERAS_BACKEND=tensorflow
4747
python -c "import keras.backend"
4848
sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;

doc/ensemble.rst

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ under-sampling the original set::
3232
>>> print(sorted(Counter(y).items()))
3333
[(0, 64), (1, 262), (2, 4674)]
3434
>>> from imblearn.ensemble import EasyEnsemble
35-
>>> ee = EasyEnsemble(random_state=0, n_subsets=10)
36-
>>> X_resampled, y_resampled = ee.fit_resample(X, y)
37-
>>> print(X_resampled.shape)
35+
>>> ee = EasyEnsemble(random_state=0, n_subsets=10) # doctest: +SKIP
36+
>>> X_resampled, y_resampled = ee.fit_resample(X, y) # doctest: +SKIP
37+
>>> print(X_resampled.shape) # doctest: +SKIP
3838
(10, 192, 2)
39-
>>> print(sorted(Counter(y_resampled[0]).items()))
39+
>>> print(sorted(Counter(y_resampled[0]).items())) # doctest: +SKIP
4040
[(0, 64), (1, 64), (2, 64)]
4141

4242
:class:`EasyEnsemble` has two important parameters: (i) ``n_subsets`` will be
@@ -53,7 +53,9 @@ parameter ``n_max_subset`` and an additional bootstrapping can be activated with
5353
>>> from imblearn.ensemble import BalanceCascade
5454
>>> from sklearn.linear_model import LogisticRegression
5555
>>> bc = BalanceCascade(random_state=0,
56-
... estimator=LogisticRegression(random_state=0),
56+
... estimator=LogisticRegression(solver='lbfgs',
57+
... multi_class='auto',
58+
... random_state=0),
5759
... n_max_subset=4)
5860
>>> X_resampled, y_resampled = bc.fit_resample(X, y)
5961
>>> print(X_resampled.shape)

doc/under_sampling.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ used as::
340340
>>> oss = OneSidedSelection(random_state=0)
341341
>>> X_resampled, y_resampled = oss.fit_resample(X, y)
342342
>>> print(sorted(Counter(y_resampled).items()))
343-
[(0, 64), (1, 174), (2, 4403)]
343+
[(0, 64), (1, 174), (2, 4404)]
344344

345345
Our implementation offers to set the number of seeds to put in the set :math:`C`
346346
originally by setting the parameter ``n_seeds_S``.
@@ -379,7 +379,8 @@ removed. The class can be used as::
379379
>>> from sklearn.linear_model import LogisticRegression
380380
>>> from imblearn.under_sampling import InstanceHardnessThreshold
381381
>>> iht = InstanceHardnessThreshold(random_state=0,
382-
... estimator=LogisticRegression())
382+
... estimator=LogisticRegression(
383+
... solver='lbfgs', multi_class='auto'))
383384
>>> X_resampled, y_resampled = iht.fit_resample(X, y)
384385
>>> print(sorted(Counter(y_resampled).items()))
385386
[(0, 64), (1, 64), (2, 64)]

doc/whats_new/v0.0.4.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ Maintenance
9797
- Upgrade requirements to scikit-learn 0.20.
9898
:issue:`379` by :user:`Guillaume Lemaitre <glemaitre>`.
9999

100+
- Catch deprecation warning in testing.
101+
:issue:`441` by :user:`Guillaume Lemaitre <glemaitre>`.
102+
100103
Documentation
101104
.............
102105

examples/plot_outlier_rejections.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def plot_scatter(X, y, title):
3737
plt.legend()
3838
plt.title(title)
3939

40+
4041
##############################################################################
4142
# Toy data generation
4243
##############################################################################
@@ -82,11 +83,13 @@ def plot_scatter(X, y, title):
8283
# :class:`imblearn.FunctionSampler` will be called when using the method
8384
# ``fit_resample``.
8485

86+
8587
def outlier_rejection(X, y):
8688
"""This will be our function used to resample our dataset."""
8789
model = IsolationForest(max_samples=100,
8890
contamination=0.4,
89-
random_state=rng)
91+
random_state=rng,
92+
behaviour='new')
9093
model.fit(X)
9194
y_pred = model.predict(X)
9295
return X[y_pred == 1], y[y_pred == 1]
@@ -105,11 +108,12 @@ def outlier_rejection(X, y):
105108
# affected during the prediction.
106109

107110
pipe = make_pipeline(FunctionSampler(func=outlier_rejection),
108-
LogisticRegression(random_state=rng))
111+
LogisticRegression(solver='lbfgs', multi_class='auto',
112+
random_state=rng))
109113
y_pred = pipe.fit(X_train, y_train).predict(X_test)
110114
print(classification_report(y_test, y_pred))
111115

112-
clf = LogisticRegression(random_state=rng)
116+
clf = LogisticRegression(solver='lbfgs', multi_class='auto', random_state=rng)
113117
y_pred = clf.fit(X_train, y_train).predict(X_test)
114118
print(classification_report(y_test, y_pred))
115119

examples/under-sampling/plot_comparison_under_sampling.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,9 @@ def plot_decision_function(X, y, clf, ax):
235235
clf = LinearSVC().fit(X, y)
236236
plot_decision_function(X, y, clf, ax1)
237237
ax1.set_title('Linear SVC with y={}'.format(Counter(y)))
238-
sampler = InstanceHardnessThreshold(random_state=0,
239-
estimator=LogisticRegression())
238+
sampler = InstanceHardnessThreshold(
239+
random_state=0, estimator=LogisticRegression(solver='lbfgs',
240+
multi_class='auto'))
240241
clf = make_pipeline(sampler, LinearSVC())
241242
clf.fit(X, y)
242243
plot_decision_function(X, y, clf, ax2)

examples/under-sampling/plot_instance_hardness_threshold.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ def plot_resampling(ax, X, y, title):
6060
c0, c1 = plot_resampling(ax, X_vis, y, 'Original set')
6161
else:
6262
iht = InstanceHardnessThreshold(sampling_strategy=sampling_strategy,
63-
estimator=LogisticRegression(),
63+
estimator=LogisticRegression(
64+
solver='lbfgs',
65+
multi_class='auto'),
6466
return_indices=True)
6567
X_res, y_res, idx_res = iht.fit_resample(X, y)
6668
X_res_vis = pca.transform(X_res)

imblearn/combine/tests/test_smote_enn.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ def test_sample_regular():
4848
def test_sample_regular_pass_smote_enn():
4949
smote = SMOTEENN(
5050
smote=SMOTE(sampling_strategy='auto', random_state=RND_SEED),
51-
enn=EditedNearestNeighbours(
52-
sampling_strategy='all', random_state=RND_SEED),
51+
enn=EditedNearestNeighbours(sampling_strategy='all'),
5352
random_state=RND_SEED)
5453
X_resampled, y_resampled = smote.fit_resample(X, Y)
5554

@@ -77,8 +76,7 @@ def test_sample_regular_half():
7776

7877
def test_validate_estimator_init():
7978
smote = SMOTE(random_state=RND_SEED)
80-
enn = EditedNearestNeighbours(
81-
random_state=RND_SEED, sampling_strategy='all')
79+
enn = EditedNearestNeighbours(sampling_strategy='all')
8280
smt = SMOTEENN(smote=smote, enn=enn, random_state=RND_SEED)
8381
X_resampled, y_resampled = smt.fit_resample(X, Y)
8482
X_gt = np.array([[1.52091956, -0.49283504], [0.84976473, -0.15570176], [

imblearn/combine/tests/test_smote_tomek.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def test_sample_regular_half():
7070

7171
def test_validate_estimator_init():
7272
smote = SMOTE(random_state=RND_SEED)
73-
tomek = TomekLinks(random_state=RND_SEED, sampling_strategy='all')
73+
tomek = TomekLinks(sampling_strategy='all')
7474
smt = SMOTETomek(smote=smote, tomek=tomek, random_state=RND_SEED)
7575
X_resampled, y_resampled = smt.fit_resample(X, Y)
7676
X_gt = np.array([[0.68481731, 0.51935141], [1.34192108, -0.13367336], [

imblearn/datasets/tests/test_imbalance.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from collections import Counter
99

10+
import pytest
1011
import numpy as np
1112

1213
from pytest import raises
@@ -53,6 +54,7 @@ def test_make_imbalance_dict():
5354
assert Counter(y_) == {0: 10, 1: 20, 2: 50}
5455

5556

57+
@pytest.mark.filterwarnings("ignore:'ratio' has been deprecated in 0.4")
5658
def test_make_imbalance_ratio():
5759
# check that using 'ratio' is working
5860
sampling_strategy = {0: 10, 1: 20, 2: 30}

imblearn/ensemble/_balance_cascade.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def _fit_resample(self, X, y):
179179
# fit and predict using cross validation
180180
X_subset = safe_indexing(X, subset_indices)
181181
y_subset = safe_indexing(y, subset_indices)
182-
pred = cross_val_predict(self.estimator_, X_subset, y_subset)
182+
pred = cross_val_predict(self.estimator_, X_subset, y_subset, cv=3)
183183
# extract the prediction about the targeted classes only
184184
pred_target = pred[:index_under_sample.size]
185185
index_classified = index_under_sample[pred_target == safe_indexing(

imblearn/ensemble/_easy_ensemble.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ class EasyEnsemble(BaseEnsembleSampler):
9393
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
9494
>>> print('Original dataset shape %s' % Counter(y))
9595
Original dataset shape Counter({{1: 900, 0: 100}})
96-
>>> ee = EasyEnsemble(random_state=42)
97-
>>> X_res, y_res = ee.fit_resample(X, y)
98-
>>> print('Resampled dataset shape %s' % Counter(y_res[0]))
96+
>>> ee = EasyEnsemble(random_state=42) # doctest: +SKIP
97+
>>> X_res, y_res = ee.fit_resample(X, y) # doctest: +SKIP
98+
>>> print('Resampled dataset shape %s' % Counter(y_res[0])) # doctest: +SKIP
9999
Resampled dataset shape Counter({{0: 100, 1: 100}})
100100
101101
"""

imblearn/ensemble/tests/test_bagging.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ def test_balanced_bagging_classifier():
4747
for base_estimator in [
4848
None,
4949
DummyClassifier(),
50-
Perceptron(),
50+
Perceptron(max_iter=1000, tol=1e-3),
5151
DecisionTreeClassifier(),
5252
KNeighborsClassifier(),
53-
SVC()
53+
SVC(gamma='scale')
5454
]:
5555
for params in grid:
5656
BalancedBaggingClassifier(
@@ -155,8 +155,10 @@ def test_probability():
155155

156156
# Degenerate case, where some classes are missing
157157
ensemble = BalancedBaggingClassifier(
158-
base_estimator=LogisticRegression(), random_state=0,
159-
max_samples=5).fit(X_train, y_train)
158+
base_estimator=LogisticRegression(solver='lbfgs',
159+
multi_class='auto'),
160+
random_state=0, max_samples=5)
161+
ensemble.fit(X_train, y_train)
160162

161163
assert_array_almost_equal(
162164
np.sum(ensemble.predict_proba(X_test), axis=1),
@@ -179,7 +181,7 @@ def test_oob_score_classification():
179181
random_state=0)
180182
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
181183

182-
for base_estimator in [DecisionTreeClassifier(), SVC()]:
184+
for base_estimator in [DecisionTreeClassifier(), SVC(gamma='scale')]:
183185
clf = BalancedBaggingClassifier(
184186
base_estimator=base_estimator,
185187
n_estimators=100,
@@ -282,8 +284,8 @@ def test_gridsearch():
282284
parameters = {'n_estimators': (1, 2), 'base_estimator__C': (1, 2)}
283285

284286
GridSearchCV(
285-
BalancedBaggingClassifier(SVC()), parameters, scoring="roc_auc").fit(
286-
X, y)
287+
BalancedBaggingClassifier(SVC(gamma='scale')), parameters, cv=3,
288+
scoring="roc_auc").fit(X, y)
287289

288290

289291
def test_base_estimator():
@@ -311,7 +313,8 @@ def test_base_estimator():
311313
DecisionTreeClassifier)
312314

313315
ensemble = BalancedBaggingClassifier(
314-
Perceptron(), n_jobs=3, random_state=0).fit(X_train, y_train)
316+
Perceptron(max_iter=1000, tol=1e-3), n_jobs=3, random_state=0).fit(
317+
X_train, y_train)
315318

316319
assert isinstance(ensemble.base_estimator_.steps[-1][1], Perceptron)
317320

@@ -445,7 +448,8 @@ def test_estimators_samples():
445448

446449
# remap the y outside of the BalancedBaggingClassifier
447450
# _, y = np.unique(y, return_inverse=True)
448-
bagging = BalancedBaggingClassifier(LogisticRegression(),
451+
bagging = BalancedBaggingClassifier(LogisticRegression(solver='lbfgs',
452+
multi_class='auto'),
449453
max_samples=0.5,
450454
max_features=0.5, random_state=1,
451455
bootstrap=False)

imblearn/ensemble/tests/test_balance_cascade.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def test_fit_resample_auto_early_stop():
118118

119119
def test_give_classifier_obj():
120120
sampling_strategy = 'auto'
121-
estimator = RandomForestClassifier(random_state=RND_SEED)
121+
estimator = RandomForestClassifier(n_estimators=10, random_state=RND_SEED)
122122
bc = BalanceCascade(
123123
sampling_strategy=sampling_strategy,
124124
random_state=RND_SEED,

imblearn/ensemble/tests/test_easy_ensemble.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,5 +292,5 @@ def test_easy_ensemble_classifier_grid_search():
292292
'base_estimator__n_estimators': [3, 4]}
293293
grid_search = GridSearchCV(
294294
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()),
295-
parameters)
295+
parameters, cv=5, iid=False)
296296
grid_search.fit(X, y)

imblearn/keras/_generator.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
from ..utils._docstring import _random_state_docstring
2525
from ..tensorflow import balanced_batch_generator as tf_bbg
2626

27+
DONT_HAVE_RANDOM_STATE = ('NearMiss', 'EditedNearestNeighbours',
28+
'RepeatedEditedNearestNeighbours', 'AllKNN',
29+
'NeighbourhoodCleaningRule', 'TomekLinks')
30+
2731

2832
class BalancedBatchGenerator(ParentClass):
2933
"""Create balanced batches when training a keras model.
@@ -122,7 +126,9 @@ def _sample(self):
122126
"which has an attribute 'return_indices'.")
123127
self.sampler_ = clone(self.sampler)
124128
self.sampler_.set_params(return_indices=True)
125-
set_random_state(self.sampler_, random_state)
129+
# FIXME: Remove in 0.6
130+
if self.sampler_.__class__.__name__ not in DONT_HAVE_RANDOM_STATE:
131+
set_random_state(self.sampler_, random_state)
126132

127133
_, _, self.indices_ = self.sampler_.fit_resample(self.X, self.y)
128134
# shuffle the indices since the samplers pack them by class

imblearn/keras/tests/test_generator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from imblearn.datasets import make_imbalance
1414
from imblearn.under_sampling import ClusterCentroids
1515
from imblearn.under_sampling import NearMiss
16+
from imblearn.over_sampling import RandomOverSampler
1617

1718
from imblearn.keras import BalancedBatchGenerator
1819
from imblearn.keras import balanced_batch_generator
@@ -38,6 +39,7 @@ def test_balanced_batch_generator_class_no_return_indices():
3839
@pytest.mark.parametrize(
3940
"sampler, sample_weight",
4041
[(None, None),
42+
(RandomOverSampler(), None),
4143
(NearMiss(), None),
4244
(None, np.random.uniform(size=(y.shape[0])))]
4345
)
@@ -75,6 +77,7 @@ def test_balanced_batch_generator_function_no_return_indices():
7577
@pytest.mark.parametrize(
7678
"sampler, sample_weight",
7779
[(None, None),
80+
(RandomOverSampler(), None),
7881
(NearMiss(), None),
7982
(None, np.random.uniform(size=(y.shape[0])))]
8083
)

imblearn/metrics/_classification.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -632,11 +632,13 @@ class is unrecognized by the classifier, G-mean resolves to zero. To
632632
tp_sum = tp_sum[indices]
633633
true_sum = true_sum[indices]
634634

635-
recall = _prf_divide(tp_sum, true_sum, "recall", "true", None,
636-
"recall")
635+
with np.errstate(divide='ignore', invalid='ignore'):
636+
recall = _prf_divide(tp_sum, true_sum, "recall", "true", None,
637+
"recall")
637638
recall[recall == 0] = correction
638639

639-
gmean = sp.stats.gmean(recall)
640+
with np.errstate(divide='ignore', invalid='ignore'):
641+
gmean = sp.stats.gmean(recall)
640642
# old versions of scipy return MaskedConstant instead of 0.0
641643
if isinstance(gmean, np.ma.core.MaskedConstant):
642644
return 0.0

0 commit comments

Comments
 (0)