From dae3ba387dc8b04373a59a6326e5fbd24f983250 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 1 Mar 2018 02:03:46 +0100
Subject: [PATCH 01/50] EHN accept one-vs-all targets

---
 imblearn/__init__.py | 3 +++
 imblearn/base.py | 19 ++++++++++++++++---
 imblearn/utils/validation.py | 28 +++++++++++++++++++++-------
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/imblearn/__init__.py b/imblearn/__init__.py
index 9f05adb1f..7803ca016 100644
--- a/imblearn/__init__.py
+++ b/imblearn/__init__.py
@@ -13,6 +13,9 @@
 exceptions
     Module including custom warnings and error classes used across
     imbalanced-learn.
+keras
+    Module which provides custom generators and layers for deep learning
+    using keras.
 metrics
     Module which provides metrics to quantify the classification performance
     with imbalanced datasets.
diff --git a/imblearn/base.py b/imblearn/base.py
index aa12eb365..d352d9696 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -9,9 +9,13 @@
 import logging
 from abc import ABCMeta, abstractmethod

+import numpy as np
+
 from sklearn.base import BaseEstimator
 from sklearn.externals import six
+from sklearn.preprocessing import label_binarize
 from sklearn.utils import check_X_y
+from sklearn.utils.multiclass import type_of_target
 from sklearn.utils.validation import check_is_fitted

 from .utils import check_ratio, check_target_type, hash_X_y
@@ -54,14 +58,23 @@ def sample(self, X, y):
             The corresponding label of `X_resampled`

         """
-        # Check the consistency of X and y
+        y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])

         check_is_fitted(self, 'ratio_')
         self._check_X_y(X, y)

-        return self._sample(X, y)
+        output = self._sample(X, y)
+
+        if binarize_y:
+            y_sampled = label_binarize(output[1], np.unique(y))
+            if len(output) == 2:
+                return output[0], y_sampled
+            else:
+                return output[0], y_sampled, output[2]
+        else:
+            return output

     def fit_sample(self, X, y):
         """Fit the statistics and resample the data directly.
@@ -152,8 +165,8 @@ def fit(self, X, y):
             Return self.

         """
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
         y = check_target_type(y)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
         self.X_hash_, self.y_hash_ = hash_X_y(X, y)
         # self.sampling_type is already checked in check_ratio
         self.ratio_ = check_ratio(self.ratio, y, self._sampling_type)
diff --git a/imblearn/utils/validation.py b/imblearn/utils/validation.py
index d009dacbe..17fa7e93a 100644
--- a/imblearn/utils/validation.py
+++ b/imblearn/utils/validation.py
@@ -10,6 +10,7 @@

 import numpy as np

+from sklearn.preprocessing import label_binarize
 from sklearn.neighbors.base import KNeighborsMixin
 from sklearn.neighbors import NearestNeighbors
 from sklearn.externals import six, joblib
@@ -19,7 +20,7 @@

 SAMPLING_KIND = ('over-sampling', 'under-sampling', 'clean-sampling',
                  'ensemble')
-TARGET_KIND = ('binary', 'multiclass')
+TARGET_KIND = ('binary', 'multiclass', 'multilabel-indicator')


 def check_neighbors_object(nn_name, nn_object, additional_neighbor=0):

     raise_isinstance_error(nn_name, [int, KNeighborsMixin], nn_object)


-def check_target_type(y):
+def check_target_type(y, indicate_one_vs_all=False):
     """Check the target types to conform to the current samplers.

-    The current samplers should be compatible with ``'binary'`` and
-    ``'multiclass'`` targets only.
+    The current samplers should be compatible with ``'binary'``,
+    ``'multilabel-indicator'`` and ``'multiclass'`` targets only.

     Parameters
     ----------
     y : ndarray,
-        The array containing the target
+        The array containing the target.
+
+    indicate_one_vs_all : bool, optional
+        Whether to indicate if the targets are encoded in a one-vs-all fashion.

     Returns
     -------
     y : ndarray,
         The returned target.

+    is_one_vs_all : bool, optional
+        Indicate if the target was originally encoded in a one-vs-all fashion.
+        Only returned if ``indicate_one_vs_all=True``.
+
     """
-    if type_of_target(y) not in TARGET_KIND:
+    type_y = type_of_target(y)
+    if type_y not in TARGET_KIND:
         # FIXME: ideally we should raise an error but the sklearn API does
         # not allow for it
         warnings.warn("'y' should be of types {} only. Got {} instead.".format(
             TARGET_KIND, type_of_target(y)))
-    return y
+
+    if indicate_one_vs_all:
+        return (y.argmax(axis=1) if type_y == 'multilabel-indicator' else y,
+                type_y == 'multilabel-indicator')
+    else:
+        return y.argmax(axis=1) if type_y == 'multilabel-indicator' else y


 def hash_X_y(X, y, n_samples=10, n_features=5):

From 7487ce48d7037823af4253064e8a7460b9594c50 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 1 Mar 2018 02:29:49 +0100
Subject: [PATCH 02/50] TST add test for check_target_type

---
 imblearn/utils/tests/test_validation.py | 32 +++++++++++++++++++++++++
 imblearn/utils/validation.py | 1 -
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py
index a1d8585df..49e9d6997 100644
--- a/imblearn/utils/tests/test_validation.py
+++ b/imblearn/utils/tests/test_validation.py
@@ -6,17 +6,20 @@
 from collections import Counter

 import numpy as np
+import pytest
 from pytest import raises

 from sklearn.neighbors.base import KNeighborsMixin
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_random_state
 from sklearn.externals import joblib
+from sklearn.utils.testing import assert_array_equal

 from imblearn.utils.testing import warns
 from imblearn.utils import check_neighbors_object
 from imblearn.utils import check_ratio
 from imblearn.utils import hash_X_y
+from imblearn.utils import check_target_type


 def test_check_neighbors_object():

     check_neighbors_object(name, n_neighbors)


+@pytest.mark.parametrize(
+    "target, output_target",
+    [(np.array([0, 1, 1]), np.array([0, 1, 1])),
+     (np.array([0, 1, 2]), np.array([0, 1, 2])),
+     (np.array([[0, 1], [1, 0]]), np.array([1, 0]))]
+)
+def test_check_target_type(target, output_target):
+    converted_target = check_target_type(target.astype(int))
+    assert_array_equal(converted_target, output_target.astype(int))
+
+
+@pytest.mark.parametrize(
+    "target, output_target, is_ova",
+    [(np.array([0, 1, 1]), np.array([0, 1, 1]), False),
+     (np.array([0, 1, 2]), np.array([0, 1, 2]), False),
+     (np.array([[0, 1], [1, 0]]), np.array([1, 0]), True)]
+)
+def test_check_target_type_ova(target, output_target, is_ova):
+    converted_target, binarize_target = check_target_type(
+        target.astype(int), indicate_one_vs_all=True)
+    assert_array_equal(converted_target, output_target.astype(int))
+    assert binarize_target == is_ova
+
+
+def test_check_target_warning():
+    target = np.arange(4).reshape((2, 2))
+    with pytest.warns(UserWarning, message='should be of types'):
+        check_target_type(target)
+
 def test_check_ratio_error():
     with raises(ValueError, match="'sampling_type' should be one of"):
         check_ratio('auto', np.array([1, 2, 3]), 'rnd')
diff --git a/imblearn/utils/validation.py b/imblearn/utils/validation.py
index 17fa7e93a..58488463a 100644
--- a/imblearn/utils/validation.py
+++ b/imblearn/utils/validation.py
@@ -10,7 +10,6 @@

 import numpy as np

-from sklearn.preprocessing import label_binarize
 from sklearn.neighbors.base import KNeighborsMixin
 from sklearn.neighbors import NearestNeighbors
 from sklearn.externals import six, joblib

From 05ae2e6ff833f033086b31b4548545f0f31c4157 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 1 Mar 2018 02:30:45 +0100
Subject: [PATCH 03/50] PEP8

---
 imblearn/utils/tests/test_validation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py
index 49e9d6997..64a23eb4a 100644
--- a/imblearn/utils/tests/test_validation.py
+++ b/imblearn/utils/tests/test_validation.py
@@ -67,6 +67,7 @@ def test_check_target_warning():
     with pytest.warns(UserWarning, message='should be of types'):
         check_target_type(target)

+
 def test_check_ratio_error():
     with raises(ValueError, match="'sampling_type' should be one of"):
         check_ratio('auto', np.array([1, 2, 3]), 'rnd')

From 05b7d65798e1baad01316f3e1b9f1913f7bf7cad Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 1 Mar 2018 02:33:55 +0100
Subject: [PATCH 04/50] TST fix pytests match warns

---
 imblearn/utils/tests/test_validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imblearn/utils/tests/test_validation.py b/imblearn/utils/tests/test_validation.py
index 64a23eb4a..bed62617d 100644
--- a/imblearn/utils/tests/test_validation.py
+++ b/imblearn/utils/tests/test_validation.py
@@ -64,7 +64,7 @@ def test_check_target_type_ova(target, output_target, is_ova):

 def test_check_target_warning():
     target = np.arange(4).reshape((2, 2))
-    with pytest.warns(UserWarning, message='should be of types'):
+    with pytest.warns(UserWarning, match='should be of types'):
         check_target_type(target)


From 1a27e3e80b6020fe63dfc922821a86adac36a5c7 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 1 Mar 2018 03:26:24 +0100
Subject: [PATCH 05/50] TST common test to check multiclass ova equality

---
 imblearn/base.py | 29 +++++++++++++++------------
 imblearn/combine/smote_enn.py | 2 +-
 imblearn/combine/smote_tomek.py | 3 +--
 imblearn/ensemble/balance_cascade.py | 3 ++-
 imblearn/utils/estimator_checks.py | 18 +++++++++++++++++
 5 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index d352d9696..a44831c0b 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -15,7 +15,6 @@
 from sklearn.externals import six
 from sklearn.preprocessing import label_binarize
 from sklearn.utils import check_X_y
-from sklearn.utils.multiclass import type_of_target
 from sklearn.utils.validation import check_is_fitted

 from .utils import check_ratio, check_target_type, hash_X_y
@@ -245,17 +244,10 @@ def __init__(self, func=None, accept_sparse=True, kw_args=None):
         self.kw_args = kw_args
         self.logger = logging.getLogger(__name__)

-    def _check_X_y(self, X, y):
-        if self.accept_sparse:
-            X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
-        else:
-            X, y = check_X_y(X, y, accept_sparse=False)
-        y = check_target_type(y)
-
-        return X, y
-
     def fit(self, X, y):
-        X, y = self._check_X_y(X, y)
+        y = check_target_type(y)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']
+                         if self.accept_sparse else False)
         self.X_hash_, self.y_hash_ = hash_X_y(X, y)
         # when using a sampler, ratio_ is supposed to exist after fit
         self.ratio_ = 'is_fitted'
@@ -263,7 +255,9 @@ def fit(self, X, y):
         return self

     def _sample(self, X, y, func=None, kw_args=None):
-        X, y = self._check_X_y(X, y)
+        y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']
+                         if self.accept_sparse else False)
         check_is_fitted(self, 'ratio_')
         X_hash, y_hash = hash_X_y(X, y)
         if self.X_hash_ != X_hash or self.y_hash_ != y_hash:
@@ -272,7 +266,16 @@
         if func is None:
             func = _identity

-        return func(X, y, **(kw_args if self.kw_args else {}))
+        output = func(X, y, **(kw_args if self.kw_args else {}))
+
+        if binarize_y:
+            y_sampled = label_binarize(output[1], np.unique(y))
+            if len(output) == 2:
+                return output[0], y_sampled
+            else:
+                return output[0], y_sampled, output[2]
+        else:
+            return output

     def sample(self, X, y):
         return self._sample(X, y, func=self.func, kw_args=self.kw_args)
diff --git a/imblearn/combine/smote_enn.py b/imblearn/combine/smote_enn.py
index 74420472b..470919878 100644
--- a/imblearn/combine/smote_enn.py
+++ b/imblearn/combine/smote_enn.py
@@ -144,8 +144,8 @@ def fit(self, X, y):
             Return self.

         """
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
         y = check_target_type(y)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
         self.ratio_ = self.ratio
         self.X_hash_, self.y_hash_ = hash_X_y(X, y)

diff --git a/imblearn/combine/smote_tomek.py b/imblearn/combine/smote_tomek.py
index b48e6510a..0748e6ef7 100644
--- a/imblearn/combine/smote_tomek.py
+++ b/imblearn/combine/smote_tomek.py
@@ -8,7 +8,6 @@
 from __future__ import division

 import logging
-import warnings

 from sklearn.utils import check_X_y

@@ -153,8 +152,8 @@ def fit(self, X, y):
             Return self.

         """
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
         y = check_target_type(y)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
         self.ratio_ = self.ratio
         self.X_hash_, self.y_hash_ = hash_X_y(X, y)

diff --git a/imblearn/ensemble/balance_cascade.py b/imblearn/ensemble/balance_cascade.py
index bc6a06c6f..6668209ea 100644
--- a/imblearn/ensemble/balance_cascade.py
+++ b/imblearn/ensemble/balance_cascade.py
@@ -14,7 +14,7 @@
 from sklearn.model_selection import cross_val_predict

 from .base import BaseEnsembleSampler
-from ..utils import check_ratio
+from ..utils import check_ratio, check_target_type


 class BalanceCascade(BaseEnsembleSampler):
@@ -137,6 +137,7 @@ def fit(self, X, y):

         """
         super(BalanceCascade, self).fit(X, y)
+        y = check_target_type(y)
         self.ratio_ = check_ratio(self.ratio, y, 'under-sampling')
         return self

diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 2184b4b12..bd37e408a 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -19,11 +19,13 @@

 from sklearn.datasets import make_classification
 from sklearn.cluster import KMeans
+from sklearn.preprocessing import label_binarize

 from sklearn.utils.estimator_checks import check_estimator \
     as sklearn_check_estimator, check_parameters_default_constructible
 from sklearn.exceptions import NotFittedError
 from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import set_random_state
+from sklearn.utils.multiclass import type_of_target

 from imblearn.over_sampling.base import BaseOverSampler
 from imblearn.under_sampling.base import BaseCleaningSampler, BaseUnderSampler
@@ -44,6 +46,7 @@ def _yield_sampler_checks(name, Estimator):
     yield check_samplers_ratio_fit_sample
     yield check_samplers_sparse
     yield check_samplers_pandas
+    yield check_samplers_multiclass_ova


 def _yield_all_checks(name, estimator):
@@ -253,3 +256,18 @@ def check_samplers_pandas(name, Sampler):
     X_res, y_res = sampler.fit_sample(X, y)
     assert_allclose(X_res_pd, X_res)
     assert_allclose(y_res_pd, y_res)
+
+
+def check_samplers_multiclass_ova(name, Sampler):
+    # Check that a multiclass target leads to the same results as OVA encoding
+    X, y = make_classification(n_samples=1000, n_classes=3,
+                               n_informative=4, weights=[0.2, 0.3, 0.5],
+                               random_state=0)
+    y_ova = label_binarize(y, np.unique(y))
+    sampler = Sampler()
+    set_random_state(sampler)
+    X_res, y_res = sampler.fit_sample(X, y)
+    X_res_ova, y_res_ova = sampler.fit_sample(X, y_ova)
+    assert_allclose(X_res, X_res_ova)
+    assert type_of_target(y_res_ova) == type_of_target(y_ova)
+    assert_allclose(y_res, y_res_ova.argmax(axis=1))

From 47bbbf41cb2be4748f962141f88c74d7227d7147 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 4 Apr 2018 17:07:07 +0200
Subject: [PATCH 06/50] Add keras module

---
 imblearn/keras/__init__.py | 6 ++++++
 imblearn/keras/generator.py | 5 +++++
 imblearn/keras/tests/__init__.py | 0
 3 files changed, 11 insertions(+)
 create mode 100644 imblearn/keras/__init__.py
 create mode 100644 imblearn/keras/generator.py
 create mode 100644 imblearn/keras/tests/__init__.py

diff --git a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py
new file mode 100644
index 000000000..cf8949267
--- /dev/null
+++ b/imblearn/keras/__init__.py
@@ -0,0 +1,6 @@
+"""The :mod:`imblearn.keras` provides utilities to deal with imbalanced dataset
+in keras."""
+
+from .generator import balanced_batch_generator
+
+__all__ = ['balanced_batch_generator']
diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
new file mode 100644
index 000000000..13b3d5642
--- /dev/null
+++ b/imblearn/keras/generator.py
@@ -0,0 +1,5 @@
+"""Implement generators which will balance data."""
+
+
+def balanced_batch_generator():
+    pass
diff --git a/imblearn/keras/tests/__init__.py b/imblearn/keras/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb

From e6a318706eb0fecbb36c690d912d9dd7dee0a839 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 5 Apr 2018 00:20:41 +0200
Subject: [PATCH 07/50] TST for generator class

---
 imblearn/keras/__init__.py | 6 ++-
 imblearn/keras/generator.py | 71 ++++++++++++++++++++++++++++--
 imblearn/keras/tests/test_keras.py | 22 +++++++++
 3 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 imblearn/keras/tests/test_keras.py

diff --git a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py
index cf8949267..a605034c0 100644
--- a/imblearn/keras/__init__.py
+++ b/imblearn/keras/__init__.py
@@ -1,6 +1,8 @@
 """The :mod:`imblearn.keras` provides utilities to deal with imbalanced dataset
 in keras."""

-from .generator import balanced_batch_generator
+from .generator import BalancedBatchGenerator
+# from .generator import balanced_batch_generator

-__all__ = ['balanced_batch_generator']
+__all__ = ['BalancedBatchGenerator'] #,
+          # 'balanced_batch_generator']
diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index 13b3d5642..ac3288f2b 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -1,5 +1,70 @@
-"""Implement generators which will balance data."""
+"""Implement generators for ``keras`` which will balance the data."""

+import keras

-def balanced_batch_generator():
-    pass
+from sklearn.base import clone
+from sklearn.utils import safe_indexing
+
+from ..under_sampling import RandomUnderSampler
+
+
+class BalancedBatchGenerator(keras.utils.Sequence):
+    """
+
+    """
+    def __init__(self, X, y, sampler=None, batch_size=64, stratify=True):
+        self.X = X
+        self.y = y
+        self.sampler = sampler
+        self.batch_size = batch_size
+        self.stratify = stratify
+        self._sample()
+
+    def _sample(self):
+        if self.sampler is None:
+            self.sampler_ = RandomUnderSampler(return_indices=True)
+        else:
+            if not hasattr(self.sampler, 'return_indices'):
+                raise ValueError("'sampler' needs to return the indices of "
+                                 "the samples selected. Provide a sampler "
+                                 "which has an attribute 'return_indices'.")
+            self.sampler_ = clone(self.sampler)
+            self.sampler_.set_params(return_indices=True)
+
+        _, _, self.indices_ = self.sampler_.fit_sample(self.X, self.y)
+
+    def __len__(self):
+        return int(self.indices_.size // self.batch_size)
+
+    def __getitem__(self, index):
+        return (safe_indexing(self.X,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size]),
+                safe_indexing(self.y,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size]))
+
+
+# def balanced_batch_generator(X, y, sampler=None, batch_size=64,
+#                              stratify=True):
+#     """Create a balanced batch generator which can be plugged in
+#     ``keras.fit_generator``.

+#     Parameters
+#     ----------

+#     """
+#     if sampler is None:
+#         sampler = RandomUnderSampler()
+#     else:
+#         if not hasattr(sampler, 'return_indices'):
+#             raise ValueError("'sampler' needs to return the indices of "
+#                              "the samples selected. Provide a sampler which "
+#                              "has an attribute 'return_indices'.")
+#     sampler.set_params(return_indices=True)

+#     def generator(X=X, y=y, indices=indices, batch_size=batch_size,
+#                   stratify=stratify):


+#     _, _, indices = sampler.fit_sample(X, y)
diff --git a/imblearn/keras/tests/test_keras.py b/imblearn/keras/tests/test_keras.py
new file mode 100644
index 000000000..bf741f504
--- /dev/null
+++ b/imblearn/keras/tests/test_keras.py
@@ -0,0 +1,22 @@
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.utils import to_categorical
+
+from sklearn.datasets import load_iris
+
+from imblearn.keras import BalancedBatchGenerator
+
+
+iris = load_iris()
+X, y = iris.data, to_categorical(iris.target, 3)
+
+
+def test_balanced_batch_generator():
+    model = Sequential()
+    model.add(Dense(y.shape[1], input_dim=X.shape[1], activation='softmax'))
+    model.compile(optimizer='sgd', loss='categorical_crossentropy',
+                  metrics=['accuracy'])
+    training_generator = BalancedBatchGenerator(X, y)
+    model.fit_generator(generator=training_generator,
+                        epochs=10,
+                        verbose=10)

From 01492a688c3ac64ce6238200e550f98df318ef6f Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 5 Apr 2018 00:28:01 +0200
Subject: [PATCH 08/50] dependencies

---
 appveyor.yml | 3 ++-
 build_tools/circle/build_doc.sh | 2 +-
 build_tools/travis/install.sh | 3 ++-
 imblearn/keras/tests/test_keras.py | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index a09272080..82b79b2da 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -32,7 +32,8 @@ install:
   # Add Library/bin directory to fix issue
   # https://github.com/conda/conda/issues/1753
   - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%"
-  - conda install pip scipy numpy scikit-learn=0.19 pandas -y -q
+  - conda install pip scipy numpy scikit-learn=0.19 -y -q
+  - conda install pandas keras -y -q
   - conda install pytest pytest-cov -y -q
   - conda install nose -y -q  # FIXME: remove this line when using sklearn > 0.19
   - pip install .
diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh
index e49088ae6..4b4ff915e 100755
--- a/build_tools/circle/build_doc.sh
+++ b/build_tools/circle/build_doc.sh
@@ -92,7 +92,7 @@ conda create -n $CONDA_ENV_NAME --yes --quiet python=3
 source activate $CONDA_ENV_NAME

 conda install --yes pip numpy scipy scikit-learn pillow matplotlib sphinx \
-      sphinx_rtd_theme numpydoc
+      sphinx_rtd_theme numpydoc pandas keras
 pip install -U git+https://github.com/sphinx-gallery/sphinx-gallery.git

 # Build and install imbalanced-learn in dev mode
diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index 415e4ce5d..3e3b062e8 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -38,7 +38,8 @@ if [[ "$DISTRIB" == "conda" ]]; then
     # provided versions
     conda create -n testenv --yes python=$PYTHON_VERSION pip
     source activate testenv
-    conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION pandas
+    conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
+    conds install --yes pandas keras

     if [[ "$SKLEARN_VERSION" == "master" ]]; then
         conda install --yes cython
diff --git a/imblearn/keras/tests/test_keras.py b/imblearn/keras/tests/test_keras.py
index bf741f504..eecf5c3a3 100644
--- a/imblearn/keras/tests/test_keras.py
+++ b/imblearn/keras/tests/test_keras.py
@@ -4,9 +4,9 @@

 from sklearn.datasets import load_iris

+from imblearn.datasets import make_imbalance
 from imblearn.keras import BalancedBatchGenerator

-
 iris = load_iris()
 X, y = iris.data, to_categorical(iris.target, 3)

From 54a0f033e30652a816d442b266dfb2808ce4144e Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 5 Apr 2018 00:31:55 +0200
Subject: [PATCH 09/50] update dependencies

---
 build_tools/travis/install.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index 3e3b062e8..41743ef9f 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -60,8 +60,9 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
     # Create a new virtualenv using system site packages for python, numpy
     virtualenv --system-site-packages testvenv
     source testvenv/bin/activate
-    pip install scikit-learn pandas nose nose-timer pytest pytest-cov codecov \
-        sphinx numpydoc
+    pip install scikit-learn
+    pip install pandas keras tensorflow
+    pip install nose nose-timer pytest pytest-cov codecov sphinx numpydoc

 fi

From 77e944d3a6db60650bdcded8918a2f188fd62574 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 5 Apr 2018 00:32:24 +0200
Subject: [PATCH 10/50] iter

---
 build_tools/travis/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index 41743ef9f..e094117cc 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -39,7 +39,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
     conda create -n testenv --yes python=$PYTHON_VERSION pip
     source activate testenv
     conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
-    conds install --yes pandas keras
+    conda install --yes pandas keras

     if [[ "$SKLEARN_VERSION" == "master" ]]; then
         conda install --yes cython

From 7e138336da3d42b2ea3049a212870beeeb87578d Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 01:06:28 +0200
Subject: [PATCH 11/50] TST test the keras class

---
 appveyor.yml | 3 +-
 imblearn/keras/generator.py | 75 ++++++++++++++++++++++++++++--
 imblearn/keras/tests/test_keras.py | 41 +++++++++++++---
 3 files changed, 107 insertions(+), 12 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 82b79b2da..10cb95eea 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -33,7 +33,8 @@ install:
   # https://github.com/conda/conda/issues/1753
   - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%"
   - conda install pip scipy numpy scikit-learn=0.19 -y -q
-  - conda install pandas keras -y -q
+  - conda install pandas -y -q
+  - conda install -c conda-forge keras -y -q
   - conda install pytest pytest-cov -y -q
   - conda install nose -y -q  # FIXME: remove this line when using sklearn > 0.19
   - pip install .
diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index ac3288f2b..1e77d1ffb 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -1,28 +1,90 @@
 """Implement generators for ``keras`` which will balance the data."""

-import keras
+try:
+    import keras
+except ImportError:
+    # Skip the tests for the examples
+    import pytest
+    keras = pytest.importorskip('keras')
+    raise ImportError("To use the imblearn.keras module, you need to install "
+                      "keras.")

 from sklearn.base import clone
 from sklearn.utils import safe_indexing
+from sklearn.utils import check_random_state
+from sklearn.utils.testing import set_random_state

 from ..under_sampling import RandomUnderSampler


+# FIXME: add docstring for random_state using Substitution
 class BalancedBatchGenerator(keras.utils.Sequence):
-    """
+    """Create balanced batches when training a keras model.
+
+    Create a keras ``Sequence`` which is given to ``fit_generator``. The
+    sampler defines the sampling strategy used to balance the dataset ahead of
+    creating the batch. The sampler should have an attribute
+    ``return_indices``.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n_samples, n_features)
+        Original imbalanced dataset.
+
+    y : ndarray, shape (n_samples,) or (n_samples, n_classes)
+        Associated targets.
+
+    sampler : object or None, optional (default=None)
+        A sampler instance which has an attribute ``return_indices``.
+
+    batch_size : int, optional (default=32)
+        Number of samples per gradient update.
+
+    {random_state}
+
+    Attributes
+    ----------
+    sampler_ : object
+        The sampler used to balance the dataset.
+
+    indices_ : ndarray, shape (n_samples, n_features)
+        The indices of the samples selected during sampling.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_iris
+    >>> iris = load_iris()
+    >>> from imblearn.datasets import make_imbalance
+    >>> X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+    >>> y = keras.utils.to_categorical(y, 3)
+    >>> import keras
+    >>> model = keras.models.Sequential()
+    >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
+    ...           activation='softmax'))
+    >>> model.compile(optimizer='sgd', loss='categorical_crossentropy',
+    ...               metrics=['accuracy'])
+    >>> from imblearn.keras import BalancedBatchGenerator
+    >>> from imblearn.under_sampling import NearMiss
+    >>> training_generator = BalancedBatchGenerator(
+    ...     X, y, sampler=NearMiss(), batch_size=10, random_state=42)
+    >>> callback_history = model.fit_generator(generator=training_generator,
+    ...                                        epochs=10, verbose=0)

     """
-    def __init__(self, X, y, sampler=None, batch_size=64, stratify=True):
+    def __init__(self, X, y, sampler=None, batch_size=32, random_state=None):
         self.X = X
         self.y = y
         self.sampler = sampler
         self.batch_size = batch_size
-        self.stratify = stratify
+        self.random_state = random_state
         self._sample()

     def _sample(self):
+        random_state = check_random_state(self.random_state)
         if self.sampler is None:
-            self.sampler_ = RandomUnderSampler(return_indices=True)
+            self.sampler_ = RandomUnderSampler(return_indices=True,
+                                               random_state=random_state)
         else:
             if not hasattr(self.sampler, 'return_indices'):
                 raise ValueError("'sampler' needs to return the indices of "
                                  "the samples selected. Provide a sampler "
                                  "which has an attribute 'return_indices'.")
             self.sampler_ = clone(self.sampler)
             self.sampler_.set_params(return_indices=True)
+            set_random_state(self.sampler_, random_state)

         _, _, self.indices_ = self.sampler_.fit_sample(self.X, self.y)
+        # shuffle the indices since the sampler are packing them by class
+        random_state.shuffle(self.indices_)

     def __len__(self):
         return int(self.indices_.size // self.batch_size)
diff --git a/imblearn/keras/tests/test_keras.py b/imblearn/keras/tests/test_keras.py
index eecf5c3a3..2f789e8ec 100644
--- a/imblearn/keras/tests/test_keras.py
+++ b/imblearn/keras/tests/test_keras.py
@@ -1,3 +1,7 @@
+import pytest
+
+keras = pytest.importorskip('keras')
+
 from keras.models import Sequential
 from keras.layers import Dense
 from keras.utils import to_categorical
@@ -6,17 +10,42 @@
 from sklearn.datasets import load_iris

 from imblearn.datasets import make_imbalance
-from imblearn.keras import BalancedBatchGenerator
+from imblearn.keras import BalancedBatchGenerator
+from imblearn.under_sampling import ClusterCentroids
+from imblearn.under_sampling import NearMiss

 iris = load_iris()
-X, y = iris.data, to_categorical(iris.target, 3)
+X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+y = to_categorical(y, 3)


-def test_balanced_batch_generator():
+def _build_keras_model(n_classes, n_features):
     model = Sequential()
-    model.add(Dense(y.shape[1], input_dim=X.shape[1], activation='softmax'))
+    model.add(Dense(n_classes, input_dim=n_features, activation='softmax'))
     model.compile(optimizer='sgd', loss='categorical_crossentropy',
                   metrics=['accuracy'])
-    training_generator = BalancedBatchGenerator(X, y)
+    return model
+
+
+def test_balanced_batch_generator_class_no_return_indices():
+    model = _build_keras_model(y.shape[1], X.shape[1])
+    with pytest.raises(ValueError, match='needs to return the indices'):
+        training_generator = BalancedBatchGenerator(X, y,
+                                                    sampler=ClusterCentroids(),
+                                                    batch_size=10,
+                                                    random_state=42)
+        model.fit_generator(generator=training_generator,
+                            epochs=10)
+
+
+@pytest.mark.parametrize(
+    "sampler",
+    [None, NearMiss()]
+)
+def test_balanced_batch_generator_class(sampler):
+    model = _build_keras_model(y.shape[1], X.shape[1])
+    training_generator = BalancedBatchGenerator(X, y,
+                                                sampler=sampler,
+                                                batch_size=10,
+                                                random_state=42)
     model.fit_generator(generator=training_generator,
-                        epochs=10,
-                        verbose=10)
+                        epochs=10)

From 182b40837bcc8738eb2632e1984cbb315e468324 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 09:13:19 +0200
Subject: [PATCH 12/50] optional dep windows

---
 appveyor.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 10cb95eea..ef06ad0ba 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -10,22 +10,27 @@ environment:
   - PYTHON: "C:\\Miniconda-x64"
     PYTHON_VERSION: "2.7.x"
     PYTHON_ARCH: "64"
+    OPTIONAL_DEP: "pandas"

   - PYTHON: "C:\\Miniconda"
     PYTHON_VERSION: "2.7.x"
     PYTHON_ARCH: "32"
+    OPTIONAL_DEP: "pandas"

   - PYTHON: "C:\\Miniconda35-x64"
     PYTHON_VERSION: "3.5.x"
     PYTHON_ARCH: "64"
+    OPTIONAL_DEP: "pandas keras tensorflow"

   - PYTHON: "C:\\Miniconda36-x64"
     PYTHON_VERSION: "3.6.x"
     PYTHON_ARCH: "64"
+    OPTIONAL_DEP: "pandas keras tensorflow"

   - PYTHON: "C:\\Miniconda36"
     PYTHON_VERSION: "3.6.x"
     PYTHON_ARCH: "32"
+    OPTIONAL_DEP: "pandas"

 install:
   # Prepend miniconda installed Python to the PATH of this build
   # https://github.com/conda/conda/issues/1753
   - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%"
   - conda install pip scipy numpy scikit-learn=0.19 -y -q
-  - conda install pandas -y -q
-  - conda install -c conda-forge keras -y -q
+  - "conda install %OPTIONAL_DEP% -y -q"
   - conda install pytest pytest-cov -y -q
   - conda install nose -y -q  # FIXME: remove this line when using sklearn > 0.19
   - pip install .

From 68f8454312d81b3882f5dcd7055086c6cf3058de Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 10:09:33 +0200
Subject: [PATCH 13/50] Add generator function

---
 imblearn/keras/__init__.py | 6 ++-
 imblearn/keras/generator.py | 135 +++++++++++++++---
 .../{test_keras.py => test_generator.py} | 40 +++++-
 3 files changed, 152 insertions(+), 29 deletions(-)
 rename imblearn/keras/tests/{test_keras.py => test_generator.py} (56%)

diff --git a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py
index a605034c0..8acdd2b03 100644
--- a/imblearn/keras/__init__.py
+++ b/imblearn/keras/__init__.py
@@ -2,7 +2,7 @@ in keras."""

-from .generator import BalancedBatchGenerator
-# from .generator import balanced_batch_generator
+from .generator import BalancedBatchGenerator
+from .generator import balanced_batch_generator

-__all__ = ['BalancedBatchGenerator'] #,
-          # 'balanced_batch_generator']
+__all__ = ['BalancedBatchGenerator',
+           'balanced_batch_generator']
diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index 1e77d1ffb..b49489ac3 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -1,4 +1,5 @@
 """Implement generators for ``keras`` which will balance the data."""
+from __future__ import division

 try:
     import keras
@@ -34,6 +35,9 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     y : ndarray, shape (n_samples,) or (n_samples, n_classes)
         Associated targets.

+    sample_weight : ndarray, shape (n_samples,)
+        Sample weight.
+
     sampler : object or None, optional (default=None)
         A sampler instance which has an attribute ``return_indices``.
@@ -70,11 +74,12 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     >>> callback_history = model.fit_generator(generator=training_generator,
     ...                                        epochs=10, verbose=0)

     """
-    def __init__(self, X, y, sampler=None, batch_size=32, random_state=None):
+    def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32,
+                 random_state=None):
         self.X = X
         self.y = y
+        self.sample_weight = sample_weight
         self.sampler = sampler
         self.batch_size = batch_size
         self.random_state = random_state
         self._sample()
@@ -102,34 +107,120 @@ def __len__(self):
         return int(self.indices_.size // self.batch_size)

     def __getitem__(self, index):
-        return (safe_indexing(self.X,
+        if self.sample_weight is None:
+            return (
+                safe_indexing(self.X,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size]),
+                safe_indexing(self.y,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size])
+            )
+        else:
+            return (
+                safe_indexing(self.X,
                               self.indices_[index * self.batch_size:
                                             (index + 1) * self.batch_size]),
                 safe_indexing(self.y,
                               self.indices_[index * self.batch_size:
-                                            (index + 1) * self.batch_size]))
+                                            (index + 1) * self.batch_size]),
+                safe_indexing(self.sample_weight,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size])
+            )
+
+
+def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
+                             batch_size=32, random_state=None):
+    """Create a balanced batch generator to train a keras model.
+
+    Returns a generator --- as well as the number of steps per epoch --- which
+    is given to ``fit_generator``. The sampler defines the sampling strategy
+    used to balance the dataset ahead of creating the batch. The sampler should
+    have an attribute ``return_indices``.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n_samples, n_features)
+        Original imbalanced dataset.
+
+    y : ndarray, shape (n_samples,) or (n_samples, n_classes)
+        Associated targets.
+
+    sample_weight : ndarray, shape (n_samples,)
+        Sample weight.
+
+    sampler : object or None, optional (default=None)
+        A sampler instance which has an attribute ``return_indices``.
+
+    batch_size : int, optional (default=32)
+        Number of samples per gradient update.
+
+    {random_state}
+
+    Returns
+    -------
+    generator : generator of tuple
+        Generate batch of data. The tuples generated are either (X_batch,
+        y_batch) or (X_batch, y_batch, sample_weight_batch).
+
+    steps_per_epoch : int
+        The number of steps per epoch. Required by ``fit_generator`` in
+        keras.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_iris
+    >>> iris = load_iris()
+    >>> from imblearn.datasets import make_imbalance
+    >>> X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+    >>> y = keras.utils.to_categorical(y, 3)
+    >>> import keras
+    >>> model = keras.models.Sequential()
+    >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
+    ...           activation='softmax'))
+    >>> model.compile(optimizer='sgd', loss='categorical_crossentropy',
+    ...               metrics=['accuracy'])
+    >>> from imblearn.keras import balanced_batch_generator
+    >>> from imblearn.under_sampling import NearMiss
+    >>> training_generator, steps_per_epoch = balanced_batch_generator(
+    ...     X, y, sampler=NearMiss(), batch_size=10, random_state=42)
+    >>> callback_history = model.fit_generator(generator=training_generator,
+    ...                                        steps_per_epoch=steps_per_epoch,
+    ...                                        epochs=10, verbose=0)
+
+    """
+    random_state = check_random_state(random_state)
+    if sampler is None:
+        sampler_ = RandomUnderSampler(return_indices=True,
+                                      random_state=random_state)
+    else:
+        if not hasattr(sampler, 'return_indices'):
+            raise ValueError("'sampler' needs to return the indices of "
+                             "the samples selected. Provide a sampler "
+                             "which has an attribute 'return_indices'.")
+        sampler_ = clone(sampler)
+        sampler_.set_params(return_indices=True)
+        set_random_state(sampler_, random_state)
+
+    _, _, indices = sampler_.fit_sample(X, y)
+    # shuffle the indices since the sampler are packing them by class
+    random_state.shuffle(indices)
+
+    def generator(X, y, sample_weight, indices, batch_size):
+        if sample_weight is None:
+            while True:
+                for index in range(0, len(indices), batch_size):
+                    yield (safe_indexing(X, indices[index:index + batch_size]),
+                           safe_indexing(y, indices[index:index + batch_size]))
+        else:
+            while True:
+                for index in range(0, len(indices), batch_size):
+                    yield (safe_indexing(X, indices[index:index + batch_size]),
+                           safe_indexing(y, indices[index:index + batch_size]),
+                           safe_indexing(sample_weight,
+                                         indices[index:index + batch_size]))
+
+    return (generator(X, y, sample_weight, indices, batch_size),
+            int(indices.size // batch_size))
diff --git a/imblearn/keras/tests/test_keras.py b/imblearn/keras/tests/test_generator.py
similarity index 56%
rename from imblearn/keras/tests/test_keras.py
rename to imblearn/keras/tests/test_generator.py
index 2f789e8ec..ec1650b00 100644
--- a/imblearn/keras/tests/test_keras.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -1,5 +1,7 @@
 import pytest

+import numpy as np
+
 keras = pytest.importorskip('keras')

 from keras.models import Sequential
 from keras.layers import Dense
 from keras.utils import to_categorical
@@ -9,10 +11,12 @@
 from sklearn.datasets import load_iris

 from imblearn.datasets import make_imbalance
-from imblearn.keras import BalancedBatchGenerator
 from imblearn.under_sampling import ClusterCentroids
 from imblearn.under_sampling import NearMiss

+from imblearn.keras import BalancedBatchGenerator
+from imblearn.keras import balanced_batch_generator
+
 iris = load_iris()
 X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
 y = to_categorical(y, 3)
@@ -38,14 +42,42 @@ def test_balanced_batch_generator_class_no_return_indices():
 @pytest.mark.parametrize(
-    "sampler",
-    [None, NearMiss()]
+    "sampler, sample_weight",
+    [(None, None),
+     (NearMiss(), None),
+     (None, np.random.uniform(size=(y.shape[0])))]
 )
-def test_balanced_batch_generator_class(sampler):
+def test_balanced_batch_generator_class(sampler, sample_weight):
     model = _build_keras_model(y.shape[1], X.shape[1])
     training_generator = BalancedBatchGenerator(X, y,
+                                                sample_weight=sample_weight,
                                                 sampler=sampler,
                                                 batch_size=10,
                                                 random_state=42)
     model.fit_generator(generator=training_generator,
                         epochs=10)
+
+
+def test_balanced_batch_generator_function_no_return_indices():
+    model = _build_keras_model(y.shape[1], X.shape[1])
+    with pytest.raises(ValueError, match='needs to return the indices'):
+        training_generator, sample_per_epoch = balanced_batch_generator(
+            X, y, sampler=ClusterCentroids(), batch_size=10, random_state=42)
+        model.fit_generator(generator=training_generator,
+                            epochs=10)
+
+
+@pytest.mark.parametrize(
+    "sampler, sample_weight",
+    [(None, None),
+     (NearMiss(), None),
+     (None, np.random.uniform(size=(y.shape[0])))]
+)
+def test_balanced_batch_generator_function(sampler, sample_weight):
+    model = _build_keras_model(y.shape[1], X.shape[1])
+    training_generator, steps_per_epoch = balanced_batch_generator(
+        X, y, sample_weight=sample_weight, sampler=sampler, batch_size=10,
+        random_state=42)
+    model.fit_generator(generator=training_generator,
+                        steps_per_epoch=steps_per_epoch,
+                        epochs=10)

From c7b1c4842fa467bd06eb385eb8ff0cecc140243a Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 11:36:46 +0200
Subject: [PATCH 14/50] upload windows branch coverage

---
 appveyor.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/appveyor.yml b/appveyor.yml
index ef06ad0ba..ed88b09fb 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -47,3 +47,12 @@ test_script:
   - mkdir for_test
   - cd for_test
   - pytest --pyargs imblearn --cov-report term-missing --cov=imblearn
+
+after_test:
+  - if not exist dist mkdir dist
+  - if exist .coverage (cp .coverage dist\) else (echo no .coverage)
+  - codecov
+  - if exist coverage.xml (cp coverage.xml dist\) else (echo no coverage.xml)
+
+artifacts:
+  - path: dist\*

From e3bb2f74deb33f21cc8e17e98575e1aade658621 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 11:43:13 +0200
Subject: [PATCH 15/50] add codecov on appveyor

---
 appveyor.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/appveyor.yml b/appveyor.yml
index ed88b09fb..caaa0e28e 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -40,6 +40,7 @@ install:
   - conda install pip scipy numpy scikit-learn=0.19 -y -q
   - "conda install %OPTIONAL_DEP% -y -q"
   - conda install pytest pytest-cov -y -q
+  - pip install codecov
   - conda install nose -y -q  # FIXME: remove this line when using sklearn > 0.19
   - pip install .
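For context between patches: the keras API assembled in PATCH 11-13 is driven as
sketched below. This snippet is illustrative only and is not part of the patch
series --- the class-imbalance ratios, the single-layer model, and the epoch
counts are assumptions chosen to mirror the doctests added above. Note that
passing the one-hot encoded target to the samplers relies on the one-vs-all
target support added in PATCH 01.

    import keras
    from sklearn.datasets import load_iris

    from imblearn.datasets import make_imbalance
    from imblearn.keras import BalancedBatchGenerator, balanced_batch_generator
    from imblearn.under_sampling import NearMiss

    # Build a small imbalanced problem; the one-hot target exercises the
    # one-vs-all handling added in PATCH 01.
    iris = load_iris()
    X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
    y = keras.utils.to_categorical(y, 3)

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
                                 activation='softmax'))
    model.compile(optimizer='sgd', loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # keras ``Sequence`` flavour: the sampler balances the data once and the
    # batches are then served in shuffled order.
    training_generator = BalancedBatchGenerator(X, y, sampler=NearMiss(),
                                                batch_size=10, random_state=42)
    model.fit_generator(generator=training_generator, epochs=5, verbose=0)

    # Plain generator flavour: it additionally returns the number of steps per
    # epoch, which ``fit_generator`` needs since a generator has no length.
    training_generator, steps_per_epoch = balanced_batch_generator(
        X, y, sampler=NearMiss(), batch_size=10, random_state=42)
    model.fit_generator(generator=training_generator,
                        steps_per_epoch=steps_per_epoch, epochs=5, verbose=0)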
From 8afd2cd0a6bf92731c95f98bd63f6a3ea831d576 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 12:07:03 +0200
Subject: [PATCH 16/50] simplify codecov

---
 appveyor.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index caaa0e28e..6bb885553 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -50,10 +50,6 @@ test_script:
   - pytest --pyargs imblearn --cov-report term-missing --cov=imblearn

 after_test:
-  - if not exist dist mkdir dist
-  - if exist .coverage (cp .coverage dist\) else (echo no .coverage)
+  - cp .coverage %APPVEYOR_BUILD_FOLDER%
+  - cd %APPVEYOR_BUILD_FOLDER%
   - codecov
-  - if exist coverage.xml (cp coverage.xml dist\) else (echo no coverage.xml)
-
-artifacts:
-  - path: dist\*

From b2c560a078e06dff1491f1589da23487affc6878 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 6 Apr 2018 13:21:35 +0200
Subject: [PATCH 17/50] remove useless statement

---
 imblearn/keras/tests/test_generator.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
index ec1650b00..31bdd1478 100644
--- a/imblearn/keras/tests/test_generator.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -31,14 +31,8 @@ def _build_keras_model(n_classes, n_features):


 def test_balanced_batch_generator_class_no_return_indices():
-    model = _build_keras_model(y.shape[1], X.shape[1])
     with pytest.raises(ValueError, match='needs to return the indices'):
-        training_generator = BalancedBatchGenerator(X, y,
-                                                    sampler=ClusterCentroids(),
-                                                    batch_size=10,
-                                                    random_state=42)
-        model.fit_generator(generator=training_generator,
-                            epochs=10)
+        BalancedBatchGenerator(X, y, sampler=ClusterCentroids(), batch_size=10)


 @pytest.mark.parametrize(
@@ -59,12 +53,9 @@ def test_balanced_batch_generator_class(sampler, sample_weight):


 def test_balanced_batch_generator_function_no_return_indices():
-    model = _build_keras_model(y.shape[1], X.shape[1])
     with pytest.raises(ValueError, match='needs to return the indices'):
-        training_generator, sample_per_epoch = balanced_batch_generator(
+        balanced_batch_generator(
             X, y, sampler=ClusterCentroids(), batch_size=10, random_state=42)
-        model.fit_generator(generator=training_generator,
-                            epochs=10)


 @pytest.mark.parametrize(

From f7aa74538d6ce474d012b6b2fb8b084e7cb7d888 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 9 May 2018 19:15:44 +0200
Subject: [PATCH 18/50] fix

---
 conftest.py | 1 +
 imblearn/keras/generator.py | 15 ++++++---------
 imblearn/keras/tests/test_generator.py | 1 -
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/conftest.py b/conftest.py
index 110fdd479..31da29707 100644
--- a/conftest.py
+++ b/conftest.py
@@ -8,6 +8,7 @@
 # Set numpy array str/repr to legacy behaviour on numpy > 1.13 to make
 # the doctests pass
 import numpy as np
+
 try:
     np.set_printoptions(legacy='1.13')
 except TypeError:
diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index b49489ac3..0ac16941b 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -1,14 +1,7 @@
 """Implement generators for ``keras`` which will balance the data."""
 from __future__ import division

-try:
-    import keras
-except ImportError:
-    # Skip the tests for the examples
-    import pytest
-    keras = pytest.importorskip('keras')
-    raise ImportError("To use the imblearn.keras module, you need to install "
-                      "keras.")
+import pytest

 from sklearn.base import clone
 from sklearn.utils import safe_indexing
@@ -16,9 +9,13 @@
 from sklearn.utils import check_random_state
 from sklearn.utils.testing import set_random_state

 from ..under_sampling import RandomUnderSampler
+from ..utils import Substitution
+from ..utils._docstring import _random_state_docstring

+keras = pytest.importorskip("keras")

-# FIXME: add docstring for random_state using Substitution
+
+@Substitution(random_state=_random_state_docstring)
 class BalancedBatchGenerator(keras.utils.Sequence):
     """Create balanced batches when training a keras model.
diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
index 31bdd1478..d1107d2e5 100644
--- a/imblearn/keras/tests/test_generator.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -3,7 +3,6 @@
 import numpy as np

 keras = pytest.importorskip('keras')
-
 from keras.models import Sequential
 from keras.layers import Dense
 from keras.utils import to_categorical

From b62fef818e8eef9be64a4f515c11a834a2b45a8f Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 9 May 2018 21:34:34 +0200
Subject: [PATCH 19/50] iter

---
 imblearn/keras/generator.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index 0ac16941b..31f124163 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -15,7 +15,6 @@
 keras = pytest.importorskip("keras")


-@Substitution(random_state=_random_state_docstring)
 class BalancedBatchGenerator(keras.utils.Sequence):
     """Create balanced batches when training a keras model.

@@ -40,7 +40,14 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     batch_size : int, optional (default=32)
         Number of samples per gradient update.

-    {random_state}
+    random_state : int, RandomState instance or None, optional (default=None)
+        Control the randomization of the algorithm
+        - If int, ``random_state`` is the seed used by the random number
+          generator;
+        - If ``RandomState`` instance, random_state is the random number
+          generator;
+        - If ``None``, the random number generator is the ``RandomState``
+          instance used by ``np.random``.

     Attributes
     ----------

From 79a4a10c72801c483f109280f82bcf211cf0658f Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 9 May 2018 22:29:08 +0200
Subject: [PATCH 20/50] FIX modify docstring to accept substitution

---
 imblearn/keras/generator.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index 31f124163..9f5e841c3 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -15,6 +15,7 @@
 keras = pytest.importorskip("keras")


+@Substitution(random_state=_random_state_docstring)
 class BalancedBatchGenerator(keras.utils.Sequence):
     """Create balanced batches when training a keras model.

@@ -40,14 +41,7 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     batch_size : int, optional (default=32)
         Number of samples per gradient update.
-    random_state : int, RandomState instance or None, optional (default=None)
-        Control the randomization of the algorithm
-        - If int, ``random_state`` is the seed used by the random number
-          generator;
-        - If ``RandomState`` instance, random_state is the random number
-          generator;
-        - If ``None``, the random number generator is the ``RandomState``
-          instance used by ``np.random``.
+    {random_state}

     Attributes
     ----------
@@ -62,7 +56,7 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     >>> from sklearn.datasets import load_iris
     >>> iris = load_iris()
     >>> from imblearn.datasets import make_imbalance
-    >>> X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+    >>> X, y = make_imbalance(iris.data, iris.target, {{0: 30, 1: 50, 2: 40}})
     >>> y = keras.utils.to_categorical(y, 3)
     >>> import keras
@@ -177,7 +171,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
     >>> from sklearn.datasets import load_iris
     >>> iris = load_iris()
     >>> from imblearn.datasets import make_imbalance
-    >>> X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
+    >>> X, y = make_imbalance(iris.data, iris.target, {{0: 30, 1: 50, 2: 40}})
     >>> y = keras.utils.to_categorical(y, 3)
     >>> import keras

From 162cb95e1fa77e6cc723e8fb945fd2d4f633638c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 9 May 2018 22:45:04 +0200
Subject: [PATCH 21/50] FIX do not substitute inside the class

---
 imblearn/keras/generator.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
index 9f5e841c3..df5e2f4a8 100644
--- a/imblearn/keras/generator.py
+++ b/imblearn/keras/generator.py
@@ -15,7 +15,6 @@
 keras = pytest.importorskip("keras")


-@Substitution(random_state=_random_state_docstring)
 class BalancedBatchGenerator(keras.utils.Sequence):
     """Create balanced batches when training a keras model.

@@ -40,7 +40,14 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     batch_size : int, optional (default=32)
         Number of samples per gradient update.

-    {random_state}
+    random_state : int, RandomState instance or None, optional (default=None)
+        Control the randomization of the algorithm
+        - If int, ``random_state`` is the seed used by the random number
+          generator;
+        - If ``RandomState`` instance, random_state is the random number
+          generator;
+        - If ``None``, the random number generator is the ``RandomState``
+          instance used by ``np.random``.

     Attributes
     ----------

From 06955e251c5ffebe7986cda275181696d7bf991a Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 9 May 2018 23:26:46 +0200
Subject: [PATCH 22/50] EHN add tensorflow sequence

---
 imblearn/__init__.py | 3 +
 imblearn/keras/__init__.py | 4 +-
 imblearn/keras/_generator.py | 111 ++++++++++
 imblearn/keras/generator.py | 230 --------------------
 imblearn/tensorflow/__init__.py | 6 +
 imblearn/tensorflow/_generator.py | 87 ++++++++
 imblearn/tensorflow/tests/test_generator.py | 14 ++
 7 files changed, 223 insertions(+), 232 deletions(-)
 create mode 100644 imblearn/keras/_generator.py
 delete mode 100644 imblearn/keras/generator.py
 create mode 100644 imblearn/tensorflow/__init__.py
 create mode 100644 imblearn/tensorflow/_generator.py
 create mode 100644 imblearn/tensorflow/tests/test_generator.py

diff --git a/imblearn/__init__.py b/imblearn/__init__.py
index 7803ca016..0cb3ca8fe 100644
--- a/imblearn/__init__.py
+++ b/imblearn/__init__.py
@@ -21,6 +21,9 @@ with imbalanced datasets.
 over_sampling
     Module which provides methods to over-sample a dataset.
+tensorflow
+    Module which provides custom generators and layers for deep learning
+    using tensorflow.
 under-sampling
     Module which provides methods to under-sample a dataset.
 utils
diff --git a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py
index 8acdd2b03..99b91f77f 100644
--- a/imblearn/keras/__init__.py
+++ b/imblearn/keras/__init__.py
@@ -1,8 +1,8 @@
 """The :mod:`imblearn.keras` provides utilities to deal with imbalanced dataset
 in keras."""

-from .generator import BalancedBatchGenerator
-from .generator import balanced_batch_generator
+from ._generator import BalancedBatchGenerator
+from ..tensorflow._generator import balanced_batch_generator

 __all__ = ['BalancedBatchGenerator',
            'balanced_batch_generator']
diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
new file mode 100644
index 000000000..d655b926a
--- /dev/null
+++ b/imblearn/keras/_generator.py
@@ -0,0 +1,111 @@
+"""Implement generators for ``keras`` which will balance the data."""
+from __future__ import division
+
+import pytest
+
+from sklearn.base import clone
+from sklearn.utils import safe_indexing
+from sklearn.utils import check_random_state
+from sklearn.utils.testing import set_random_state
+
+from ..under_sampling import RandomUnderSampler
+
+keras = pytest.importorskip("keras")
+
+
+class BalancedBatchGenerator(keras.utils.Sequence):
+    """Create balanced batches when training a keras model.
+
+    Create a keras ``Sequence`` which is given to ``fit_generator``. The
+    sampler defines the sampling strategy used to balance the dataset ahead of
+    creating the batch. The sampler should have an attribute
+    ``return_indices``.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n_samples, n_features)
+        Original imbalanced dataset.
+
+    y : ndarray, shape (n_samples,) or (n_samples, n_classes)
+        Associated targets.
+
+    sample_weight : ndarray, shape (n_samples,)
+        Sample weight.
+
+    sampler : object or None, optional (default=None)
+        A sampler instance which has an attribute ``return_indices``.
+
+    batch_size : int, optional (default=32)
+        Number of samples per gradient update.
+
+    random_state : int, RandomState instance or None, optional (default=None)
+        Control the randomization of the algorithm
+        - If int, ``random_state`` is the seed used by the random number
+          generator;
+        - If ``RandomState`` instance, random_state is the random number
+          generator;
+        - If ``None``, the random number generator is the ``RandomState``
+          instance used by ``np.random``.
+
+    Attributes
+    ----------
+    sampler_ : object
+        The sampler used to balance the dataset.
+
+    indices_ : ndarray, shape (n_samples, n_features)
+        The indices of the samples selected during sampling.
+
+    """
+    def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32,
+                 random_state=None):
+        self.X = X
+        self.y = y
+        self.sample_weight = sample_weight
+        self.sampler = sampler
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self._sample()
+
+    def _sample(self):
+        random_state = check_random_state(self.random_state)
+        if self.sampler is None:
+            self.sampler_ = RandomUnderSampler(return_indices=True,
+                                               random_state=random_state)
+        else:
+            if not hasattr(self.sampler, 'return_indices'):
+                raise ValueError("'sampler' needs to return the indices of "
+                                 "the samples selected. Provide a sampler "
+                                 "which has an attribute 'return_indices'.")
+            self.sampler_ = clone(self.sampler)
+            self.sampler_.set_params(return_indices=True)
+            set_random_state(self.sampler_, random_state)
+
+        _, _, self.indices_ = self.sampler_.fit_sample(self.X, self.y)
+        # shuffle the indices since the sampler are packing them by class
+        random_state.shuffle(self.indices_)
+
+    def __len__(self):
+        return int(self.indices_.size // self.batch_size)
+
+    def __getitem__(self, index):
+        if self.sample_weight is None:
+            return (
+                safe_indexing(self.X,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size]),
+                safe_indexing(self.y,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size])
+            )
+        else:
+            return (
+                safe_indexing(self.X,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size]),
+                safe_indexing(self.y,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size]),
+                safe_indexing(self.sample_weight,
+                              self.indices_[index * self.batch_size:
+                                            (index + 1) * self.batch_size])
+            )
diff --git a/imblearn/keras/generator.py b/imblearn/keras/generator.py
deleted file mode 100644
index df5e2f4a8..000000000
--- a/imblearn/keras/generator.py
+++ /dev/null
@@ -1,230 +0,0 @@
-"""Implement generators for ``keras`` which will balance the data."""
-from __future__ import division
-
-import pytest
-
-from sklearn.base import clone
-from sklearn.utils import safe_indexing
-from sklearn.utils import check_random_state
-from sklearn.utils.testing import set_random_state
-
-from ..under_sampling import RandomUnderSampler
-from ..utils import Substitution
-from ..utils._docstring import _random_state_docstring
-
-keras = pytest.importorskip("keras")
-
-
-class BalancedBatchGenerator(keras.utils.Sequence):
-    """Create balanced batches when training a keras model.
-
-    Create a keras ``Sequence`` which is given to ``fit_generator``. The
-    sampler defines the sampling strategy used to balance the dataset ahead of
-    creating the batch. The sampler should have an attribute
-    ``return_indices``.
-
-    Parameters
-    ----------
-    X : ndarray, shape (n_samples, n_features)
-        Original imbalanced dataset.
-
-    y : ndarray, shape (n_samples,) or (n_samples, n_classes)
-        Associated targets.
-
-    sample_weight : ndarray, shape (n_samples,)
-        Sample weight.
-
-    sampler : object or None, optional (default=None)
-        A sampler instance which has an attribute ``return_indices``.
-
-    batch_size : int, optional (default=32)
-        Number of samples per gradient update.
-
-    random_state : int, RandomState instance or None, optional (default=None)
-        Control the randomization of the algorithm
-        - If int, ``random_state`` is the seed used by the random number
-          generator;
-        - If ``RandomState`` instance, random_state is the random number
-          generator;
-        - If ``None``, the random number generator is the ``RandomState``
-          instance used by ``np.random``.
-
-    Attributes
-    ----------
-    sampler_ : object
-        The sampler used to balance the dataset.
-
-    indices_ : ndarray, shape (n_samples, n_features)
-        The indices of the samples selected during sampling.
-
-    Examples
-    --------
-    >>> from sklearn.datasets import load_iris
-    >>> iris = load_iris()
-    >>> from imblearn.datasets import make_imbalance
-    >>> X, y = make_imbalance(iris.data, iris.target, {{0: 30, 1: 50, 2: 40}})
-    >>> y = keras.utils.to_categorical(y, 3)
-    >>> import keras
-    >>> model = keras.models.Sequential()
-    >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
-    ...           activation='softmax'))
-    >>> model.compile(optimizer='sgd', loss='categorical_crossentropy',
-    ...               metrics=['accuracy'])
-    >>> from imblearn.keras import BalancedBatchGenerator
-    >>> from imblearn.under_sampling import NearMiss
-    >>> training_generator = BalancedBatchGenerator(
-    ...     X, y, sampler=NearMiss(), batch_size=10, random_state=42)
-    >>> callback_history = model.fit_generator(generator=training_generator,
-    ...                                        epochs=10, verbose=0)
-
-    """
-    def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32,
-                 random_state=None):
-        self.X = X
-        self.y = y
-        self.sample_weight = sample_weight
-        self.sampler = sampler
-        self.batch_size = batch_size
-        self.random_state = random_state
-        self._sample()
-
-    def _sample(self):
-        random_state = check_random_state(self.random_state)
-        if self.sampler is None:
-            self.sampler_ = RandomUnderSampler(return_indices=True,
-                                               random_state=random_state)
-        else:
-            if not hasattr(self.sampler, 'return_indices'):
-                raise ValueError("'sampler' needs to return the indices of "
-                                 "the samples selected. Provide a sampler "
-                                 "which has an attribute 'return_indices'.")
-            self.sampler_ = clone(self.sampler)
-            self.sampler_.set_params(return_indices=True)
-            set_random_state(self.sampler_, random_state)
-
-        _, _, self.indices_ = self.sampler_.fit_sample(self.X, self.y)
-        # shuffle the indices since the sampler are packing them by class
-        random_state.shuffle(self.indices_)
-
-    def __len__(self):
-        return int(self.indices_.size // self.batch_size)
-
-    def __getitem__(self, index):
-        if self.sample_weight is None:
-            return (
-                safe_indexing(self.X,
-                              self.indices_[index * self.batch_size:
-                                            (index + 1) * self.batch_size]),
-                safe_indexing(self.y,
-                              self.indices_[index * self.batch_size:
-                                            (index + 1) * self.batch_size])
-            )
-        else:
-            return (
-                safe_indexing(self.X,
-                              self.indices_[index * self.batch_size:
-                                            (index + 1) * self.batch_size]),
-                safe_indexing(self.y,
-                              self.indices_[index * self.batch_size:
-                                            (index + 1) * self.batch_size]),
-                safe_indexing(self.sample_weight,
-                              self.indices_[index * self.batch_size:
-                                            (index + 1) * self.batch_size])
-            )
-
-
-@Substitution(random_state=_random_state_docstring)
-def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
-                             batch_size=32, random_state=None):
-    """Create a balanced batch generator to train a keras model.
-
-    Returns a generator --- as well as the number of steps per epoch --- which
-    is given to ``fit_generator``. The sampler defines the sampling strategy
-    used to balance the dataset ahead of creating the batch. The sampler should
-    have an attribute ``return_indices``.
-
-    Parameters
-    ----------
-    X : ndarray, shape (n_samples, n_features)
-        Original imbalanced dataset.
-
-    y : ndarray, shape (n_samples,) or (n_samples, n_classes)
-        Associated targets.
-
-    sample_weight : ndarray, shape (n_samples,)
-        Sample weight.
-
-    sampler : object or None, optional (default=None)
-        A sampler instance which has an attribute ``return_indices``.
-
-    batch_size : int, optional (default=32)
-        Number of samples per gradient update.
-
-    {random_state}
-
-    Returns
-    -------
-    generator : generator of tuple
-        Generate batch of data. The tuples generated are either (X_batch,
-        y_batch) or (X_batch, y_batch, sample_weight_batch).
-
-    steps_per_epoch : int
-        The number of steps per epoch. Required by ``fit_generator`` in
-        keras.
- - Examples - -------- - >>> from sklearn.datasets import load_iris - >>> iris = load_iris() - >>> from imblearn.datasets import make_imbalance - >>> X, y = make_imbalance(iris.data, iris.target, {{0: 30, 1: 50, 2: 40}}) - >>> y = keras.utils.to_categorical(y, 3) - >>> import keras - >>> model = keras.models.Sequential() - >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1], - ... activation='softmax')) - >>> model.compile(optimizer='sgd', loss='categorical_crossentropy', - ... metrics=['accuracy']) - >>> from imblearn.keras import balanced_batch_generator - >>> from imblearn.under_sampling import NearMiss - >>> training_generator, steps_per_epoch = balanced_batch_generator( - ... X, y, sampler=NearMiss(), batch_size=10, random_state=42) - >>> callback_history = model.fit_generator(generator=training_generator, - ... steps_per_epoch=steps_per_epoch, - ... epochs=10, verbose=0) - - - """ - random_state = check_random_state(random_state) - if sampler is None: - sampler_ = RandomUnderSampler(return_indices=True, - random_state=random_state) - else: - if not hasattr(sampler, 'return_indices'): - raise ValueError("'sampler' needs to return the indices of " - "the samples selected. Provide a sampler " - "which has an attribute 'return_indices'.") - sampler_ = clone(sampler) - sampler_.set_params(return_indices=True) - set_random_state(sampler_, random_state) - - _, _, indices = sampler_.fit_sample(X, y) - # shuffle the indices since the sampler are packing them by class - random_state.shuffle(indices) - - def generator(X, y, sample_weight, indices, batch_size): - if sample_weight is None: - while True: - for index in range(0, len(indices), batch_size): - yield (safe_indexing(X, indices[index:index + batch_size]), - safe_indexing(y, indices[index:index + batch_size])) - else: - while True: - for index in range(0, len(indices), batch_size): - yield (safe_indexing(X, indices[index:index + batch_size]), - safe_indexing(y, indices[index:index + batch_size]), - safe_indexing(sample_weight, - indices[index:index + batch_size])) - - return (generator(X, y, sample_weight, indices, batch_size), - int(indices.size // batch_size)) diff --git a/imblearn/tensorflow/__init__.py b/imblearn/tensorflow/__init__.py new file mode 100644 index 000000000..3224a7db1 --- /dev/null +++ b/imblearn/tensorflow/__init__.py @@ -0,0 +1,6 @@ +"""The :mod:`imblearn.tensorflow` provides utilities to deal with imbalanced +dataset in tensorflow.""" + +from ._generator import balanced_batch_generator + +__all__ = ['balanced_batch_generator'] diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py new file mode 100644 index 000000000..f1704bbbb --- /dev/null +++ b/imblearn/tensorflow/_generator.py @@ -0,0 +1,87 @@ +"""Implement generators for ``tensorflow`` which will balance the data.""" + +from __future__ import division + +from sklearn.base import clone +from sklearn.utils import safe_indexing +from sklearn.utils import check_random_state +from sklearn.utils.testing import set_random_state + +from ..under_sampling import RandomUnderSampler +from ..utils import Substitution +from ..utils._docstring import _random_state_docstring + + +@Substitution(random_state=_random_state_docstring) +def balanced_batch_generator(X, y, sample_weight=None, sampler=None, + batch_size=32, random_state=None): + """Create a balanced batch generator to train keras model. + + Returns a generator --- as well as the number of step per epoch --- which + is given to ``fit_generator``. 
The sampler defines the sampling strategy
+    used to balance the dataset ahead of creating the batch. The sampler should
+    have an attribute ``return_indices``.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n_samples, n_features)
+        Original imbalanced dataset.
+
+    y : ndarray, shape (n_samples,) or (n_samples, n_classes)
+        Associated targets.
+
+    sample_weight : ndarray, shape (n_samples,)
+        Sample weight.
+
+    sampler : object or None, optional (default=None)
+        A sampler instance which has an attribute ``return_indices``.
+
+    batch_size : int, optional (default=32)
+        Number of samples per gradient update.
+
+    {random_state}
+
+    Returns
+    -------
+    generator : generator of tuple
+        Generate batch of data. The tuple generated are either (X_batch,
+        y_batch) or (X_batch, y_batch, sampler_weight_batch).
+
+    steps_per_epoch : int
+        The number of samples per epoch. Required by ``fit_generator`` in
+        keras.
+
+    """
+    random_state = check_random_state(random_state)
+    if sampler is None:
+        sampler_ = RandomUnderSampler(return_indices=True,
+                                      random_state=random_state)
+    else:
+        if not hasattr(sampler, 'return_indices'):
+            raise ValueError("'sampler' needs to return the indices of "
+                             "the samples selected. Provide a sampler "
+                             "which has an attribute 'return_indices'.")
+        sampler_ = clone(sampler)
+        sampler_.set_params(return_indices=True)
+        set_random_state(sampler_, random_state)
+
+    _, _, indices = sampler_.fit_sample(X, y)
+    # shuffle the indices since the sampler packs them by class
+    random_state.shuffle(indices)
+
+    def generator(X, y, sample_weight, indices, batch_size):
+        if sample_weight is None:
+            while True:
+                for index in range(0, len(indices), batch_size):
+                    yield (safe_indexing(X, indices[index:index + batch_size]),
+                           safe_indexing(y, indices[index:index + batch_size]))
+        else:
+            while True:
+                for index in range(0, len(indices), batch_size):
+                    yield (safe_indexing(X, indices[index:index + batch_size]),
+                           safe_indexing(y, indices[index:index + batch_size]),
+                           safe_indexing(sample_weight,
+                                         indices[index:index + batch_size]))
+
+    return (generator(X, y, sample_weight, indices, batch_size),
+            int(indices.size // batch_size))
diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py
new file mode 100644
index 000000000..166fbb870
--- /dev/null
+++ b/imblearn/tensorflow/tests/test_generator.py
@@ -0,0 +1,14 @@
+import pytest
+
+tf = pytest.importorskip('tensforflow')
+
+from sklearn.datasets import load_iris
+
+from imblearn.datasets import make_imbalance
+from imblearn.under_sampling import ClusterCentroids
+from imblearn.under_sampling import NearMiss
+
+from imblearn.tensforflow import balanced_batch_generator
+
+iris = load_iris()
+X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
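The new test module above stops at building the imbalanced iris dataset. As a
sketch of a minimal smoke test, assuming only the API introduced by this patch
(a generator plus a step count, with ``RandomUnderSampler`` as the default
sampler), one could iterate a full epoch and check the batch shapes::

    from sklearn.datasets import load_iris

    from imblearn.datasets import make_imbalance
    from imblearn.tensorflow import balanced_batch_generator

    iris = load_iris()
    X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})

    batch_size = 10
    training_generator, steps_per_epoch = balanced_batch_generator(
        X, y, batch_size=batch_size, random_state=42)
    # the generator cycles indefinitely; one epoch is steps_per_epoch batches
    for _ in range(steps_per_epoch):
        X_batch, y_batch = next(training_generator)
        assert X_batch.shape == (batch_size, X.shape[1])
        assert y_batch.shape == (batch_size,)

From 0ce4a5c5621027181d77157242bb41a3f1415ec3 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 11 May 2018 13:39:59 +0200
Subject: [PATCH 23/50] TST generator for tensorflow

---
 imblearn/keras/tests/test_generator.py      | 10 +--
 imblearn/tensorflow/tests/test_generator.py | 72 +++++++++++++++++++--
 2 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
index d1107d2e5..d94dd73da 100644
--- a/imblearn/keras/tests/test_generator.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -2,11 +2,6 @@
 
 import numpy as np
 
-keras = pytest.importorskip('keras')
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.utils import 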
to_categorical - from sklearn.datasets import load_iris from imblearn.datasets import make_imbalance @@ -16,6 +11,11 @@ from imblearn.keras import BalancedBatchGenerator from imblearn.keras import balanced_batch_generator +keras = pytest.importorskip('keras') +from keras.models import Sequential +from keras.layers import Dense +from keras.utils import to_categorical + iris = load_iris() X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40}) y = to_categorical(y, 3) diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index 166fbb870..2045690e4 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -1,14 +1,74 @@ -import pytest +from __future__ import division -tf = pytest.importorskip('tensforflow') +import pytest +import numpy as np from sklearn.datasets import load_iris from imblearn.datasets import make_imbalance -from imblearn.under_sampling import ClusterCentroids from imblearn.under_sampling import NearMiss -from imblearn.tensforflow import balanced_batch_generator +from imblearn.tensorflow import balanced_batch_generator + +tf = pytest.importorskip('tensorflow') + + +@pytest.mark.parametrize( + "sampler", + [None, NearMiss()] +) +def test_balanced_batch_generator(sampler): + X, y = load_iris(return_X_y=True) + X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) + X = X.astype(np.float32) + + batch_size = 10 + training_generator, steps_per_epoch = balanced_batch_generator( + X, y, sample_weight=None, sampler=sampler, + batch_size=batch_size, random_state=42) + + learning_rate = 0.01 + epochs = 10 + input_size = X.shape[1] + output_size = 3 + + # helper functions + def init_weights(shape): + return tf.Variable(tf.random_normal(shape, stddev=0.01)) + + def accuracy(y_true, y_pred): + return np.mean(np.argmax(y_pred, axis=1) == y_true) + + # input and output + data = tf.placeholder("float32", shape=[None, input_size]) + targets = tf.placeholder("int32", shape=[None]) + + # build the model and weights + W = init_weights([input_size, output_size]) + b = init_weights([output_size]) + out_act = tf.nn.sigmoid(tf.matmul(data, W) + b) + + # build the loss, predict, and train operator + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=out_act, labels=targets) + loss = tf.reduce_sum(cross_entropy) + optimizer = tf.train.GradientDescentOptimizer(learning_rate) + train_op = optimizer.minimize(loss) + predict = tf.nn.softmax(out_act) + + # Initialization of all variables in the graph + init = tf.global_variables_initializer() + + with tf.Session() as sess: + sess.run(init) + + for e in range(epochs): + for i in range(steps_per_epoch): + X_batch, y_batch = next(training_generator) + sess.run([train_op, loss], + feed_dict={data: X_batch, targets: y_batch}) -iris = load_iris() -X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40}) + # For each epoch, run accuracy on train and test + predicts_train = sess.run(predict, feed_dict={data: X}) + print("epoch: {} train accuracy: {:.3f}" + .format(e, accuracy(y, predicts_train))) From d2bc9ca9afc6a1c73d51df6711912bfcbf47005d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 14 May 2018 01:03:11 +0200 Subject: [PATCH 24/50] DOCAdd simple examples --- imblearn/keras/__init__.py | 67 +++++++++++++++++++++++++++++++ imblearn/keras/_generator.py | 25 ++++++++++++ imblearn/tensorflow/_generator.py | 57 ++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) diff --git 
a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py index 99b91f77f..449a2193b 100644 --- a/imblearn/keras/__init__.py +++ b/imblearn/keras/__init__.py @@ -4,5 +4,72 @@ from ._generator import BalancedBatchGenerator from ..tensorflow._generator import balanced_batch_generator +balanced_batch_generator.__doc__ = \ + """Create a balanced batch generator to train keras model. + + Returns a generator --- as well as the number of step per epoch --- which + is given to ``fit_generator``. The sampler defines the sampling strategy + used to balance the dataset ahead of creating the batch. The sampler should + have an attribute ``return_indices``. + + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Original imbalanced dataset. + + y : ndarray, shape (n_samples,) or (n_samples, n_classes) + Associated targets. + + sample_weight : ndarray, shape (n_samples,) + Sample weight. + + sampler : object or None, optional (default=None) + A sampler instance which has an attribute ``return_indices``. + + batch_size : int, optional (default=32) + Number of samples per gradient update. + + random_state : int, RandomState instance or None, optional (default=None) + Control the randomization of the algorithm + - If int, ``random_state`` is the seed used by the random number + generator; + - If ``RandomState`` instance, random_state is the random number + generator; + - If ``None``, the random number generator is the ``RandomState`` + instance used by ``np.random``. + + Returns + ------- + generator : generator of tuple + Generate batch of data. The tuple generated are either (X_batch, + y_batch) or (X_batch, y_batch, sampler_weight_batch). + + steps_per_epoch : int + The number of samples per epoch. Required by ``fit_generator`` in + keras. + + Examples + -------- + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> from imblearn.datasets import make_imbalance + >>> X, y = make_imbalance(iris.data, iris.target, {{0: 30, 1: 50, 2: 40}}) + >>> y = keras.utils.to_categorical(y, 3) + >>> import keras + >>> model = keras.models.Sequential() + >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1], + ... activation='softmax')) + >>> model.compile(optimizer='sgd', loss='categorical_crossentropy', + ... metrics=['accuracy']) + >>> from imblearn.keras import balanced_batch_generator + >>> from imblearn.under_sampling import NearMiss + >>> training_generator, steps_per_epoch = balanced_batch_generator( + ... X, y, sampler=NearMiss(), batch_size=10, random_state=42) + >>> callback_history = model.fit_generator(generator=training_generator, + ... steps_per_epoch=steps_per_epoch, + ... epochs=10, verbose=0) + + """ + __all__ = ['BalancedBatchGenerator', 'balanced_batch_generator'] diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py index d655b926a..51609cd1f 100644 --- a/imblearn/keras/_generator.py +++ b/imblearn/keras/_generator.py @@ -9,10 +9,13 @@ from sklearn.utils.testing import set_random_state from ..under_sampling import RandomUnderSampler +from ..utils import Substitution +from ..utils._docstring import _random_state_docstring keras = pytest.importorskip("keras") +@Substitution(random_state=_random_state_docstring) class BalancedBatchGenerator(keras.utils.Sequence): """Create balanced batches when training a keras model. @@ -55,6 +58,28 @@ class BalancedBatchGenerator(keras.utils.Sequence): indices_ : ndarray, shape (n_samples, n_features) The indices of the samples selected during sampling. 
+ Examples + -------- + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> from imblearn.datasets import make_imbalance + >>> class_dict = dict() + >>> class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40 + >>> X, y = make_imbalance(iris.data, iris.target, class_dict) + >>> y = keras.utils.to_categorical(y, 3) + >>> import keras + >>> model = keras.models.Sequential() + >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1], + ... activation='softmax')) + >>> model.compile(optimizer='sgd', loss='categorical_crossentropy', + ... metrics=['accuracy']) + >>> from imblearn.keras import BalancedBatchGenerator + >>> from imblearn.under_sampling import NearMiss + >>> training_generator = BalancedBatchGenerator( + ... X, y, sampler=NearMiss(), batch_size=10, random_state=42) + >>> callback_history = model.fit_generator(generator=training_generator, + ... epochs=10, verbose=0) + """ def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32, random_state=None): diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py index f1704bbbb..e5ee1dca1 100644 --- a/imblearn/tensorflow/_generator.py +++ b/imblearn/tensorflow/_generator.py @@ -51,7 +51,64 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, The number of samples per epoch. Required by ``fit_generator`` in keras. + Examples + -------- + >>> import numpy as np + >>> from sklearn.datasets import load_iris + >>> X, y = load_iris(return_X_y=True) + >>> class_dict = dict() + >>> class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40 + >>> from imblearn.datasets import make_imbalance + >>> X, y = make_imbalance(X, y, class_dict) + >>> X = X.astype(np.float32) + >>> batch_size, learning_rate, epochs = 10, 0.01, 10 + >>> training_generator, steps_per_epoch = balanced_batch_generator( + ... X, y, sample_weight=None, sampler=None, + ... batch_size=batch_size, random_state=42) + >>> input_size, output_size = X.shape[1], 3 + >>> import tensorflow as tf + >>> def init_weights(shape): + ... return tf.Variable(tf.random_normal(shape, stddev=0.01)) + >>> def accuracy(y_true, y_pred): + ... return np.mean(np.argmax(y_pred, axis=1) == y_true) + >>> # input and output + >>> data = tf.placeholder("float32", shape=[None, input_size]) + >>> targets = tf.placeholder("int32", shape=[None]) + >>> # build the model and weights + >>> W = init_weights([input_size, output_size]) + >>> b = init_weights([output_size]) + >>> out_act = tf.nn.sigmoid(tf.matmul(data, W) + b) + >>> # build the loss, predict, and train operator + >>> cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + ... logits=out_act, labels=targets) + >>> loss = tf.reduce_sum(cross_entropy) + >>> optimizer = tf.train.GradientDescentOptimizer(learning_rate) + >>> train_op = optimizer.minimize(loss) + >>> predict = tf.nn.softmax(out_act) + >>> # Initialization of all variables in the graph + >>> init = tf.global_variables_initializer() + >>> with tf.Session() as sess: + ... print('Starting training') + ... sess.run(init) + ... for e in range(epochs): + ... for i in range(steps_per_epoch): + ... X_batch, y_batch = next(training_generator) + ... feed_dict = dict() + ... feed_dict[data] = X_batch; feed_dict[targets] = y_batch + ... sess.run([train_op, loss], feed_dict=feed_dict) + ... # For each epoch, run accuracy on train and test + ... feed_dict = dict() + ... feed_dict[data] = X + ... predicts_train = sess.run(predict, feed_dict=feed_dict) + ... print("epoch: {{}} train accuracy: {{:.3f}}" + ... 
.format(e, accuracy(y, predicts_train))) + ... # doctest: +ELLIPSIS + Starting training + [... + """ + + random_state = check_random_state(random_state) if sampler is None: sampler_ = RandomUnderSampler(return_indices=True, From 731622e23eaf3d8c03abc49da8577144cecad0bc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 14 May 2018 01:07:13 +0200 Subject: [PATCH 25/50] DOC add to api documentation --- doc/api.rst | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index f9566146f..3af9fe59e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -111,6 +111,41 @@ Prototype selection ensemble.BalancedBaggingClassifier ensemble.EasyEnsemble +.. _keras_ref: + +:mod:`imblearn.keras`: Batch generator for Keras +================================================ + +.. automodule:: imblearn.keras + :no-members: + :no-inherited-members: + +.. currentmodule:: imblearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + keras.BalancedBatchGenerator + keras.balanced_batch_generator + +.. _tensorflow_ref: + +:mod:`imblearn.tensorflow`: Batch generator for TensorFlow +========================================================== + +.. automodule:: imblearn.tensorflow + :no-members: + :no-inherited-members: + +.. currentmodule:: imblearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + tensorflow.balanced_batch_generator + .. _misc_ref: Miscellaneous From 0e8eafe3e4dba1177379adc985fadcd303334e70 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 14 May 2018 01:10:16 +0200 Subject: [PATCH 26/50] FIX add function summary --- doc/api.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 3af9fe59e..4abc49d33 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -127,6 +127,11 @@ Prototype selection :template: class.rst keras.BalancedBatchGenerator + +.. autosummary:: + :toctree: generated/ + :template: function.rst + keras.balanced_batch_generator .. _tensorflow_ref: @@ -142,8 +147,8 @@ Prototype selection .. autosummary:: :toctree: generated/ - :template: class.rst - + :template: function.rst + tensorflow.balanced_batch_generator .. _misc_ref: From b64e04f65d0fa19e8701cb170b6576c99e019617 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 14 May 2018 16:56:10 +0200 Subject: [PATCH 27/50] iter --- imblearn/keras/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py index 449a2193b..c4e81f4e3 100644 --- a/imblearn/keras/__init__.py +++ b/imblearn/keras/__init__.py @@ -51,11 +51,14 @@ Examples -------- >>> from sklearn.datasets import load_iris - >>> iris = load_iris() + >>> X, y = load_iris(return_X_y=True) >>> from imblearn.datasets import make_imbalance - >>> X, y = make_imbalance(iris.data, iris.target, {{0: 30, 1: 50, 2: 40}}) - >>> y = keras.utils.to_categorical(y, 3) + >>> class_dict = dict() + >>> class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40 + >>> from imblearn.datasets import make_imbalance + >>> X, y = make_imbalance(X, y, class_dict) >>> import keras + >>> y = keras.utils.to_categorical(y, 3) >>> model = keras.models.Sequential() >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1], ... 
activation='softmax')) From c59fb8b702966e7973ef8297e38efa3056a35b3e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 14 May 2018 21:39:44 +0200 Subject: [PATCH 28/50] Update docstring --- imblearn/keras/__init__.py | 72 +---------------------------- imblearn/keras/_generator.py | 75 ++++++++++++++++++++++++++++++- imblearn/tensorflow/_generator.py | 1 - imblearn/utils/_docstring.py | 3 +- 4 files changed, 76 insertions(+), 75 deletions(-) diff --git a/imblearn/keras/__init__.py b/imblearn/keras/__init__.py index c4e81f4e3..407e0c7dd 100644 --- a/imblearn/keras/__init__.py +++ b/imblearn/keras/__init__.py @@ -2,77 +2,7 @@ in keras.""" from ._generator import BalancedBatchGenerator -from ..tensorflow._generator import balanced_batch_generator - -balanced_batch_generator.__doc__ = \ - """Create a balanced batch generator to train keras model. - - Returns a generator --- as well as the number of step per epoch --- which - is given to ``fit_generator``. The sampler defines the sampling strategy - used to balance the dataset ahead of creating the batch. The sampler should - have an attribute ``return_indices``. - - Parameters - ---------- - X : ndarray, shape (n_samples, n_features) - Original imbalanced dataset. - - y : ndarray, shape (n_samples,) or (n_samples, n_classes) - Associated targets. - - sample_weight : ndarray, shape (n_samples,) - Sample weight. - - sampler : object or None, optional (default=None) - A sampler instance which has an attribute ``return_indices``. - - batch_size : int, optional (default=32) - Number of samples per gradient update. - - random_state : int, RandomState instance or None, optional (default=None) - Control the randomization of the algorithm - - If int, ``random_state`` is the seed used by the random number - generator; - - If ``RandomState`` instance, random_state is the random number - generator; - - If ``None``, the random number generator is the ``RandomState`` - instance used by ``np.random``. - - Returns - ------- - generator : generator of tuple - Generate batch of data. The tuple generated are either (X_batch, - y_batch) or (X_batch, y_batch, sampler_weight_batch). - - steps_per_epoch : int - The number of samples per epoch. Required by ``fit_generator`` in - keras. - - Examples - -------- - >>> from sklearn.datasets import load_iris - >>> X, y = load_iris(return_X_y=True) - >>> from imblearn.datasets import make_imbalance - >>> class_dict = dict() - >>> class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40 - >>> from imblearn.datasets import make_imbalance - >>> X, y = make_imbalance(X, y, class_dict) - >>> import keras - >>> y = keras.utils.to_categorical(y, 3) - >>> model = keras.models.Sequential() - >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1], - ... activation='softmax')) - >>> model.compile(optimizer='sgd', loss='categorical_crossentropy', - ... metrics=['accuracy']) - >>> from imblearn.keras import balanced_batch_generator - >>> from imblearn.under_sampling import NearMiss - >>> training_generator, steps_per_epoch = balanced_batch_generator( - ... X, y, sampler=NearMiss(), batch_size=10, random_state=42) - >>> callback_history = model.fit_generator(generator=training_generator, - ... steps_per_epoch=steps_per_epoch, - ... 
epochs=10, verbose=0) - - """ +from ._generator import balanced_batch_generator __all__ = ['BalancedBatchGenerator', 'balanced_batch_generator'] diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py index 51609cd1f..e46df80bd 100644 --- a/imblearn/keras/_generator.py +++ b/imblearn/keras/_generator.py @@ -8,11 +8,12 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import set_random_state +keras = pytest.importorskip("keras") + from ..under_sampling import RandomUnderSampler from ..utils import Substitution from ..utils._docstring import _random_state_docstring - -keras = pytest.importorskip("keras") +from ..tensorflow import balanced_batch_generator as keras_bbg @Substitution(random_state=_random_state_docstring) @@ -134,3 +135,73 @@ def __getitem__(self, index): self.indices_[index * self.batch_size: (index + 1) * self.batch_size]) ) + + +@Substitution(random_state=_random_state_docstring) +def balanced_batch_generator(X, y, sample_weight=None, sampler=None, + batch_size=32, random_state=None): + """Create a balanced batch generator to train keras model. + + Returns a generator --- as well as the number of step per epoch --- which + is given to ``fit_generator``. The sampler defines the sampling strategy + used to balance the dataset ahead of creating the batch. The sampler should + have an attribute ``return_indices``. + + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Original imbalanced dataset. + + y : ndarray, shape (n_samples,) or (n_samples, n_classes) + Associated targets. + + sample_weight : ndarray, shape (n_samples,) + Sample weight. + + sampler : object or None, optional (default=None) + A sampler instance which has an attribute ``return_indices``. + + batch_size : int, optional (default=32) + Number of samples per gradient update. + + {random_state} + + Returns + ------- + generator : generator of tuple + Generate batch of data. The tuple generated are either (X_batch, + y_batch) or (X_batch, y_batch, sampler_weight_batch). + + steps_per_epoch : int + The number of samples per epoch. Required by ``fit_generator`` in + keras. + + Examples + -------- + >>> from sklearn.datasets import load_iris + >>> X, y = load_iris(return_X_y=True) + >>> from imblearn.datasets import make_imbalance + >>> class_dict = dict() + >>> class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40 + >>> from imblearn.datasets import make_imbalance + >>> X, y = make_imbalance(X, y, class_dict) + >>> import keras + >>> y = keras.utils.to_categorical(y, 3) + >>> model = keras.models.Sequential() + >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1], + ... activation='softmax')) + >>> model.compile(optimizer='sgd', loss='categorical_crossentropy', + ... metrics=['accuracy']) + >>> from imblearn.keras import balanced_batch_generator + >>> from imblearn.under_sampling import NearMiss + >>> training_generator, steps_per_epoch = balanced_batch_generator( + ... X, y, sampler=NearMiss(), batch_size=10, random_state=42) + >>> callback_history = model.fit_generator(generator=training_generator, + ... steps_per_epoch=steps_per_epoch, + ... 
epochs=10, verbose=0)
+
+    """
+
+    return keras_bbg(X=X, y=y, sample_weight=sample_weight,
+                     sampler=sampler, batch_size=batch_size,
+                     random_state=random_state)
diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py
index e5ee1dca1..868dd1a89 100644
--- a/imblearn/tensorflow/_generator.py
+++ b/imblearn/tensorflow/_generator.py
@@ -108,7 +108,6 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
 
     """
 
-
     random_state = check_random_state(random_state)
     if sampler is None:
         sampler_ = RandomUnderSampler(return_indices=True,
diff --git a/imblearn/utils/_docstring.py b/imblearn/utils/_docstring.py
index 56ae44106..f036f31da 100644
--- a/imblearn/utils/_docstring.py
+++ b/imblearn/utils/_docstring.py
@@ -25,7 +25,8 @@ def __call__(self, obj):
 
 _random_state_docstring = \
     """random_state : int, RandomState instance or None, optional (default=None)
-    Control the randomization of the algorithm
+    Control the randomization of the algorithm.
+
     - If int, ``random_state`` is the seed used by the random number
       generator;
     - If ``RandomState`` instance, random_state is the random number

From 155fe0f8b0407636da50463ab33326f9c6859ff8 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 14 May 2018 23:52:36 +0200
Subject: [PATCH 29/50] DOC fix warning (#425)

---
 doc/whats_new.rst                      |  10 --
 doc/whats_new/v0.0.1.rst               |   4 +-
 doc/whats_new/v0.0.2.rst               | 152 +++++++++++++----
 doc/whats_new/v0.0.3.rst               |  69 ++++----
 doc/whats_new/v0.0.4.rst               |   2 +-
 examples/plot_sampling_target_usage.py | 220 -------------------------
 imblearn/metrics/classification.py     |   4 +-
 imblearn/utils/_docstring.py           |   1 +
 imblearn/utils/validation.py           |   2 +-
 9 files changed, 167 insertions(+), 297 deletions(-)
 delete mode 100644 examples/plot_sampling_target_usage.py

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 6a5912887..ba3d4d584 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -11,13 +11,3 @@ Release history
 .. include:: whats_new/v0.0.2.rst
 
 .. include:: whats_new/v0.0.1.rst
-
-.. _Guillaume Lemaitre: https://github.com/glemaitre
-.. _Christos Aridas: https://github.com/chkoar
-.. _Fernando Nogueira: https://github.com/fmfn
-.. _Dayvid Oliveira: https://github.com/dvro
-.. _Francois Magimel: https://github.com/Linkid
-.. _Aliaksei Halachkin: https://github.com/honeyext
-.. _Aleksandr Loskutov: https://github.com/loskutyan
-.. _Rafael Wampfler: https://github.com/Eichhof
-.. _Joan Massich: https://github.com/massich
diff --git a/doc/whats_new/v0.0.1.rst b/doc/whats_new/v0.0.1.rst
index f450c8d28..22da3a468 100644
--- a/doc/whats_new/v0.0.1.rst
+++ b/doc/whats_new/v0.0.1.rst
@@ -9,7 +9,9 @@ Changelog
 API
 ~~~
 
-- First release of the stable API. By `Fernando Nogueira`_, `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
+- First release of the stable API. By :user:`Fernando Nogueira <fmfn>`,
+  :user:`Guillaume Lemaitre <glemaitre>`, :user:`Christos Aridas <chkoar>`,
+  and :user:`Dayvid Oliveira <dvro>`.
 
 New methods
 ~~~~~~~~~~~
diff --git a/doc/whats_new/v0.0.2.rst b/doc/whats_new/v0.0.2.rst
index cc3c7d1e3..60f64e92c 100644
--- a/doc/whats_new/v0.0.2.rst
+++ b/doc/whats_new/v0.0.2.rst
@@ -9,51 +9,141 @@ Changelog
 Bug fixes
 ~~~~~~~~~
 
-- Fixed a bug in :class:`under_sampling.NearMiss` which was not picking the right samples during under sampling for the method 3. By `Guillaume Lemaitre`_.
-- Fixed a bug in :class:`ensemble.EasyEnsemble`, correction of the `random_state` generation. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
-- Fixed a bug in :class:`under_sampling.RepeatedEditedNearestNeighbours`, add additional stopping criterion to avoid that the minority class become a majority class or that a class disappear. By `Guillaume Lemaitre`_.
-- Fixed a bug in :class:`under_sampling.AllKNN`, add stopping criteria to avoid that the minority class become a majority class or that a class disappear. By `Guillaume Lemaitre`_.
-- Fixed a bug in :class:`under_sampling.CondensedNeareastNeigbour`, correction of the list of indices returned. By `Guillaume Lemaitre`_.
-- Fixed a bug in :class:`ensemble.BalanceCascade`, solve the issue to obtain a single array if desired. By `Guillaume Lemaitre`_.
-- Fixed a bug in :class:`pipeline.Pipeline`, solve to embed `Pipeline` in other `Pipeline`. :issue:`231` by `Christos Aridas`_ .
-- Fixed a bug in :class:`pipeline.Pipeline`, solve the issue to put to sampler in the same `Pipeline`. :issue:`188` by `Christos Aridas`_ .
-- Fixed a bug in :class:`under_sampling.CondensedNeareastNeigbour`, correction of the shape of `sel_x` when only one sample is selected. By `Aliaksei Halachkin`_.
-- Fixed a bug in :class:`under_sampling.NeighbourhoodCleaningRule`, selecting neighbours instead of minority class misclassified samples. :issue:`230` by `Aleksandr Loskutov`_.
-- Fixed a bug in :class:`over_sampling.ADASYN`, correction of the creation of a new sample so that the new sample lies between the minority sample and the nearest neighbour. :issue:`235` by `Rafael Wampfler`_.
+- Fixed a bug in :class:`under_sampling.NearMiss` which was not picking the
+  right samples during under sampling for the method 3. By :user:`Guillaume
+  Lemaitre <glemaitre>`.
+
+- Fixed a bug in :class:`ensemble.EasyEnsemble`, correction of the
+  `random_state` generation. By :user:`Guillaume Lemaitre <glemaitre>` and
+  :user:`Christos Aridas <chkoar>`.
+
+- Fixed a bug in :class:`under_sampling.RepeatedEditedNearestNeighbours`, add
+  additional stopping criterion to avoid that the minority class become a
+  majority class or that a class disappear. By :user:`Guillaume Lemaitre
+  <glemaitre>`.
+
+- Fixed a bug in :class:`under_sampling.AllKNN`, add stopping criteria to avoid
+  that the minority class become a majority class or that a class disappear. By
+  :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fixed a bug in :class:`under_sampling.CondensedNeareastNeigbour`, correction
+  of the list of indices returned. By :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fixed a bug in :class:`ensemble.BalanceCascade`, solve the issue to obtain a
+  single array if desired. By :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Fixed a bug in :class:`pipeline.Pipeline`, solve to embed `Pipeline` in other
+  `Pipeline`. :issue:`231` by :user:`Christos Aridas <chkoar>`.
+
+- Fixed a bug in :class:`pipeline.Pipeline`, solve the issue to put to sampler
+  in the same `Pipeline`. :issue:`188` by :user:`Christos Aridas <chkoar>`.
+
+- Fixed a bug in :class:`under_sampling.CondensedNeareastNeigbour`, correction
+  of the shape of `sel_x` when only one sample is selected. By
+  :user:`Aliaksei Halachkin <honeyext>`.
+
+- Fixed a bug in :class:`under_sampling.NeighbourhoodCleaningRule`, selecting
+  neighbours instead of minority class misclassified samples. :issue:`230` by
+  :user:`Aleksandr Loskutov <loskutyan>`.
+
+- Fixed a bug in :class:`over_sampling.ADASYN`, correction of the creation of a
+  new sample so that the new sample lies between the minority sample and the
+  nearest neighbour. :issue:`235` by :user:`Rafael Wampfler <Eichhof>`.
 
 New features
 ~~~~~~~~~~~~
 
-- Added AllKNN under sampling technique. By `Dayvid Oliveira`_.
-- Added a module `metrics` implementing some specific scoring function for the problem of balancing. :issue:`204` by `Guillaume Lemaitre`_ and `Christos Aridas`_.
+- Added AllKNN under sampling technique. By :user:`Dayvid Oliveira <dvro>`.
+
+- Added a module `metrics` implementing some specific scoring function for the
+  problem of balancing. :issue:`204` by :user:`Guillaume Lemaitre <glemaitre>`
+  and :user:`Christos Aridas <chkoar>`.
 
 Enhancement
 ~~~~~~~~~~~
 
-- Added support for bumpversion. By `Guillaume Lemaitre`_.
-- Validate the type of target in binary samplers. A warning is raised for the moment. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
+- Added support for bumpversion. By :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Validate the type of target in binary samplers. A warning is raised for the
+  moment. By :user:`Guillaume Lemaitre <glemaitre>` and :user:`Christos Aridas
+  <chkoar>`.
+
 - Change from `cross_validation` module to `model_selection` module for
-  `sklearn` deprecation cycle. By `Dayvid Oliveira`_ and `Christos Aridas`_.
+  `sklearn` deprecation cycle. By :user:`Dayvid Oliveira <dvro>` and
+  :user:`Christos Aridas <chkoar>`.
 
 API changes summary
 ~~~~~~~~~~~~~~~~~~~
 
-- `size_ngh` has been deprecated in :class:`combine.SMOTEENN`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira` .
-- `size_ngh` has been deprecated in :class:`under_sampling.EditedNearestNeighbors`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
-- `size_ngh` has been deprecated in :class:`under_sampling.CondensedNeareastNeigbour`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
-- `size_ngh` has been deprecated in :class:`under_sampling.OneSidedSelection`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
-- `size_ngh` has been deprecated in :class:`under_sampling.NeighbourhoodCleaningRule`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
-- `size_ngh` has been deprecated in :class:`under_sampling.RepeatedEditedNearestNeighbours`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
-- `size_ngh` has been deprecated in :class:`under_sampling.AllKNN`. Use `n_neighbors` instead. By `Guillaume Lemaitre`_, `Christos Aridas`_, and `Dayvid Oliveira`_.
-- Two base classes :class:`BaseBinaryclassSampler` and :class:`BaseMulticlassSampler` have been created to handle the target type and raise warning in case of abnormality. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
-- Move `random_state` to be assigned in the :class:`SamplerMixin` initialization. By `Guillaume Lemaitre`_.
-- Provide estimators instead of parameters in :class:`combine.SMOTEENN` and :class:`combine.SMOTETomek`. Therefore, the list of parameters have been deprecated. By `Guillaume Lemaitre`_ and `Christos Aridas`_.
-- `k` has been deprecated in :class:`over_sampling.ADASYN`. Use `n_neighbors` instead. :issue:`183` by `Guillaume Lemaitre`_.
-- `k` and `m` have been deprecated in :class:`over_sampling.SMOTE`. Use `k_neighbors` and `m_neighbors` instead. :issue:`182` by `Guillaume Lemaitre`_.
-- `n_neighbors` accept `KNeighborsMixin` based object for :class:`under_sampling.EditedNearestNeighbors`, :class:`under_sampling.CondensedNeareastNeigbour`, :class:`under_sampling.NeighbourhoodCleaningRule`, :class:`under_sampling.RepeatedEditedNearestNeighbours`, and :class:`under_sampling.AllKNN`. :issue:`109` by `Guillaume Lemaitre`_.
+- `size_ngh` has been deprecated in :class:`combine.SMOTEENN`. Use
+  `n_neighbors` instead. By :user:`Guillaume Lemaitre <glemaitre>`,
+  :user:`Christos Aridas <chkoar>`, and :user:`Dayvid Oliveira <dvro>`.
+
+- `size_ngh` has been deprecated in
+  :class:`under_sampling.EditedNearestNeighbors`. Use `n_neighbors` instead. By
+  :user:`Guillaume Lemaitre <glemaitre>`, :user:`Christos Aridas <chkoar>`,
+  and :user:`Dayvid Oliveira <dvro>`.
+
+- `size_ngh` has been deprecated in
+  :class:`under_sampling.CondensedNeareastNeigbour`. Use `n_neighbors`
+  instead. By :user:`Guillaume Lemaitre <glemaitre>`,
+  :user:`Christos Aridas <chkoar>`, and
+  :user:`Dayvid Oliveira <dvro>`.
+
+- `size_ngh` has been deprecated in
+  :class:`under_sampling.OneSidedSelection`. Use `n_neighbors` instead. By
+  :user:`Guillaume Lemaitre <glemaitre>`, :user:`Christos Aridas <chkoar>`,
+  and :user:`Dayvid Oliveira <dvro>`.
+
+- `size_ngh` has been deprecated in
+  :class:`under_sampling.NeighbourhoodCleaningRule`. Use `n_neighbors`
+  instead. By :user:`Guillaume Lemaitre <glemaitre>`,
+  :user:`Christos Aridas <chkoar>`, and
+  :user:`Dayvid Oliveira <dvro>`.
+
+- `size_ngh` has been deprecated in
+  :class:`under_sampling.RepeatedEditedNearestNeighbours`. Use `n_neighbors`
+  instead. By :user:`Guillaume Lemaitre <glemaitre>`,
+  :user:`Christos Aridas <chkoar>`, and
+  :user:`Dayvid Oliveira <dvro>`.
+
+- `size_ngh` has been deprecated in :class:`under_sampling.AllKNN`. Use
+  `n_neighbors` instead. By :user:`Guillaume Lemaitre <glemaitre>`,
+  :user:`Christos Aridas <chkoar>`, and :user:`Dayvid Oliveira <dvro>`.
+
+- Two base classes :class:`BaseBinaryclassSampler` and
+  :class:`BaseMulticlassSampler` have been created to handle the target type
+  and raise warning in case of abnormality.
+  By :user:`Guillaume Lemaitre <glemaitre>` and :user:`Christos Aridas <chkoar>`.
+
+- Move `random_state` to be assigned in the :class:`SamplerMixin`
+  initialization. By :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Provide estimators instead of parameters in :class:`combine.SMOTEENN` and
+  :class:`combine.SMOTETomek`. Therefore, the list of parameters have been
+  deprecated. By :user:`Guillaume Lemaitre <glemaitre>` and
+  :user:`Christos Aridas <chkoar>`.
+
+- `k` has been deprecated in :class:`over_sampling.ADASYN`. Use `n_neighbors`
+  instead. :issue:`183` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- `k` and `m` have been deprecated in :class:`over_sampling.SMOTE`. Use
+  `k_neighbors` and `m_neighbors` instead. :issue:`182` by :user:`Guillaume
+  Lemaitre <glemaitre>`.
+
+- `n_neighbors` accept `KNeighborsMixin` based object for
+  :class:`under_sampling.EditedNearestNeighbors`,
+  :class:`under_sampling.CondensedNeareastNeigbour`,
+  :class:`under_sampling.NeighbourhoodCleaningRule`,
+  :class:`under_sampling.RepeatedEditedNearestNeighbours`, and
+  :class:`under_sampling.AllKNN`. :issue:`109` by :user:`Guillaume Lemaitre
+  <glemaitre>`.
 
 Documentation changes
 ~~~~~~~~~~~~~~~~~~~~~
 
-- Replace some remaining `UnbalancedDataset` occurences. By `Francois Magimel`_.
-- Added doctest in the documentation. By `Guillaume Lemaitre`_.
+- Replace some remaining `UnbalancedDataset` occurrences.
+  By :user:`Francois Magimel <Linkid>`.
+
+- Added doctest in the documentation. By :user:`Guillaume Lemaitre
+  <glemaitre>`.
diff --git a/doc/whats_new/v0.0.3.rst b/doc/whats_new/v0.0.3.rst
index c9b69bc28..9232db6a7 100644
--- a/doc/whats_new/v0.0.3.rst
+++ b/doc/whats_new/v0.0.3.rst
@@ -8,85 +8,92 @@ Changelog
 Testing
 ~~~~~~~
 
-- Pytest is used instead of nosetests. :issue:`321` by `Joan Massich`_.
+- Pytest is used instead of nosetests. :issue:`321` by :user:`Joan Massich
+  <massich>`.
 
 Documentation
 ~~~~~~~~~~~~~
 
-- Added a User Guide and extended some examples. :issue:`295` by `Guillaume Lemaitre`_.
+- Added a User Guide and extended some examples. :issue:`295` by
+  :user:`Guillaume Lemaitre <glemaitre>`.
 
 Bug fixes
 ~~~~~~~~~
 
 - Fixed a bug in :func:`utils.check_ratio` such that an error is raised when
-  the number of samples required is negative. :issue:`312` by `Guillaume Lemaitre`_.
+  the number of samples required is negative. :issue:`312` by :user:`Guillaume
+  Lemaitre <glemaitre>`.
 
-- Fixed a bug in :class:`under_sampling.NearMiss` version 3. The
-  indices returned were wrong. :issue:`312` by `Guillaume Lemaitre`_.
+- Fixed a bug in :class:`under_sampling.NearMiss` version 3. The indices
+  returned were wrong. :issue:`312` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 - Fixed bug for :class:`ensemble.BalanceCascade` and :class:`combine.SMOTEENN`
-  and :class:`SMOTETomek`. :issue:`295` by `Guillaume Lemaitre`_.`
+  and :class:`SMOTETomek`. :issue:`295` by :user:`Guillaume Lemaitre
+  <glemaitre>`.
 
 - Fixed bug for `check_ratio` to be able to pass arguments when `ratio` is a
-  callable. :issue:`307` by `Guillaume Lemaitre`_.`
+  callable. :issue:`307` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 New features
 ~~~~~~~~~~~~
 
 - Turn off steps in :class:`pipeline.Pipeline` using the `None`
-  object. By `Christos Aridas`_.
+  object. By :user:`Christos Aridas <chkoar>`.
 
 - Add a fetching function :func:`datasets.fetch_datasets` in order to get some
-  imbalanced datasets useful for benchmarking. :issue:`249` by `Guillaume Lemaitre`_.
+  imbalanced datasets useful for benchmarking. :issue:`249` by :user:`Guillaume
+  Lemaitre <glemaitre>`.
 
 Enhancement
 ~~~~~~~~~~~
 
-- All samplers accepts sparse matrices with defaulting on CSR type. :issue:`316` by
-  `Guillaume Lemaitre`_.
+- All samplers accept sparse matrices, defaulting to CSR
+  type. :issue:`316` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 - :func:`datasets.make_imbalance` takes a ratio similarly to other samplers. It
-  supports multiclass. :issue:`312` by `Guillaume Lemaitre`_.
+  supports multiclass. :issue:`312` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 - All the unit tests have been factorized and a :func:`utils.check_estimators`
-  has been derived from scikit-learn. By `Guillaume Lemaitre`_.
+  has been derived from scikit-learn. By :user:`Guillaume Lemaitre
+  <glemaitre>`.
 
 - Script for automatic build of conda packages and uploading. :issue:`242` by
-  `Guillaume Lemaitre`_
+  :user:`Guillaume Lemaitre <glemaitre>`
 
-- Remove seaborn dependence and improve the examples. :issue:`264` by `Guillaume
-  Lemaitre`_.
+- Remove seaborn dependence and improve the examples. :issue:`264` by
+  :user:`Guillaume Lemaitre <glemaitre>`.
 
-- adapt all classes to multi-class resampling. :issue:`290` by `Guillaume Lemaitre`_
+- adapt all classes to multi-class resampling. :issue:`290` by :user:`Guillaume
+  Lemaitre <glemaitre>`
 
 API changes summary
 ~~~~~~~~~~~~~~~~~~~
 
-- `__init__` has been removed from the :class:`base.SamplerMixin` to
-  create a real mixin class. :issue:`242` by `Guillaume Lemaitre`_.
+- `__init__` has been removed from the :class:`base.SamplerMixin` to create a
+  real mixin class. :issue:`242` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 - creation of a module :mod:`exceptions` to handle consistant raising of
-  errors. :issue:`242` by `Guillaume Lemaitre`_.
+  errors. :issue:`242` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-- creation of a module ``utils.validation`` to make checking of
-  recurrent patterns. :issue:`242` by `Guillaume Lemaitre`_.
+- creation of a module ``utils.validation`` to make checking of recurrent
+  patterns. :issue:`242` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 - move the under-sampling methods in ``prototype_selection`` and
-  ``prototype_generation`` submodule to make a clearer dinstinction. 
:issue:`277` by
-  `Guillaume Lemaitre`_.
+  ``prototype_generation`` submodule to make a clearer
+  distinction. :issue:`277` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-- change ``ratio`` such that it can adapt to multiple class problems. :issue:`290` by
-  `Guillaume Lemaitre`_.
+- change ``ratio`` such that it can adapt to multiple class
+  problems. :issue:`290` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 Deprecation
 ~~~~~~~~~~~
 
-- Deprecation of the use of ``min_c_`` in :func:`datasets.make_imbalance`. :issue:`312` by
-  `Guillaume Lemaitre`_
+- Deprecation of the use of ``min_c_`` in
+  :func:`datasets.make_imbalance`. :issue:`312` by :user:`Guillaume Lemaitre
+  <glemaitre>`
 
 - Deprecation of the use of float in :func:`datasets.make_imbalance` for the
-  ratio parameter. :issue:`290` by `Guillaume Lemaitre`_.
+  ratio parameter. :issue:`290` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 - deprecate the use of float as ratio in favor of dictionary, string, or
-  callable. :issue:`290` by `Guillaume Lemaitre`_.
-
+  callable. :issue:`290` by :user:`Guillaume Lemaitre <glemaitre>`.
diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst
index 41a34338d..6546a57a0 100644
--- a/doc/whats_new/v0.0.4.rst
+++ b/doc/whats_new/v0.0.4.rst
@@ -1,4 +1,4 @@
-.. _changes_0_3:
+.. _changes_0_4:
 
 Version 0.4 (under development)
 ===============================
diff --git a/examples/plot_sampling_target_usage.py b/examples/plot_sampling_target_usage.py
deleted file mode 100644
index f4339572d..000000000
--- a/examples/plot_sampling_target_usage.py
+++ /dev/null
@@ -1,220 +0,0 @@
-"""
-======================================================================
-Usage of the ``sampling_strategy`` parameter for the different algorithms
-=======================================================================
-
-This example shows the different usage of the parameter ``sampling_strategy`` for
-the different family of samplers (i.e. over-sampling, under-sampling. or
-cleaning methods).
-
-"""
-
-# Authors: Guillaume Lemaitre
-# License: MIT
-
-from collections import Counter
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-from sklearn.datasets import load_iris
-
-from imblearn.datasets import make_imbalance
-
-from imblearn.over_sampling import RandomOverSampler
-from imblearn.under_sampling import RandomUnderSampler
-from imblearn.under_sampling import TomekLinks
-
-print(__doc__)
-
-
-def plot_pie(y):
-    target_stats = Counter(y)
-    labels = list(target_stats.keys())
-    sizes = list(target_stats.values())
-    explode = tuple([0.1] * len(target_stats))
-
-    def make_autopct(values):
-        def my_autopct(pct):
-            total = sum(values)
-            val = int(round(pct * total / 100.0))
-            return '{p:.2f}% ({v:d})'.format(p=pct, v=val)
-        return my_autopct
-
-    fig, ax = plt.subplots()
-    ax.pie(sizes, explode=explode, labels=labels, shadow=True,
-           autopct=make_autopct(sizes))
-    ax.axis('equal')
-
-
-###############################################################################
-# First, we will create an imbalanced data set from a the iris data set. 
- -iris = load_iris() - -print('Information of the original iris data set: \n {}'.format( - Counter(iris.target))) -plot_pie(iris.target) - -sampling_strategy = {0: 10, 1: 20, 2: 47} -X, y = make_imbalance(iris.data, iris.target, sampling_strategy=sampling_strategy) - -print('Information of the iris data set after making it' - ' imbalanced using a dict: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y))) -plot_pie(y) - -############################################################################### -# Using ``sampling_strategy`` in resampling algorithms -############################################################################### - -############################################################################### -# ``sampling_strategy`` as a ``float`` -# ................................... -# -# ``sampling_strategy`` can be given a ``float``. For **under-sampling -# methods**, it corresponds to the ratio :math:`\\alpha_{us}` defined by -# :math:`N_{rM} = \\alpha_{us} \\times N_{m}` where :math:`N_{rM}` and -# :math:`N_{m}` are the number of samples in the majority class after -# resampling and the number of samples in the minority class, respectively. - -# select only 2 classes since the ratio make sense in this case -binary_mask = np.bitwise_or(y == 0, y == 2) -binary_y = y[binary_mask] -binary_X = X[binary_mask] - -sampling_strategy = 0.8 - -rus = RandomUnderSampler(sampling_strategy=sampling_strategy) -X_res, y_res = rus.fit_sample(binary_X, binary_y) -print('Information of the iris data set after making it ' - 'balanced using a float and an under-sampling method: \n ' - 'sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -############################################################################### -# For **over-sampling methods**, it correspond to the ratio -# :math:`\\alpha_{os}` defined by :math:`N_{rm} = \\alpha_{os} \\times N_{m}` -# where :math:`N_{rm}` and :math:`N_{M}` are the number of samples in the -# minority class after resampling and the number of samples in the majority -# class, respectively. - -ros = RandomOverSampler(sampling_strategy=sampling_strategy) -X_res, y_res = ros.fit_sample(binary_X, binary_y) -print('Information of the iris data set after making it ' - 'balanced using a float and an over-sampling method: \n ' - 'sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -############################################################################### -# ``sampling_strategy`` has a ``str`` -# ................................. -# -# ``sampling_strategy`` can be given as a string which specify the class targeted -# by the resampling. With under- and over-sampling, the number of samples will -# be equalized. -# -# Note that we are using multiple classes from now on. 
- -sampling_strategy = 'not minority' - -rus = RandomUnderSampler(sampling_strategy=sampling_strategy) -X_res, y_res = rus.fit_sample(X, y) -print('Information of the iris data set after making it ' - 'balanced by under-sampling: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -sampling_strategy = 'not majority' - -ros = RandomOverSampler(sampling_strategy=sampling_strategy) -X_res, y_res = ros.fit_sample(X, y) -print('Information of the iris data set after making it ' - 'balanced by over-sampling: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -############################################################################### -# With **cleaning method**, the number of samples in each class will not be -# equalized even if targeted. - -sampling_strategy = 'not minority' -tl = TomekLinks(sampling_strategy) -X_res, y_res = tl.fit_sample(X, y) -print('Information of the iris data set after making it ' - 'balanced by cleaning sampling: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -############################################################################### -# ``sampling_strategy`` as a ``dict`` -# .................................. -# -# When ``sampling_strategy`` is a ``dict``, the keys correspond to the targeted -# classes. The values correspond to the desired number of samples for each -# targeted class. This is working for both **under- and over-sampling** -# algorithms but not for the **cleaning algorithms**. Use a ``list`` instead. - - -sampling_strategy = {0: 10, 1: 15, 2: 20} - -rus = RandomUnderSampler(sampling_strategy=sampling_strategy) -X_res, y_res = rus.fit_sample(X, y) -print('Information of the iris data set after making it ' - 'balanced by under-sampling: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -sampling_strategy = {0: 25, 1: 35, 2: 47} - -ros = RandomOverSampler(sampling_strategy=sampling_strategy) -X_res, y_res = ros.fit_sample(X, y) -print('Information of the iris data set after making it ' - 'balanced by over-sampling: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -############################################################################### -# ``sampling_strategy`` as a ``list`` -# .................................. -# -# When ``sampling_strategy`` is a ``list``, the list contains the targeted -# classes. It is used only for **cleaning methods** and raise an error -# otherwise. - -sampling_strategy = [0, 1, 2] -tl = TomekLinks(sampling_strategy=sampling_strategy) -X_res, y_res = tl.fit_sample(X, y) -print('Information of the iris data set after making it ' - 'balanced by cleaning sampling: \n sampling_strategy={} \n y: {}' - .format(sampling_strategy, Counter(y_res))) -plot_pie(y_res) - -############################################################################### -# ``sampling_strategy`` as a callable -# .................................. -# -# When callable, function taking ``y`` and returns a ``dict``. The keys -# correspond to the targeted classes. The values correspond to the desired -# number of samples for each class. 
- - -def ratio_multiplier(y): - multiplier = {1: 0.7, 2: 0.95} - target_stats = Counter(y) - for key, value in target_stats.items(): - if key in multiplier: - target_stats[key] = int(value * multiplier[key]) - return target_stats - - -X_res, y_res = (RandomUnderSampler(sampling_strategy=ratio_multiplier) - .fit_sample(X, y)) - -print('Information of the iris data set after balancing using a callable' - ' mode:\n ratio={} \n y: {}'.format(ratio_multiplier, Counter(y_res))) -plot_pie(y_res) - -plt.show() diff --git a/imblearn/metrics/classification.py b/imblearn/metrics/classification.py index 68c6e762b..c79739e94 100644 --- a/imblearn/metrics/classification.py +++ b/imblearn/metrics/classification.py @@ -473,7 +473,7 @@ def geometric_mean_score(y_true, average='multiclass', sample_weight=None, correction=0.0): - """Compute the geometric mean + """Compute the geometric mean. The geometric mean (G-mean) is the root of the product of class-wise sensitivity. This measure tries to maximize the accuracy on each of the @@ -515,7 +515,7 @@ class is unrecognized by the classifier, G-mean resolves to zero. To setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : str or None, optional (default=``'multiclass'``) + average : str or None, optional (default='multiclass') If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: diff --git a/imblearn/utils/_docstring.py b/imblearn/utils/_docstring.py index 56ae44106..a47bef8af 100644 --- a/imblearn/utils/_docstring.py +++ b/imblearn/utils/_docstring.py @@ -26,6 +26,7 @@ def __call__(self, obj): _random_state_docstring = \ """random_state : int, RandomState instance or None, optional (default=None) Control the randomization of the algorithm + - If int, ``random_state`` is the seed used by the random number generator; - If ``RandomState`` instance, random_state is the random number diff --git a/imblearn/utils/validation.py b/imblearn/utils/validation.py index 7d4ad4495..311b50703 100644 --- a/imblearn/utils/validation.py +++ b/imblearn/utils/validation.py @@ -417,7 +417,7 @@ def check_sampling_strategy(sampling_strategy, y, sampling_type, **kwargs): ``list`` instead. - When ``list``, the list contains the targeted classes. It used only - for **cleaning methods``. + for **cleaning methods**. .. warning:: ``list`` is available for **cleaning methods**. An error is raised From 05ff979e9ef57aec07384557866fe2515552eab8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 15 May 2018 00:41:46 +0200 Subject: [PATCH 30/50] DOC added tensorflow user guide --- doc/miscellaneous.rst | 79 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/doc/miscellaneous.rst b/doc/miscellaneous.rst index ef263a21b..2bbad5ca0 100644 --- a/doc/miscellaneous.rst +++ b/doc/miscellaneous.rst @@ -38,3 +38,82 @@ We illustrate the use of such sampler to implement an outlier rejection estimator which can be easily used within a :class:`imblearn.pipeline.Pipeline`: :ref:`sphx_glr_auto_examples_plot_outlier_rejections.py` + +.. _generators: + +Custom generators +----------------- + +Imbalanced-learn provides specific generators for TensorFlow and Keras which +will generate balanced mini-batches. + +.. 
_tensorflow_generator: + +TensorFlow generator +~~~~~~~~~~~~~~~~~~~~ + +The :func:`tensorflow.balanced_batch_generator` allow to generate balanced +mini-batches using an imbalanced-learn sampler which returns indices:: + + >>> X = X.astype(np.float32) + >>> from imblearn.under_sampling import RandomUnderSampler + >>> from imblearn.tensorflow import balanced_batch_generator + >>> training_generator, steps_per_epoch = balanced_batch_generator( + ... X, y, sample_weight=None, sampler=RandomUnderSampler(), + ... batch_size=10, random_state=42) + +The ``generator`` and ``steps_per_epoch`` can be used during the training of +the Tensorflow model. We will illustrate how to use this generator. First, we +can define a logistic regression model which will be optimized by a gradient +descent:: + + >>> learning_rate, epochs = 0.01, 10 + >>> input_size, output_size = X.shape[1], 3 + >>> import tensorflow as tf + >>> def init_weights(shape): + ... return tf.Variable(tf.random_normal(shape, stddev=0.01)) + >>> def accuracy(y_true, y_pred): + ... return np.mean(np.argmax(y_pred, axis=1) == y_true) + >>> # input and output + >>> data = tf.placeholder("float32", shape=[None, input_size]) + >>> targets = tf.placeholder("int32", shape=[None]) + >>> # build the model and weights + >>> W = init_weights([input_size, output_size]) + >>> b = init_weights([output_size]) + >>> out_act = tf.nn.sigmoid(tf.matmul(data, W) + b) + >>> # build the loss, predict, and train operator + >>> cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + ... logits=out_act, labels=targets) + >>> loss = tf.reduce_sum(cross_entropy) + >>> optimizer = tf.train.GradientDescentOptimizer(learning_rate) + >>> train_op = optimizer.minimize(loss) + >>> predict = tf.nn.softmax(out_act) + >>> # Initialization of all variables in the graph + >>> init = tf.global_variables_initializer() + +Once the model initialize, we train the model by iterating on balanced +mini-batches of data and minizing the loss previously defined:: + + >>> with tf.Session() as sess: + ... print('Starting training') + ... sess.run(init) + ... for e in range(epochs): + ... for i in range(steps_per_epoch): + ... X_batch, y_batch = next(training_generator) + ... feed_dict = dict() + ... feed_dict[data] = X_batch; feed_dict[targets] = y_batch + ... sess.run([train_op, loss], feed_dict=feed_dict) + ... # For each epoch, run accuracy on train and test + ... feed_dict = dict() + ... feed_dict[data] = X + ... predicts_train = sess.run(predict, feed_dict=feed_dict) + ... print("epoch: {} train accuracy: {:.3f}" + ... .format(e, accuracy(y, predicts_train))) + ... # doctest: +ELLIPSIS + Starting training + [... + +.. _keras_generator: + +Keras generator +~~~~~~~~~~~~~~~ From f291b68437a693d3e00cb0a91b502ae995abad6e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 17 May 2018 23:45:13 +0200 Subject: [PATCH 31/50] DOC update the user guide --- doc/miscellaneous.rst | 56 +++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/doc/miscellaneous.rst b/doc/miscellaneous.rst index 2bbad5ca0..5734f5c66 100644 --- a/doc/miscellaneous.rst +++ b/doc/miscellaneous.rst @@ -52,8 +52,8 @@ will generate balanced mini-batches. 
TensorFlow generator
~~~~~~~~~~~~~~~~~~~~
 
-The :func:`tensorflow.balanced_batch_generator` allow to generate balanced
-mini-batches using an imbalanced-learn sampler which returns indices::
+The :func:`imblearn.tensorflow.balanced_batch_generator` allows generating
+balanced mini-batches using an imbalanced-learn sampler which returns indices::
 
     >>> X = X.astype(np.float32)
     >>> from imblearn.under_sampling import RandomUnderSampler
@@ -62,9 +62,9 @@ mini-batches using an imbalanced-learn sampler which returns indices::
     >>> from imblearn.tensorflow import balanced_batch_generator
     >>> training_generator, steps_per_epoch = balanced_batch_generator(
     ...     X, y, sample_weight=None, sampler=RandomUnderSampler(),
     ...     batch_size=10, random_state=42)
 
-The ``generator`` and ``steps_per_epoch`` can be used during the training of
-the Tensorflow model. We will illustrate how to use this generator. First, we
-can define a logistic regression model which will be optimized by a gradient
+The ``generator`` and ``steps_per_epoch`` are used during the training of the
+TensorFlow model. We will illustrate how to use this generator. First, we can
+define a logistic regression model which will be optimized by a gradient
 descent::
 
     >>> learning_rate, epochs = 0.01, 10
@@ -91,8 +91,8 @@ descent::
     >>> # Initialization of all variables in the graph
     >>> init = tf.global_variables_initializer()
 
-Once the model initialize, we train the model by iterating on balanced
-mini-batches of data and minizing the loss previously defined::
+Once initialized, the model is trained by iterating on balanced mini-batches of
+data and minimizing the loss previously defined::
 
     >>> with tf.Session() as sess:
     ...     print('Starting training')
@@ -100,13 +100,10 @@ mini-batches of data and minizing the loss previously defined::
     ...     sess.run(init)
     ...     for e in range(epochs):
     ...         for i in range(steps_per_epoch):
     ...             X_batch, y_batch = next(training_generator)
-    ...             feed_dict = dict()
-    ...             feed_dict[data] = X_batch; feed_dict[targets] = y_batch
-    ...             sess.run([train_op, loss], feed_dict=feed_dict)
+    ...             sess.run([train_op, loss], feed_dict={data: X_batch, targets: y_batch})
     ...         # For each epoch, run accuracy on train and test
     ...         feed_dict = dict()
-    ...         feed_dict[data] = X
-    ...         predicts_train = sess.run(predict, feed_dict=feed_dict)
+    ...         predicts_train = sess.run(predict, feed_dict={data: X})
     ...         print("epoch: {} train accuracy: {:.3f}"
     ...               .format(e, accuracy(y, predicts_train)))
     ...  # doctest: +ELLIPSIS
@@ -117,3 +114,38 @@ mini-batches of data and minizing the loss previously defined::
 
 Keras generator
 ~~~~~~~~~~~~~~~
+
+Keras provides a higher-level API in which a model can be defined and trained
+by calling the ``fit_generator`` method. To illustrate, we will define a
+logistic regression model::
+
+    >>> import keras
+    >>> y = keras.utils.to_categorical(y, 3)
+    >>> model = keras.Sequential()
+    >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
+    ...                              activation='softmax'))
+    >>> model.compile(optimizer='sgd', loss='categorical_crossentropy',
+    ...               metrics=['accuracy'])
+
+:func:`imblearn.keras.balanced_batch_generator` creates a generator of
+balanced mini-batches, together with the number of mini-batches which will be
+generated at each epoch::
+
+    >>> from imblearn.keras import balanced_batch_generator
+    >>> training_generator, steps_per_epoch = balanced_batch_generator(
+    ...     X, y, sampler=RandomUnderSampler(), batch_size=10, random_state=42)
+
+Then, ``fit_generator`` can be called, passing the generator and the number of
+steps per epoch::
+
+    >>> callback_history = model.fit_generator(generator=training_generator,
+    ...                                        steps_per_epoch=steps_per_epoch,
+    ...
epochs=10, verbose=0)
+
+The second possibility is to use
+:class:`imblearn.keras.BalancedBatchGenerator`. In this case, only an instance
+of this class needs to be passed to ``fit_generator``::
+
+    >>> from imblearn.keras import BalancedBatchGenerator
+    >>> training_generator = BalancedBatchGenerator(
+    ...     X, y, sampler=RandomUnderSampler(), batch_size=10, random_state=42)
+    >>> callback_history = model.fit_generator(generator=training_generator,
+    ...                                        epochs=10, verbose=0)
From 5b95e2634eb9278efe1dd849ecd38154bff88e02 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Thu, 17 May 2018 23:45:39 +0200
Subject: [PATCH 32/50] FIX rename function

---
 imblearn/keras/_generator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index e46df80bd..dec122943 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -13,7 +13,7 @@
 from ..under_sampling import RandomUnderSampler
 from ..utils import Substitution
 from ..utils._docstring import _random_state_docstring
-from ..tensorflow import balanced_batch_generator as keras_bbg
+from ..tensorflow import balanced_batch_generator as tf_bbg
 
 
 @Substitution(random_state=_random_state_docstring)
@@ -202,6 +202,6 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
 
     """
 
-    return keras_bbg(X=X, y=y, sample_weight=sample_weight,
-                     sampler=sampler, batch_size=batch_size,
-                     random_state=random_state)
+    return tf_bbg(X=X, y=y, sample_weight=sample_weight,
+                  sampler=sampler, batch_size=batch_size,
+                  random_state=random_state)
From 1077e9c3241e6df08aa6ad002c6a80fd5b1a5fef Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Thu, 17 May 2018 23:53:14 +0200
Subject: [PATCH 33/50] MAINT add optional dependencies

---
 requirements.optional.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 requirements.optional.txt

diff --git a/requirements.optional.txt b/requirements.optional.txt
new file mode 100644
index 000000000..826277d5e
--- /dev/null
+++ b/requirements.optional.txt
@@ -0,0 +1,2 @@
+keras
+tensorflow
From f95753197c1ff39f76e5bc4a9bc2f80ccbb94619 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Fri, 18 May 2018 00:08:07 +0200
Subject: [PATCH 34/50] DOC fix python 2 doc override

---
 doc/whats_new/v0.0.4.rst     | 6 ++++++
 imblearn/keras/_generator.py | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst
index 6546a57a0..005d5ea97 100644
--- a/doc/whats_new/v0.0.4.rst
+++ b/doc/whats_new/v0.0.4.rst
@@ -18,6 +18,12 @@ API
 - Enable to use a ``list`` for the cleaning methods to specify the class to
   sample. :issue:`411` by :user:`Guillaume Lemaitre `.
 
+New features
+............
+
+- Add ``keras`` and ``tensorflow`` modules to create balanced mini-batch
+  generators. :issue:`409` by :user:`Guillaume Lemaitre `.
+
 Enhancement
 ...........
 
diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index dec122943..940ca0369 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -16,7 +16,6 @@
 from ..tensorflow import balanced_batch_generator as tf_bbg
 
 
-@Substitution(random_state=_random_state_docstring)
 class BalancedBatchGenerator(keras.utils.Sequence):
     """Create balanced batches when training a keras model.
From 00d2a05e0335c32a0038b542407d0d87390fb127 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Fri, 27 Jul 2018 22:35:01 +0200
Subject: [PATCH 35/50] iter

---
 imblearn/keras/_generator.py      | 2 +-
 imblearn/tensorflow/_generator.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index 940ca0369..8debae10b 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -66,8 +66,8 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     >>> class_dict = dict()
     >>> class_dict[0] = 30; class_dict[1] = 50; class_dict[2] = 40
     >>> X, y = make_imbalance(iris.data, iris.target, class_dict)
-    >>> y = keras.utils.to_categorical(y, 3)
     >>> import keras
+    >>> y = keras.utils.to_categorical(y, 3)
     >>> model = keras.models.Sequential()
     >>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
     ...                              activation='softmax'))
diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py
index 868dd1a89..75e8fa57e 100644
--- a/imblearn/tensorflow/_generator.py
+++ b/imblearn/tensorflow/_generator.py
@@ -2,11 +2,15 @@
 
 from __future__ import division
 
+import pytest
+
 from sklearn.base import clone
 from sklearn.utils import safe_indexing
 from sklearn.utils import check_random_state
 from sklearn.utils.testing import set_random_state
 
+tf = pytest.importorskip("tensorflow")
+
 from ..under_sampling import RandomUnderSampler
 from ..utils import Substitution
 from ..utils._docstring import _random_state_docstring
From f677e6b3ecc877c594664edeb678035c85ce378d Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Sat, 28 Jul 2018 21:40:03 +0200
Subject: [PATCH 36/50] EHN add parameter to preserve sparsity

---
 imblearn/keras/_generator.py                | 60 +++++++++++++--------
 imblearn/keras/tests/test_generator.py      | 20 +++++++
 imblearn/tensorflow/_generator.py           | 36 +++++++++----
 imblearn/tensorflow/tests/test_generator.py | 14 +++++
 4 files changed, 99 insertions(+), 31 deletions(-)

diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index 8debae10b..d634a2ba0 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -3,6 +3,8 @@
 
 import pytest
 
+from scipy.sparse import issparse
+
 from sklearn.base import clone
 from sklearn.utils import safe_indexing
 from sklearn.utils import check_random_state
@@ -41,6 +43,11 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     batch_size : int, optional (default=32)
         Number of samples per gradient update.
 
+    sparse : bool, optional (default=False)
+        Whether or not to conserve the sparsity of the input (i.e. ``X``,
+        ``y``, ``sample_weight``). By default, the returned batches will be
+        dense.
+ random_state : int, RandomState instance or None, optional (default=None) Control the randomization of the algorithm - If int, ``random_state`` is the seed used by the random number @@ -82,12 +89,13 @@ class BalancedBatchGenerator(keras.utils.Sequence): """ def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32, - random_state=None): + sparse=False, random_state=None): self.X = X self.y = y self.sample_weight = sample_weight self.sampler = sampler self.batch_size = batch_size + self.sparse = sparse self.random_state = random_state self._sample() @@ -113,32 +121,35 @@ def __len__(self): return int(self.indices_.size // self.batch_size) def __getitem__(self, index): + X_resampled = safe_indexing( + self.X, self.indices_[index * self.batch_size: + (index + 1) * self.batch_size]) + if issparse(X_resampled) and not self.sparse: + X_resampled = X_resampled.toarray() + + y_resampled = safe_indexing( + self.y, self.indices_[index * self.batch_size: + (index + 1) * self.batch_size]) + if issparse(y_resampled) and not self.sparse: + y_resampled = y_resampled.toarray() + + if self.sample_weight is not None: + sample_weight_resampled = safe_indexing( + self.sample_weight, + self.indices_[index * self.batch_size: + (index + 1) * self.batch_size]) + if issparse(sample_weight_resampled) and not self.sparse: + sample_weight = sample_weight.toarray() + if self.sample_weight is None: - return ( - safe_indexing(self.X, - self.indices_[index * self.batch_size: - (index + 1) * self.batch_size]), - safe_indexing(self.y, - self.indices_[index * self.batch_size: - (index + 1) * self.batch_size]) - ) + return X_resampled, y_resampled else: - return ( - safe_indexing(self.X, - self.indices_[index * self.batch_size: - (index + 1) * self.batch_size]), - safe_indexing(self.y, - self.indices_[index * self.batch_size: - (index + 1) * self.batch_size]), - safe_indexing(self.sample_weight, - self.indices_[index * self.batch_size: - (index + 1) * self.batch_size]) - ) + return X_resampled, y_resampled, sample_weight_resampled @Substitution(random_state=_random_state_docstring) def balanced_batch_generator(X, y, sample_weight=None, sampler=None, - batch_size=32, random_state=None): + batch_size=32, sparse=False, random_state=None): """Create a balanced batch generator to train keras model. Returns a generator --- as well as the number of step per epoch --- which @@ -163,6 +174,11 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, batch_size : int, optional (default=32) Number of samples per gradient update. + sparse : bool, optional (default=False) + Either or not to conserve or not the sparsity of the input (i.e. ``X``, + ``y``, ``sample_weight``). By default, the returned batches will be + dense. 
+ {random_state} Returns @@ -203,4 +219,4 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, return tf_bbg(X=X, y=y, sample_weight=sample_weight, sampler=sampler, batch_size=batch_size, - random_state=random_state) + sparse=sparse, random_state=random_state) diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py index d94dd73da..8f880029c 100644 --- a/imblearn/keras/tests/test_generator.py +++ b/imblearn/keras/tests/test_generator.py @@ -1,6 +1,7 @@ import pytest import numpy as np +from scipy import sparse from sklearn.datasets import load_iris @@ -51,6 +52,16 @@ def test_balanced_batch_generator_class(sampler, sample_weight): epochs=10) +def test_balanced_batch_generator_class_sparse(): + training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y, + batch_size=100, + sparse=True, + random_state=42) + for idx in range(len(training_generator)): + X_batch, y_batch = training_generator.__getitem__(idx) + assert sparse.issparse(X_batch) + + def test_balanced_batch_generator_function_no_return_indices(): with pytest.raises(ValueError, match='needs to return the indices'): balanced_batch_generator( @@ -71,3 +82,12 @@ def test_balanced_batch_generator_function(sampler, sample_weight): model.fit_generator(generator=training_generator, steps_per_epoch=steps_per_epoch, epochs=10) + + +def test_balanced_batch_generator_function_sparse(): + training_generator, steps_per_epoch = balanced_batch_generator( + sparse.csr_matrix(X), y, sparse=True, batch_size=10, + random_state=42) + for idx in range(steps_per_epoch): + X_batch, y_batch = next(training_generator) + assert sparse.issparse(X_batch) diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py index 75e8fa57e..3f904cc90 100644 --- a/imblearn/tensorflow/_generator.py +++ b/imblearn/tensorflow/_generator.py @@ -4,6 +4,8 @@ import pytest +from scipy.sparse import issparse + from sklearn.base import clone from sklearn.utils import safe_indexing from sklearn.utils import check_random_state @@ -18,7 +20,7 @@ @Substitution(random_state=_random_state_docstring) def balanced_batch_generator(X, y, sample_weight=None, sampler=None, - batch_size=32, random_state=None): + batch_size=32, sparse=False, random_state=None): """Create a balanced batch generator to train keras model. Returns a generator --- as well as the number of step per epoch --- which @@ -43,6 +45,11 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, batch_size : int, optional (default=32) Number of samples per gradient update. + sparse : bool, optional (default=False) + Either or not to conserve or not the sparsity of the input (i.e. ``X``, + ``y``, ``sample_weight``). By default, the returned batches will be + dense. + {random_state} Returns @@ -52,8 +59,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, y_batch) or (X_batch, y_batch, sampler_weight_batch). steps_per_epoch : int - The number of samples per epoch. Required by ``fit_generator`` in - keras. + The number of samples per epoch. 
Examples -------- @@ -133,15 +139,27 @@ def generator(X, y, sample_weight, indices, batch_size): if sample_weight is None: while True: for index in range(0, len(indices), batch_size): - yield (safe_indexing(X, indices[index:index + batch_size]), - safe_indexing(y, indices[index:index + batch_size])) + X_res = safe_indexing(X, indices[index:index + batch_size]) + y_res = safe_indexing(y, indices[index:index + batch_size]) + if issparse(X_res) and not sparse: + X_res = X_res.toarray() + if issparse(y_res) and not sparse: + y_res = y_res.toarray() + yield X_res, y_res else: while True: for index in range(0, len(indices), batch_size): - yield (safe_indexing(X, indices[index:index + batch_size]), - safe_indexing(y, indices[index:index + batch_size]), - safe_indexing(sample_weight, - indices[index:index + batch_size])) + X_res = safe_indexing(X, indices[index:index + batch_size]) + y_res = safe_indexing(y, indices[index:index + batch_size]) + sw_res = safe_indexing(sample_weight, + indices[index:index + batch_size]) + if issparse(X_res) and not sparse: + X_res = X_res.toarray() + if issparse(y_res) and not sparse: + y_res = y_res.toarray() + if issparse(sw_res) and not sparse: + sw_res = sw_res.toarray() + yield X_res, y_res, sw_res return (generator(X, y, sample_weight, indices, batch_size), int(indices.size // batch_size)) diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index 2045690e4..22ca16500 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -2,6 +2,7 @@ import pytest import numpy as np +from scipy import sparse from sklearn.datasets import load_iris @@ -72,3 +73,16 @@ def accuracy(y_true, y_pred): predicts_train = sess.run(predict, feed_dict={data: X}) print("epoch: {} train accuracy: {:.3f}" .format(e, accuracy(y, predicts_train))) + + +def test_balanced_batch_generator_function_sparse(): + X, y = load_iris(return_X_y=True) + X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) + X = X.astype(np.float32) + + training_generator, steps_per_epoch = balanced_batch_generator( + sparse.csr_matrix(X), y, sparse=True, batch_size=10, + random_state=42) + for idx in range(steps_per_epoch): + X_batch, y_batch = next(training_generator) + assert sparse.issparse(X_batch) From ccef644af7eb73adfc66271ad5e56f1abdbaca50 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 31 Jul 2018 05:09:14 +0200 Subject: [PATCH 37/50] DOC mention default sampler --- imblearn/keras/_generator.py | 8 ++++++-- imblearn/tensorflow/_generator.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py index d634a2ba0..dd0f92ed7 100644 --- a/imblearn/keras/_generator.py +++ b/imblearn/keras/_generator.py @@ -37,8 +37,10 @@ class BalancedBatchGenerator(keras.utils.Sequence): sample_weight : ndarray, shape (n_samples,) Sample weight. - sampler : object or None, optional (default=None) + sampler : object or None, optional (default=RandomUnderSampler) A sampler instance which has an attribute ``return_indices``. + By default, the sampler used is a + :class:`imblearn.under_sampling.RandomUnderSampler`. batch_size : int, optional (default=32) Number of samples per gradient update. @@ -168,8 +170,10 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, sample_weight : ndarray, shape (n_samples,) Sample weight. 
- sampler : object or None, optional (default=None) + sampler : object or None, optional (default=RandomUnderSampler) A sampler instance which has an attribute ``return_indices``. + By default, the sampler used is a + :class:`imblearn.under_sampling.RandomUnderSampler`. batch_size : int, optional (default=32) Number of samples per gradient update. diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py index 3f904cc90..262ece894 100644 --- a/imblearn/tensorflow/_generator.py +++ b/imblearn/tensorflow/_generator.py @@ -39,8 +39,10 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, sample_weight : ndarray, shape (n_samples,) Sample weight. - sampler : object or None, optional (default=None) + sampler : object or None, optional (default=RandomUnderSampler) A sampler instance which has an attribute ``return_indices``. + By default, the sampler used is a + :class:`imblearn.under_sampling.RandomUnderSampler`. batch_size : int, optional (default=32) Number of samples per gradient update. From 032c79101b3efef9c6b7ab3b1cd9b8f43edebeaf Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 21 Aug 2018 16:19:39 +0200 Subject: [PATCH 38/50] use doctest ignore import errors --- imblearn/keras/_generator.py | 6 ++---- imblearn/tensorflow/_generator.py | 4 ---- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py index dd0f92ed7..98edcf7ef 100644 --- a/imblearn/keras/_generator.py +++ b/imblearn/keras/_generator.py @@ -1,7 +1,7 @@ """Implement generators for ``keras`` which will balance the data.""" from __future__ import division -import pytest +import keras from scipy.sparse import issparse @@ -10,8 +10,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import set_random_state -keras = pytest.importorskip("keras") - from ..under_sampling import RandomUnderSampler from ..utils import Substitution from ..utils._docstring import _random_state_docstring @@ -141,7 +139,7 @@ def __getitem__(self, index): self.indices_[index * self.batch_size: (index + 1) * self.batch_size]) if issparse(sample_weight_resampled) and not self.sparse: - sample_weight = sample_weight.toarray() + sample_weight_resampled = sample_weight_resampled.toarray() if self.sample_weight is None: return X_resampled, y_resampled diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py index 262ece894..c1da5ae58 100644 --- a/imblearn/tensorflow/_generator.py +++ b/imblearn/tensorflow/_generator.py @@ -2,8 +2,6 @@ from __future__ import division -import pytest - from scipy.sparse import issparse from sklearn.base import clone @@ -11,8 +9,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import set_random_state -tf = pytest.importorskip("tensorflow") - from ..under_sampling import RandomUnderSampler from ..utils import Substitution from ..utils._docstring import _random_state_docstring diff --git a/setup.cfg b/setup.cfg index 56cfb932a..b39529093 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,4 +35,4 @@ doctest-fixtures = _fixture [tool:pytest] addopts = --doctest-modules - + --doctest-ignore-import-errors From c056567da212fba639ce7b3f80dccf92fbd11e69 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 21 Aug 2018 17:32:27 +0200 Subject: [PATCH 39/50] iter --- build_tools/travis/install.sh | 7 +++++-- imblearn/keras/_generator.py | 12 ++++++++++-- imblearn/keras/tests/test_generator.py | 10 +++++----- 3 files changed, 20 
insertions(+), 9 deletions(-)

diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
index e094117cc..be06095db 100755
--- a/build_tools/travis/install.sh
+++ b/build_tools/travis/install.sh
@@ -39,7 +39,10 @@ if [[ "$DISTRIB" == "conda" ]]; then
     conda create -n testenv --yes python=$PYTHON_VERSION pip
     source activate testenv
     conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
-    conda install --yes pandas keras
+    # only install optional dependency in python 3.6
+    if [[ $PYTHON_VERSION == "3.6" ]]; then
+        conda install --yes pandas keras
+    fi
 
     if [[ "$SKLEARN_VERSION" == "master" ]]; then
         conda install --yes cython
@@ -70,7 +73,7 @@ python --version
 python -c "import numpy; print('numpy %s' % numpy.__version__)"
 python -c "import scipy; print('scipy %s' % scipy.__version__)"
 
-python setup.py develop
+pip install -e .
 
 ccache --show-stats # Useful for debugging how ccache is used
 # cat $CCACHE_LOGFILE
diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index 98edcf7ef..251e1eb59 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -1,7 +1,13 @@
 """Implement generators for ``keras`` which will balance the data."""
 from __future__ import division
 
-import keras
+try:
+    import keras
+    ParentClass = keras.utils.Sequence
+    HAS_KERAS = True
+except ImportError:
+    ParentClass = object
+    HAS_KERAS = False
 
 from scipy.sparse import issparse
 
@@ -16,7 +22,7 @@
 from ..tensorflow import balanced_batch_generator as tf_bbg
 
 
-class BalancedBatchGenerator(keras.utils.Sequence):
+class BalancedBatchGenerator(ParentClass):
     """Create balanced batches when training a keras model.
 
     Create a keras ``Sequence`` which is given to ``fit_generator``. The
@@ -90,6 +96,8 @@ class BalancedBatchGenerator(keras.utils.Sequence):
     """
     def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32,
                  sparse=False, random_state=None):
+        if not HAS_KERAS:
+            raise ImportError("No module named 'keras'")
         self.X = X
         self.y = y
         self.sample_weight = sample_weight
diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
index 8f880029c..00c138d30 100644
--- a/imblearn/keras/tests/test_generator.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -5,6 +5,11 @@
 
 from sklearn.datasets import load_iris
 
+keras = pytest.importorskip('keras')
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.utils import to_categorical
+
 from imblearn.datasets import make_imbalance
 from imblearn.under_sampling import ClusterCentroids
 from imblearn.under_sampling import NearMiss
@@ -12,11 +17,6 @@
 from imblearn.keras import BalancedBatchGenerator
 from imblearn.keras import balanced_batch_generator
 
-keras = pytest.importorskip('keras')
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.utils import to_categorical
-
 iris = load_iris()
 X, y = make_imbalance(iris.data, iris.target, {0: 30, 1: 50, 2: 40})
 y = to_categorical(y, 3)
From 83b5fea09946574459736b8341c99f0341a9310a Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Tue, 21 Aug 2018 17:58:18 +0200
Subject: [PATCH 40/50] iter

---
 conftest.py                  | 15 +++++++++++++++
 imblearn/keras/_generator.py |  3 +++
 setup.cfg                    |  4 +---
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/conftest.py b/conftest.py
index 31da29707..86ec47899 100644
--- a/conftest.py
+++ b/conftest.py
@@ -7,9 +7,24 @@
 # Set numpy array str/repr to legacy behaviour on numpy > 1.13 to make
 # the doctests pass
+import pytest
 import numpy as np
 
 try:
np.set_printoptions(legacy='1.13')
 except TypeError:
     pass
+
+
+def pytest_runtest_setup(item):
+    fname = item.fspath.strpath
+    if fname.endswith('keras/_generator.py'):
+        try:
+            import keras
+        except ImportError:
+            pytest.skip('The keras package is not installed.')
+    elif fname.endswith('tensorflow/_generator.py'):
+        try:
+            import tensorflow
+        except ImportError:
+            pytest.skip('The tensorflow package is not installed.')
diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index 251e1eb59..c4a40fa3f 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -1,6 +1,9 @@
 """Implement generators for ``keras`` which will balance the data."""
 from __future__ import division
 
+# This is a trick to avoid an error during tests collection with pytest. We
+# avoid the error when importing the package and raise it at the moment of
+# creating the instance.
 try:
     import keras
     ParentClass = keras.utils.Sequence
diff --git a/setup.cfg b/setup.cfg
index b39529093..50f9c583a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,6 +33,4 @@ doctest-extension = rst
 doctest-fixtures = _fixture
 
 [tool:pytest]
-addopts =
-    --doctest-modules
-    --doctest-ignore-import-errors
+addopts = --doctest-modules
From c7b4a0a00c4efd20f123f9b739929301b6aac1dd Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Tue, 21 Aug 2018 18:10:01 +0200
Subject: [PATCH 41/50] iter

---
 conftest.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/conftest.py b/conftest.py
index 86ec47899..455b0f00a 100644
--- a/conftest.py
+++ b/conftest.py
@@ -18,12 +18,14 @@ def pytest_runtest_setup(item):
     fname = item.fspath.strpath
-    if fname.endswith('keras/_generator.py'):
+    if (fname.endswith('keras/_generator.py') or
+            fname.endswith('miscellaneous.rst')):
         try:
             import keras
         except ImportError:
             pytest.skip('The keras package is not installed.')
-    elif fname.endswith('tensorflow/_generator.py'):
+    elif (fname.endswith('tensorflow/_generator.py') or
+            fname.endswith('miscellaneous.rst')):
         try:
             import tensorflow
         except ImportError:
             pytest.skip('The tensorflow package is not installed.')
From cd99f9a5f8a15183b8c887623cbc78e5744f39a9 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Tue, 21 Aug 2018 18:13:31 +0200
Subject: [PATCH 42/50] join path for appveyor

---
 conftest.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/conftest.py b/conftest.py
index 455b0f00a..d3ff91025 100644
--- a/conftest.py
+++ b/conftest.py
@@ -7,6 +7,7 @@
 # Set numpy array str/repr to legacy behaviour on numpy > 1.13 to make
 # the doctests pass
+import os
 import pytest
 import numpy as np
 
@@ -18,13 +19,13 @@ def pytest_runtest_setup(item):
     fname = item.fspath.strpath
-    if (fname.endswith('keras/_generator.py') or
+    if (fname.endswith(os.path.join('keras', '_generator.py')) or
         fname.endswith('miscellaneous.rst')):
         try:
             import keras
         except ImportError:
             pytest.skip('The keras package is not installed.')
-    elif (fname.endswith('tensorflow/_generator.py') or
+    elif (fname.endswith(os.path.join('tensorflow', '_generator.py')) or
         fname.endswith('miscellaneous.rst')):
         try:
             import tensorflow
         except ImportError:
             pytest.skip('The tensorflow package is not installed.')
From b174c7f4982e5d902d5e80c0ef77f88fc8798c34 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre 
Date: Tue, 21 Aug 2018 22:26:57 +0200
Subject: [PATCH 43/50] Update the sparse array tests

---
 imblearn/keras/_generator.py                | 10 ++-----
 imblearn/keras/tests/test_generator.py      | 20 +++++++++----
 imblearn/tensorflow/_generator.py           | 33 +++++++--------------
 imblearn/tensorflow/tests/test_generator.py | 15 +++++-----
 4 files changed, 34 insertions(+), 44 deletions(-)

diff
--git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
index c4a40fa3f..c3f5a4af1 100644
--- a/imblearn/keras/_generator.py
+++ b/imblearn/keras/_generator.py
@@ -135,22 +135,16 @@ def __getitem__(self, index):
         X_resampled = safe_indexing(
             self.X, self.indices_[index * self.batch_size:
                                   (index + 1) * self.batch_size])
-        if issparse(X_resampled) and not self.sparse:
-            X_resampled = X_resampled.toarray()
-
         y_resampled = safe_indexing(
             self.y, self.indices_[index * self.batch_size:
                                   (index + 1) * self.batch_size])
-        if issparse(y_resampled) and not self.sparse:
-            y_resampled = y_resampled.toarray()
-
+        if issparse(X_resampled) and not self.sparse:
+            X_resampled = X_resampled.toarray()
         if self.sample_weight is not None:
             sample_weight_resampled = safe_indexing(
                 self.sample_weight,
                 self.indices_[index * self.batch_size:
                               (index + 1) * self.batch_size])
-            if issparse(sample_weight_resampled) and not self.sparse:
-                sample_weight_resampled = sample_weight_resampled.toarray()
 
         if self.sample_weight is None:
             return X_resampled, y_resampled
diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
index 00c138d30..ecef97979 100644
--- a/imblearn/keras/tests/test_generator.py
+++ b/imblearn/keras/tests/test_generator.py
@@ -52,14 +52,18 @@ def test_balanced_batch_generator_class(sampler, sample_weight):
                         epochs=10)
 
 
-def test_balanced_batch_generator_class_sparse():
+@pytest.mark.parametrize("is_sparse", [True, False])
+def test_balanced_batch_generator_class_sparse(is_sparse):
     training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y,
                                                 batch_size=100,
-                                                sparse=True,
+                                                sparse=is_sparse,
                                                 random_state=42)
     for idx in range(len(training_generator)):
         X_batch, y_batch = training_generator.__getitem__(idx)
-        assert sparse.issparse(X_batch)
+        if is_sparse:
+            assert sparse.issparse(X_batch)
+        else:
+            assert not sparse.issparse(X_batch)
 
 
 def test_balanced_batch_generator_function_no_return_indices():
@@ -84,10 +88,14 @@ def test_balanced_batch_generator_function(sampler, sample_weight):
                         epochs=10)
 
 
-def test_balanced_batch_generator_function_sparse():
+@pytest.mark.parametrize("is_sparse", [True, False])
+def test_balanced_batch_generator_function_sparse(is_sparse):
     training_generator, steps_per_epoch = balanced_batch_generator(
-        sparse.csr_matrix(X), y, sparse=True, batch_size=10,
+        sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10,
         random_state=42)
     for idx in range(steps_per_epoch):
         X_batch, y_batch = next(training_generator)
-        assert sparse.issparse(X_batch)
+        if is_sparse:
+            assert sparse.issparse(X_batch)
+        else:
+            assert not sparse.issparse(X_batch)
diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py
index c1da5ae58..9b0cb06d5 100644
--- a/imblearn/tensorflow/_generator.py
+++ b/imblearn/tensorflow/_generator.py
@@ -44,9 +44,8 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
         Number of samples per gradient update.
 
     sparse : bool, optional (default=False)
-        Either or not to conserve or not the sparsity of the input (i.e. ``X``,
-        ``y``, ``sample_weight``). By default, the returned batches will be
-        dense.
+        Whether or not to conserve the sparsity of the input ``X``. By
+        default, the returned batches will be dense.
{random_state} @@ -134,29 +133,17 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, random_state.shuffle(indices) def generator(X, y, sample_weight, indices, batch_size): - if sample_weight is None: - while True: - for index in range(0, len(indices), batch_size): - X_res = safe_indexing(X, indices[index:index + batch_size]) - y_res = safe_indexing(y, indices[index:index + batch_size]) - if issparse(X_res) and not sparse: - X_res = X_res.toarray() - if issparse(y_res) and not sparse: - y_res = y_res.toarray() + while True: + for index in range(0, len(indices), batch_size): + X_res = safe_indexing(X, indices[index:index + batch_size]) + y_res = safe_indexing(y, indices[index:index + batch_size]) + if issparse(X_res) and not sparse: + X_res = X_res.toarray() + if sample_weight is None: yield X_res, y_res - else: - while True: - for index in range(0, len(indices), batch_size): - X_res = safe_indexing(X, indices[index:index + batch_size]) - y_res = safe_indexing(y, indices[index:index + batch_size]) + else: sw_res = safe_indexing(sample_weight, indices[index:index + batch_size]) - if issparse(X_res) and not sparse: - X_res = X_res.toarray() - if issparse(y_res) and not sparse: - y_res = y_res.toarray() - if issparse(sw_res) and not sparse: - sw_res = sw_res.toarray() yield X_res, y_res, sw_res return (generator(X, y, sample_weight, indices, batch_size), diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index 22ca16500..48bce2af6 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -14,10 +14,7 @@ tf = pytest.importorskip('tensorflow') -@pytest.mark.parametrize( - "sampler", - [None, NearMiss()] -) +@pytest.mark.parametrize("sampler", [None, NearMiss()]) def test_balanced_batch_generator(sampler): X, y = load_iris(return_X_y=True) X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) @@ -75,14 +72,18 @@ def accuracy(y_true, y_pred): .format(e, accuracy(y, predicts_train))) -def test_balanced_batch_generator_function_sparse(): +@pytest.mark.parametrize("is_sparse", [True, False]) +def test_balanced_batch_generator_function_sparse(is_sparse): X, y = load_iris(return_X_y=True) X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) X = X.astype(np.float32) training_generator, steps_per_epoch = balanced_batch_generator( - sparse.csr_matrix(X), y, sparse=True, batch_size=10, + sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10, random_state=42) for idx in range(steps_per_epoch): X_batch, y_batch = next(training_generator) - assert sparse.issparse(X_batch) + if is_sparse: + assert sparse.issparse(X_batch) + else: + assert not sparse.issparse(X_batch) From 8c9ae944160a39a3a5f7fdf70512c94499c773c5 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 00:15:11 +0200 Subject: [PATCH 44/50] add example --- .../porto_seguro_keras_under_sampling.py | 235 ++++++++++++++++++ imblearn/keras/tests/test_generator.py | 2 +- 2 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 examples/applications/porto_seguro_keras_under_sampling.py diff --git a/examples/applications/porto_seguro_keras_under_sampling.py b/examples/applications/porto_seguro_keras_under_sampling.py new file mode 100644 index 000000000..1477efd89 --- /dev/null +++ b/examples/applications/porto_seguro_keras_under_sampling.py @@ -0,0 +1,235 @@ +""" +========================================================== +Porto Seguro: balancing samples in mini-batches with Keras 
+==========================================================
+
+This example compares two strategies to train a neural-network on the Porto
+Seguro Kaggle data set [1]_. The data set is imbalanced and we show that
+balancing each mini-batch allows us to improve the performance and reduce the
+training time.
+
+References
+----------
+
+.. [1] https://www.kaggle.com/c/porto-seguro-safe-driver-prediction/data
+
+"""
+
+# Authors: Guillaume Lemaitre
+# License: MIT
+
+print(__doc__)
+
+###############################################################################
+# Data loading
+###############################################################################
+
+from collections import Counter
+import pandas as pd
+import numpy as np
+
+###############################################################################
+# First, you should download the Porto Seguro data set from Kaggle. See the
+# link in the introduction.
+
+training_data = pd.read_csv('./input/train.csv')
+testing_data = pd.read_csv('./input/test.csv')
+
+y_train = training_data[['id', 'target']].set_index('id')
+X_train = training_data.drop(['target'], axis=1).set_index('id')
+X_test = testing_data.set_index('id')
+
+###############################################################################
+# The data set is imbalanced and it will have an effect on the fitting.
+
+print('The data set is imbalanced: {}'.format(Counter(y_train['target'])))
+
+###############################################################################
+# Define the pre-processing pipeline
+###############################################################################
+
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.impute import SimpleImputer
+
+
+def convert_float64(X):
+    return X.astype(np.float64)
+
+
+###############################################################################
+# We want to standard-scale the numerical features and one-hot encode the
+# categorical features. In this regard, we make use of the
+# :class:`sklearn.compose.ColumnTransformer`.
+
+numerical_columns = [name for name in X_train.columns
+                     if '_calc_' in name and '_bin' not in name]
+numerical_pipeline = make_pipeline(
+    FunctionTransformer(func=convert_float64, validate=False),
+    StandardScaler())
+
+categorical_columns = [name for name in X_train.columns
+                       if '_cat' in name]
+categorical_pipeline = make_pipeline(
+    SimpleImputer(missing_values=-1, strategy='most_frequent'),
+    OneHotEncoder(categories='auto'))
+
+preprocessor = ColumnTransformer([('num', numerical_pipeline, num_col),
+                                  ('cat', categorical_pipeline, cat_col)],
+                                 remainder='drop')
+
+# Create an environment variable to avoid using the GPU. This can be changed.
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+
+###############################################################################
+# Create a neural-network
+###############################################################################
+
+from keras.models import Sequential
+from keras.layers import Activation, Dense, Dropout, BatchNormalization
+
+
+def make_model(n_features):
+    model = Sequential()
+    model.add(Dense(200, input_shape=(n_features,),
+                    kernel_initializer='glorot_normal'))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.5))
+    model.add(Dense(100, kernel_initializer='glorot_normal'))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.25))
+    model.add(Dense(50, kernel_initializer='glorot_normal'))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.15))
+    model.add(Dense(25, kernel_initializer='glorot_normal'))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.1))
+    model.add(Dense(1, activation='sigmoid'))
+
+    model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
+                  metrics=['accuracy'])
+
+    return model
+
+
+###############################################################################
+# We create a decorator to report the computation time
+
+import time
+from functools import wraps
+
+
+def timeit(f):
+    @wraps(f)
+    def wrapper(*args, **kwds):
+        start_time = time.time()
+        result = f(*args, **kwds)
+        elapsed_time = time.time() - start_time
+        print('Elapsed computation time: {:.3f} secs'
+              .format(elapsed_time))
+        return (elapsed_time, result)
+    return wrapper
+
+
+###############################################################################
+# The first model will be trained using the ``fit`` method and with imbalanced
+# mini-batches.
+
+from sklearn.metrics import roc_auc_score
+
+
+@timeit
+def fit_predict_imbalanced_model(X_train, y_train, X_test, y_test):
+    model = make_model(X_train.shape[1])
+    model.fit(X_train, y_train, epochs=2, verbose=0, batch_size=1000)
+    y_pred = model.predict_proba(X_test, batch_size=1000)
+    return roc_auc_score(y_test, y_pred)
+
+
+###############################################################################
+# By contrast, we will use imbalanced-learn to create a generator which will
+# yield balanced mini-batches.
+
+from imblearn.keras import BalancedBatchGenerator
+
+
+@timeit
+def fit_predict_balanced_model(X_train, y_train, X_test, y_test):
+    model = make_model(X_train.shape[1])
+    training_generator = BalancedBatchGenerator(X_train, y_train,
+                                                batch_size=1000,
+                                                random_state=42)
+    model.fit_generator(generator=training_generator, epochs=5, verbose=0)
+    y_pred = model.predict_proba(X_test, batch_size=1000)
+    return roc_auc_score(y_test, y_pred)
+
+
+###############################################################################
+# Classification loop
+###############################################################################
+
+###############################################################################
+# We will perform a 10-fold cross-validation and train the neural-network with
+# the two different strategies previously presented.
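+
+###############################################################################
+# A note on the calling convention: because of the ``timeit`` decorator
+# defined above, both helpers return an ``(elapsed_time, roc_auc)`` tuple,
+# which the cross-validation loop below unpacks, e.g.::
+#
+#     elapsed_time, roc_auc = fit_predict_imbalanced_model(
+#         X_local_train, y_local_train, X_local_test, y_local_test)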
+ +from sklearn.model_selection import StratifiedKFold + +skf = StratifiedKFold(n_splits=10) + +cv_results_imbalanced = [] +cv_time_imbalanced = [] +cv_results_balanced = [] +cv_time_balanced = [] +for train_idx, valid_idx in skf.split(X_train, y_train): + X_local_train = preprocessor.fit_transform(X_train.iloc[train_idx]) + y_local_train = y_train.iloc[train_idx].values.ravel() + X_local_test = preprocessor.transform(X_train.iloc[valid_idx]) + y_local_test = y_train.iloc[valid_idx].values.ravel() + + elapsed_time, roc_auc = fit_predict_imbalanced_model( + X_local_train, y_local_train, X_local_test, y_local_test) + cv_time_imbalanced.append(elapsed_time) + cv_results_imbalanced.append(roc_auc) + + elapsed_time, roc_auc = fit_predict_balanced_model( + X_local_train, y_local_train, X_local_test, y_local_test) + cv_time_balanced.append(elapsed_time) + cv_results_balanced.append(roc_auc) + +############################################################################### +# Plot of the results and computation time +############################################################################### + +df_results = (pd.DataFrame({'Balanced model': cv_results_balanced, + 'Imbalanced model': cv_results_imbalanced}) + .unstack().reset_index()) +df_time = (pd.DataFrame({'Balanced model': cv_time_balanced, + 'Imbalanced model': cv_time_imbalanced}) + .unstack().reset_index()) + +import seaborn as sns +import matplotlib.pyplot as plt + +sns.boxplot(y='level_0', x=0, data=df_results, whis=10.0) +sns.despine(top=True, right=True, left=True) +ax = plt.gca() +ax.xaxis.set_major_formatter( + plt.FuncFormatter(lambda x, pos: "%i%%" % (100 * x))) +plt.xlabel('ROC-AUC') +plt.ylabel('') +plt.title('Difference in terms of ROC-AUC using a random under-sampling') + +sns.boxplot(y='level_0', x=0, data=df_time) +sns.despine(top=True, right=True, left=True) +plt.xlabel('time [s]') +plt.ylabel('') +plt.title('Computation time difference using a random under-sampling') diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py index ecef97979..7b0491146 100644 --- a/imblearn/keras/tests/test_generator.py +++ b/imblearn/keras/tests/test_generator.py @@ -55,7 +55,7 @@ def test_balanced_batch_generator_class(sampler, sample_weight): @pytest.mark.parametrize("is_sparse", [True, False]) def test_balanced_batch_generator_class_sparse(is_sparse): training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y, - batch_size=100, + batch_size=10, sparse=is_sparse, random_state=42) for idx in range(len(training_generator)): From baa56ad1725321a973eee7c1917040c2c2bf7e59 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 00:22:01 +0200 Subject: [PATCH 45/50] iter --- .../applications/porto_seguro_keras_under_sampling.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/applications/porto_seguro_keras_under_sampling.py b/examples/applications/porto_seguro_keras_under_sampling.py index 1477efd89..d06669433 100644 --- a/examples/applications/porto_seguro_keras_under_sampling.py +++ b/examples/applications/porto_seguro_keras_under_sampling.py @@ -77,9 +77,10 @@ def convert_float64(X): SimpleImputer(missing_values=-1, strategy='most_frequent'), OneHotEncoder(categories='auto')) -preprocessor = ColumnTransformer([('num', numerical_pipeline, num_col), - ('cat', categorical_pipeline, cat_col)], - remainder='drop') +preprocessor = ColumnTransformer( + [('numerical_preprocessing', numerical_pipeline, numerical_columns), + 
('categorical_preprocessing', categorical_pipeline, categorical_columns)], + remainder='drop') # Create an environment variable to avoid using the GPU. This can be changed. import os @@ -150,7 +151,7 @@ def wrapper(*args, **kwds): @timeit def fit_predict_imbalanced_model(X_train, y_train, X_test, y_test): model = make_model(X_train.shape[1]) - model.fit(X_train, y_train, epochs=2, verbose=0, batch_size=1000) + model.fit(X_train, y_train, epochs=2, verbose=1, batch_size=1000) y_pred = model.predict_proba(X_test, batch_size=1000) return roc_auc_score(y_test, y_pred) @@ -168,7 +169,7 @@ def fit_predict_balanced_model(X_train, y_train, X_test, y_test): training_generator = BalancedBatchGenerator(X_train, y_train, batch_size=1000, random_state=42) - model.fit_generator(generator=training_generator, epochs=5, verbose=0) + model.fit_generator(generator=training_generator, epochs=5, verbose=1) y_pred = model.predict_proba(X_test, batch_size=1000) return roc_auc_score(y_test, y_pred) From 679f30ca906ede4a2cb63288c6dc095bb0139d28 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 00:53:33 +0200 Subject: [PATCH 46/50] iter example --- .../porto_seguro_keras_under_sampling.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/applications/porto_seguro_keras_under_sampling.py b/examples/applications/porto_seguro_keras_under_sampling.py index d06669433..7bc7c655c 100644 --- a/examples/applications/porto_seguro_keras_under_sampling.py +++ b/examples/applications/porto_seguro_keras_under_sampling.py @@ -220,6 +220,12 @@ def fit_predict_balanced_model(X_train, y_train, X_test, y_test): import seaborn as sns import matplotlib.pyplot as plt +sns.boxplot(y='level_0', x=0, data=df_time) +sns.despine(top=True, right=True, left=True) +plt.xlabel('time [s]') +plt.ylabel('') +plt.title('Computation time difference using a random under-sampling') + sns.boxplot(y='level_0', x=0, data=df_results, whis=10.0) sns.despine(top=True, right=True, left=True) ax = plt.gca() @@ -228,9 +234,3 @@ def fit_predict_balanced_model(X_train, y_train, X_test, y_test): plt.xlabel('ROC-AUC') plt.ylabel('') plt.title('Difference in terms of ROC-AUC using a random under-sampling') - -sns.boxplot(y='level_0', x=0, data=df_time) -sns.despine(top=True, right=True, left=True) -plt.xlabel('time [s]') -plt.ylabel('') -plt.title('Computation time difference using a random under-sampling') From 324b90c9c637d7edb693e682a7de8b5ae0569299 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 11:22:03 +0200 Subject: [PATCH 47/50] tests --- build_tools/travis/install.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index be06095db..df4d23378 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -40,9 +40,6 @@ if [[ "$DISTRIB" == "conda" ]]; then source activate testenv conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION # only install optional dependency in python 3.6 - if [[ $PYTHON_VERSION == "3.6" ]]; then - conda install --yes pandas keras - fi if [[ "$SKLEARN_VERSION" == "master" ]]; then conda install --yes cython @@ -51,6 +48,13 @@ if [[ "$DISTRIB" == "conda" ]]; then conda install --yes scikit-learn=$SKLEARN_VERSION fi + if [[ $PYTHON_VERSION == "3.6" ]]; then + conda install --yes pandas keras + KERAS_BACKEND=tensorflow + python -c "import keras.backend" + sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' 
~/.keras/keras.json; + fi + conda install --yes nose pytest pytest-cov # Install nose-timer via pip pip install nose-timer codecov From 22fb0a1afc08fd4ad58a9d06633fa0e411d9cd5a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 11:40:46 +0200 Subject: [PATCH 48/50] iter --- .travis.yml | 6 +++--- build_tools/travis/install.sh | 6 +++--- examples/applications/porto_seguro_keras_under_sampling.py | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 36b502320..ba5b8f33e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,11 +38,11 @@ matrix: NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.19.0" - env: DISTRIB="conda" PYTHON_VERSION="3.6" NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.19.0" - - env: DISTRIB="conda" PYTHON_VERSION="3.6" + - env: DISTRIB="conda" PYTHON_VERSION="3.7" NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="master" allow_failures: - - env: DISTRIB="conda" PYTHON_VERSION="3.6" - NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="master" + - env: DISTRIB="conda" PYTHON_VERSION="3.7" + NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master" install: source build_tools/travis/install.sh script: bash build_tools/travis/test_script.sh diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index df4d23378..82dc10a97 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -39,7 +39,6 @@ if [[ "$DISTRIB" == "conda" ]]; then conda create -n testenv --yes python=$PYTHON_VERSION pip source activate testenv conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION - # only install optional dependency in python 3.6 if [[ "$SKLEARN_VERSION" == "master" ]]; then conda install --yes cython @@ -48,8 +47,9 @@ if [[ "$DISTRIB" == "conda" ]]; then conda install --yes scikit-learn=$SKLEARN_VERSION fi - if [[ $PYTHON_VERSION == "3.6" ]]; then - conda install --yes pandas keras + if [[ $PYTHON_VERSION == "3.6" ]] || [[ $PYTHON_VERSION == "3.7" ]]; then + conda install --yes pandas + conda install --yes -c conda-forge keras KERAS_BACKEND=tensorflow python -c "import keras.backend" sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; diff --git a/examples/applications/porto_seguro_keras_under_sampling.py b/examples/applications/porto_seguro_keras_under_sampling.py index 7bc7c655c..c154362d9 100644 --- a/examples/applications/porto_seguro_keras_under_sampling.py +++ b/examples/applications/porto_seguro_keras_under_sampling.py @@ -220,12 +220,14 @@ def fit_predict_balanced_model(X_train, y_train, X_test, y_test): import seaborn as sns import matplotlib.pyplot as plt +plt.figure() sns.boxplot(y='level_0', x=0, data=df_time) sns.despine(top=True, right=True, left=True) plt.xlabel('time [s]') plt.ylabel('') plt.title('Computation time difference using a random under-sampling') +plt.figure() sns.boxplot(y='level_0', x=0, data=df_results, whis=10.0) sns.despine(top=True, right=True, left=True) ax = plt.gca() From 85f6d5b530aa29e7ef2a5f672a8006ec52a49af9 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 11:43:21 +0200 Subject: [PATCH 49/50] iter --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ba5b8f33e..650c14bb1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,7 +39,7 @@ matrix: - env: DISTRIB="conda" PYTHON_VERSION="3.6" NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.19.0" - 
env: DISTRIB="conda" PYTHON_VERSION="3.7" - NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="master" + NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master" allow_failures: - env: DISTRIB="conda" PYTHON_VERSION="3.7" NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master" From abeb0114680f92ff6ffefda90c9e2807c7a3fec8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 22 Aug 2018 12:58:18 +0200 Subject: [PATCH 50/50] iter --- build_tools/travis/install.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 82dc10a97..3a56bac81 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -40,14 +40,7 @@ if [[ "$DISTRIB" == "conda" ]]; then source activate testenv conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION - if [[ "$SKLEARN_VERSION" == "master" ]]; then - conda install --yes cython - pip install -U git+https://github.com/scikit-learn/scikit-learn.git - else - conda install --yes scikit-learn=$SKLEARN_VERSION - fi - - if [[ $PYTHON_VERSION == "3.6" ]] || [[ $PYTHON_VERSION == "3.7" ]]; then + if [[ $PYTHON_VERSION == "3.6" ]]; then conda install --yes pandas conda install --yes -c conda-forge keras KERAS_BACKEND=tensorflow @@ -55,6 +48,13 @@ if [[ "$DISTRIB" == "conda" ]]; then sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; fi + if [[ "$SKLEARN_VERSION" == "master" ]]; then + conda install --yes cython + pip install -U git+https://github.com/scikit-learn/scikit-learn.git + else + conda install --yes scikit-learn=$SKLEARN_VERSION + fi + conda install --yes nose pytest pytest-cov # Install nose-timer via pip pip install nose-timer codecov