
EHN add support for some Keras utilities #409

Merged: 55 commits merged on Aug 22, 2018

Commits
dae3ba3
EHN accept one-vs-all targets
glemaitre Mar 1, 2018
7487ce4
TST add test for check_target_type
glemaitre Mar 1, 2018
05ae2e6
PEP8
glemaitre Mar 1, 2018
05b7d65
TST fix pytests match warns
glemaitre Mar 1, 2018
1a27e3e
TST common test to check multiclass ova equality
glemaitre Mar 1, 2018
d37c13c
Merge remote-tracking branch 'origin/master' into keras_batch_generator
glemaitre Apr 4, 2018
47bbbf4
Add keras module
glemaitre Apr 4, 2018
e6a3187
TST for generator class
glemaitre Apr 4, 2018
01492a6
dependencies
glemaitre Apr 4, 2018
54a0f03
update dependencies
glemaitre Apr 4, 2018
77e944d
iter
glemaitre Apr 4, 2018
7e13833
TST test the keras class
glemaitre Apr 5, 2018
182b408
optional dep windows
glemaitre Apr 6, 2018
68f8454
Add generator function
glemaitre Apr 6, 2018
c7b1c48
upload windows branch coverage
glemaitre Apr 6, 2018
e3bb2f7
add codecov on appveyor
glemaitre Apr 6, 2018
8afd2cd
simplify codecov
glemaitre Apr 6, 2018
b2c560a
remove useless statement
glemaitre Apr 6, 2018
d41d30b
Merge remote-tracking branch 'origin/master' into keras_batch_generator
glemaitre May 8, 2018
f7aa745
fix
glemaitre May 9, 2018
b62fef8
iter
glemaitre May 9, 2018
79a4a10
FIX modify docstring to accept substituion
glemaitre May 9, 2018
162cb95
FIX do not substitue inside the class
glemaitre May 9, 2018
06955e2
EHN add tensorflow sequence
glemaitre May 9, 2018
0ce4a5c
TST generator for tensorflow
glemaitre May 11, 2018
d2bc9ca
DOCAdd simple examples
glemaitre May 13, 2018
731622e
DOC add to api documentation
glemaitre May 13, 2018
0e8eafe
FIX add function summary
glemaitre May 13, 2018
b64e04f
iter
glemaitre May 14, 2018
c59fb8b
Update docstring
glemaitre May 14, 2018
155fe0f
DOC fix warning (#425)
glemaitre May 14, 2018
38aa467
Merge remote-tracking branch 'origin/master' into keras_batch_generator
glemaitre May 14, 2018
8b37a8b
Merge remote-tracking branch 'origin/master' into keras_batch_generator
glemaitre May 14, 2018
05ff979
DOC added tensorflow user guide
glemaitre May 14, 2018
f291b68
DOC update the user guide
glemaitre May 17, 2018
5b95e26
FIX rename function
glemaitre May 17, 2018
1077e9c
MAINT add optional dependencies
glemaitre May 17, 2018
f957531
DOC fix python 2 doc override
glemaitre May 17, 2018
00d2a05
iter
glemaitre Jul 27, 2018
f677e6b
EHN add parameter to preserve sparsity
glemaitre Jul 28, 2018
ccef644
DOC mention default sampler
glemaitre Jul 31, 2018
032c791
use doctest ignore import errors
glemaitre Aug 21, 2018
cc66b75
Merge remote-tracking branch 'origin/master' into keras_batch_generator
glemaitre Aug 21, 2018
c056567
iter
glemaitre Aug 21, 2018
83b5fea
iter
glemaitre Aug 21, 2018
c7b4a0a
iter
glemaitre Aug 21, 2018
cd99f9a
join path for appveyor
glemaitre Aug 21, 2018
b174c7f
Update the sparse array tests
glemaitre Aug 21, 2018
8c9ae94
add example
glemaitre Aug 21, 2018
baa56ad
iter
glemaitre Aug 21, 2018
679f30c
iter example
glemaitre Aug 21, 2018
324b90c
tests
glemaitre Aug 22, 2018
22fb0a1
iter
glemaitre Aug 22, 2018
85f6d5b
iter
glemaitre Aug 22, 2018
abeb011
iter
glemaitre Aug 22, 2018
8 changes: 4 additions & 4 deletions .travis.yml
@@ -38,11 +38,11 @@ matrix:
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.19.0"
- env: DISTRIB="conda" PYTHON_VERSION="3.6"
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="0.19.0"
- env: DISTRIB="conda" PYTHON_VERSION="3.6"
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="master"
- env: DISTRIB="conda" PYTHON_VERSION="3.7"
NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
allow_failures:
- env: DISTRIB="conda" PYTHON_VERSION="3.6"
NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" SKLEARN_VERSION="master"
- env: DISTRIB="conda" PYTHON_VERSION="3.7"
NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"

install: source build_tools/travis/install.sh
script: bash build_tools/travis/test_script.sh
14 changes: 13 additions & 1 deletion appveyor.yml
@@ -10,34 +10,46 @@ environment:
- PYTHON: "C:\\Miniconda-x64"
PYTHON_VERSION: "2.7.x"
PYTHON_ARCH: "64"
OPTIONAL_DEP: "pandas"

- PYTHON: "C:\\Miniconda"
PYTHON_VERSION: "2.7.x"
PYTHON_ARCH: "32"
OPTIONAL_DEP: "pandas"

- PYTHON: "C:\\Miniconda35-x64"
PYTHON_VERSION: "3.5.x"
PYTHON_ARCH: "64"
OPTIONAL_DEP: "pandas keras tensorflow"

- PYTHON: "C:\\Miniconda36-x64"
PYTHON_VERSION: "3.6.x"
PYTHON_ARCH: "64"
OPTIONAL_DEP: "pandas keras tensorflow"

- PYTHON: "C:\\Miniconda36"
PYTHON_VERSION: "3.6.x"
PYTHON_ARCH: "32"
OPTIONAL_DEP: "pandas"

install:
# Prepend miniconda installed Python to the PATH of this build
# Add Library/bin directory to fix issue
# https://github.com/conda/conda/issues/1753
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%"
- conda install pip scipy numpy scikit-learn=0.19 pandas -y -q
- conda install pip scipy numpy scikit-learn=0.19 -y -q
- "conda install %OPTIONAL_DEP% -y -q"
- conda install pytest pytest-cov -y -q
- pip install codecov
- conda install nose -y -q # FIXME: remove this line when using sklearn > 0.19
- pip install .

test_script:
- mkdir for_test
- cd for_test
- pytest --pyargs imblearn --cov-report term-missing --cov=imblearn

after_test:
- cp .coverage %APPVEYOR_BUILD_FOLDER%
- cd %APPVEYOR_BUILD_FOLDER%
- codecov
2 changes: 1 addition & 1 deletion build_tools/circle/build_doc.sh
@@ -92,7 +92,7 @@ conda create -n $CONDA_ENV_NAME --yes --quiet python=3
source activate $CONDA_ENV_NAME

conda install --yes pip numpy scipy scikit-learn pillow matplotlib sphinx \
sphinx_rtd_theme numpydoc
sphinx_rtd_theme numpydoc pandas keras
pip install -U git+https://github.com/sphinx-gallery/sphinx-gallery.git

# Build and install imbalanced-learn in dev mode
17 changes: 13 additions & 4 deletions build_tools/travis/install.sh
@@ -38,7 +38,15 @@ if [[ "$DISTRIB" == "conda" ]]; then
# provided versions
conda create -n testenv --yes python=$PYTHON_VERSION pip
source activate testenv
conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION pandas
conda install --yes numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION

if [[ $PYTHON_VERSION == "3.6" ]]; then
conda install --yes pandas
conda install --yes -c conda-forge keras
KERAS_BACKEND=tensorflow
python -c "import keras.backend"
sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
fi

if [[ "$SKLEARN_VERSION" == "master" ]]; then
conda install --yes cython
@@ -59,16 +67,17 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
# Create a new virtualenv using system site packages for python, numpy
virtualenv --system-site-packages testvenv
source testvenv/bin/activate
pip install scikit-learn pandas nose nose-timer pytest pytest-cov codecov \
sphinx numpydoc
pip install scikit-learn
pip install pandas keras tensorflow
pip install nose nose-timer pytest pytest-cov codecov sphinx numpydoc

fi

python --version
python -c "import numpy; print('numpy %s' % numpy.__version__)"
python -c "import scipy; print('scipy %s' % scipy.__version__)"

python setup.py develop
pip install -e .
ccache --show-stats
# Useful for debugging how ccache is used
# cat $CCACHE_LOGFILE
19 changes: 19 additions & 0 deletions conftest.py
@@ -7,8 +7,27 @@

# Set numpy array str/repr to legacy behaviour on numpy > 1.13 to make
# the doctests pass
import os
import pytest
import numpy as np

try:
    np.set_printoptions(legacy='1.13')
except TypeError:
    pass


def pytest_runtest_setup(item):
    fname = item.fspath.strpath
    if (fname.endswith(os.path.join('keras', '_generator.py')) or
            fname.endswith('miscellaneous.rst')):
        try:
            import keras
        except ImportError:
            pytest.skip('The keras package is not installed.')
    elif (fname.endswith(os.path.join('tensorflow', '_generator.py')) or
            fname.endswith('miscellaneous.rst')):
        try:
            import tensorflow
        except ImportError:
            pytest.skip('The tensorflow package is not installed.')
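
A note on the design (not part of this PR): pytest also provides
``pytest.importorskip``, which performs the same "skip when an optional
dependency is missing" check. A minimal, hypothetical sketch of the same hook
written with it (omitting the ``miscellaneous.rst`` handling for brevity):

    import os
    import pytest


    def pytest_runtest_setup(item):
        # Skip tests/doctests that need the optional deep-learning backends;
        # importorskip() raises a skip exception if the import fails.
        fname = item.fspath.strpath
        if fname.endswith(os.path.join('keras', '_generator.py')):
            pytest.importorskip('keras')
        elif fname.endswith(os.path.join('tensorflow', '_generator.py')):
            pytest.importorskip('tensorflow')
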
40 changes: 40 additions & 0 deletions doc/api.rst
@@ -111,6 +111,46 @@ Prototype selection
ensemble.BalancedBaggingClassifier
ensemble.EasyEnsemble

.. _keras_ref:

:mod:`imblearn.keras`: Batch generator for Keras
================================================

.. automodule:: imblearn.keras
:no-members:
:no-inherited-members:

.. currentmodule:: imblearn

.. autosummary::
:toctree: generated/
:template: class.rst

keras.BalancedBatchGenerator

.. autosummary::
:toctree: generated/
:template: function.rst

keras.balanced_batch_generator

.. _tensorflow_ref:

:mod:`imblearn.tensorflow`: Batch generator for TensorFlow
==========================================================

.. automodule:: imblearn.tensorflow
:no-members:
:no-inherited-members:

.. currentmodule:: imblearn

.. autosummary::
:toctree: generated/
:template: function.rst

tensorflow.balanced_batch_generator

.. _misc_ref:

Miscellaneous
111 changes: 111 additions & 0 deletions doc/miscellaneous.rst
@@ -38,3 +38,114 @@ We illustrate the use of such sampler to implement an outlier rejection
estimator which can be easily used within a
:class:`imblearn.pipeline.Pipeline`:
:ref:`sphx_glr_auto_examples_plot_outlier_rejections.py`

.. _generators:

Custom generators
-----------------

Imbalanced-learn provides specific generators for TensorFlow and Keras which
will generate balanced mini-batches.
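
The snippets in this section reuse an imbalanced dataset ``X``, ``y`` defined
earlier in this guide. As a minimal sketch only (the dataset used in the actual
documentation may differ), comparable data could be created with
scikit-learn::

>>> import numpy as np
>>> from sklearn.datasets import make_classification
>>> # hypothetical toy dataset with three imbalanced classes
>>> X, y = make_classification(n_samples=1000, n_classes=3, n_informative=4,
... weights=[0.2, 0.3, 0.5], random_state=0)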

.. _tensorflow_generator:

TensorFlow generator
~~~~~~~~~~~~~~~~~~~~

The :func:`imblearn.tensorflow.balanced_batch_generator` allows generating
balanced mini-batches using an imbalanced-learn sampler which returns indices::

>>> X = X.astype(np.float32)
>>> from imblearn.under_sampling import RandomUnderSampler
>>> from imblearn.tensorflow import balanced_batch_generator
>>> training_generator, steps_per_epoch = balanced_batch_generator(
... X, y, sample_weight=None, sampler=RandomUnderSampler(),
... batch_size=10, random_state=42)

The ``generator`` and ``steps_per_epoch`` are used during the training of the
TensorFlow model. We will illustrate how to use this generator. First, we
define a logistic regression model which will be optimized by gradient
descent::

>>> learning_rate, epochs = 0.01, 10
>>> input_size, output_size = X.shape[1], 3
>>> import tensorflow as tf
>>> def init_weights(shape):
... return tf.Variable(tf.random_normal(shape, stddev=0.01))
>>> def accuracy(y_true, y_pred):
... return np.mean(np.argmax(y_pred, axis=1) == y_true)
>>> # input and output
>>> data = tf.placeholder("float32", shape=[None, input_size])
>>> targets = tf.placeholder("int32", shape=[None])
>>> # build the model and weights
>>> W = init_weights([input_size, output_size])
>>> b = init_weights([output_size])
>>> out_act = tf.nn.sigmoid(tf.matmul(data, W) + b)
>>> # build the loss, predict, and train operator
>>> cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
... logits=out_act, labels=targets)
>>> loss = tf.reduce_sum(cross_entropy)
>>> optimizer = tf.train.GradientDescentOptimizer(learning_rate)
>>> train_op = optimizer.minimize(loss)
>>> predict = tf.nn.softmax(out_act)
>>> # Initialization of all variables in the graph
>>> init = tf.global_variables_initializer()

Once initialized, the model is trained by iterating over balanced mini-batches
of data and minimizing the previously defined loss::

>>> with tf.Session() as sess:
... print('Starting training')
... sess.run(init)
... for e in range(epochs):
... for i in range(steps_per_epoch):
... X_batch, y_batch = next(training_generator)
... sess.run([train_op, loss], feed_dict={data: X_batch, targets: y_batch})
... # For each epoch, run accuracy on train and test
... feed_dict = dict()
... predicts_train = sess.run(predict, feed_dict={data: X})
... print("epoch: {} train accuracy: {:.3f}"
... .format(e, accuracy(y, predicts_train)))
... # doctest: +ELLIPSIS
Starting training
[...

.. _keras_generator:

Keras generator
~~~~~~~~~~~~~~~

Keras provides a higher-level API in which a model can be defined and trained
by calling its ``fit_generator`` method. To illustrate, we will define a
logistic regression model::

>>> import keras
>>> y = keras.utils.to_categorical(y, 3)
>>> model = keras.Sequential()
>>> model.add(keras.layers.Dense(y.shape[1], input_dim=X.shape[1],
... activation='softmax'))
>>> model.compile(optimizer='sgd', loss='categorical_crossentropy',
... metrics=['accuracy'])

:func:`imblearn.keras.balanced_batch_generator` creates a generator of balanced
mini-batches together with the number of mini-batches which will be generated
per epoch::

>>> from imblearn.keras import balanced_batch_generator
>>> training_generator, steps_per_epoch = balanced_batch_generator(
... X, y, sampler=RandomUnderSampler(), batch_size=10, random_state=42)

Then, ``fit_generator`` can be called, passing the generator and the number of
steps per epoch::

>>> callback_history = model.fit_generator(generator=training_generator,
... steps_per_epoch=steps_per_epoch,
... epochs=10, verbose=0)

The second possibility is to use
:class:`imblearn.keras.BalancedBatchGenerator`. In this case, only an instance
of this class needs to be passed to ``fit_generator``::

>>> from imblearn.keras import BalancedBatchGenerator
>>> training_generator = BalancedBatchGenerator(
... X, y, sampler=RandomUnderSampler(), batch_size=10, random_state=42)
>>> callback_history = model.fit_generator(generator=training_generator,
... epochs=10, verbose=0)
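
Commit f677e6b of this PR also adds a parameter to preserve the sparsity of the
input. As a hedged sketch, assuming that parameter is named ``keep_sparse``
(check the merged API for the exact name and default), a sparse matrix could be
passed without densifying the whole dataset::

>>> import scipy.sparse as sp
>>> X_sparse = sp.csr_matrix(X) # hypothetical sparse input
>>> training_generator = BalancedBatchGenerator(
... X_sparse, y, sampler=RandomUnderSampler(), batch_size=10,
... keep_sparse=True, random_state=42)

With ``keep_sparse=True``, the generated mini-batches would remain sparse
instead of being converted to dense arrays.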
6 changes: 6 additions & 0 deletions doc/whats_new/v0.0.4.rst
@@ -18,6 +18,12 @@ API
- Enable to use a ``list`` for the cleaning methods to specify the class to
sample. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.

New features
............

- Add ``keras`` and ``tensorflow`` modules to create balanced mini-batch
generators. :issue:`409` by :user:`Guillaume Lemaitre <glemaitre>`.

Enhancement
...........
