scikit-learn-contrib · glemaitre · Aug 23, 2018 · Aug 23, 2018
diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py
@@ -52,7 +52,7 @@ class BalancedBatchGenerator(ParentClass):
     batch_size : int, optional (default=32)
         Number of samples per gradient update.
 
-    sparse : bool, optional (default=False)
+    keep_sparse : bool, optional (default=False)
         Whether or not to conserve the sparsity of the input (i.e. ``X``,
         ``y``, ``sample_weight``). By default, the returned batches will be
         dense.
@@ -98,15 +98,15 @@ class BalancedBatchGenerator(ParentClass):
 
     """
     def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32,
-                 sparse=False, random_state=None):
+                 keep_sparse=False, random_state=None):
         if not HAS_KERAS:
             raise ImportError("'No module named 'keras'")
         self.X = X
         self.y = y
         self.sample_weight = sample_weight
         self.sampler = sampler
         self.batch_size = batch_size
-        self.sparse = sparse
+        self.keep_sparse = keep_sparse
         self.random_state = random_state
         self._sample()
 
@@ -138,7 +138,7 @@ def __getitem__(self, index):
         y_resampled = safe_indexing(
             self.y, self.indices_[index * self.batch_size:
                                   (index + 1) * self.batch_size])
-        if issparse(X_resampled) and not self.sparse:
+        if issparse(X_resampled) and not self.keep_sparse:
             X_resampled = X_resampled.toarray()
         if self.sample_weight is not None:
             sample_weight_resampled = safe_indexing(
@@ -154,7 +154,8 @@ def __getitem__(self, index):
 
 @Substitution(random_state=_random_state_docstring)
 def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
-                             batch_size=32, sparse=False, random_state=None):
+                             batch_size=32, keep_sparse=False,
+                             random_state=None):
     """Create a balanced batch generator to train keras model.
 
     Returns a generator --- as well as the number of steps per epoch --- which
@@ -181,7 +182,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
     batch_size : int, optional (default=32)
         Number of samples per gradient update.
 
-    sparse : bool, optional (default=False)
+    keep_sparse : bool, optional (default=False)
         Whether or not to conserve the sparsity of the input (i.e. ``X``,
         ``y``, ``sample_weight``). By default, the returned batches will be
         dense.
@@ -226,4 +227,4 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
 
     return tf_bbg(X=X, y=y, sample_weight=sample_weight,
                   sampler=sampler, batch_size=batch_size,
-                  sparse=sparse, random_state=random_state)
+                  keep_sparse=keep_sparse, random_state=random_state)
diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py
@@ -52,15 +52,15 @@ def test_balanced_batch_generator_class(sampler, sample_weight):
                         epochs=10)
 
 
-@pytest.mark.parametrize("is_sparse", [True, False])
-def test_balanced_batch_generator_class_sparse(is_sparse):
+@pytest.mark.parametrize("keep_sparse", [True, False])
+def test_balanced_batch_generator_class_sparse(keep_sparse):
     training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y,
                                                 batch_size=10,
-                                                sparse=is_sparse,
+                                                keep_sparse=keep_sparse,
                                                 random_state=42)
     for idx in range(len(training_generator)):
         X_batch, y_batch = training_generator.__getitem__(idx)
-        if is_sparse:
+        if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
             assert not sparse.issparse(X_batch)
@@ -88,14 +88,14 @@ def test_balanced_batch_generator_function(sampler, sample_weight):
                         epochs=10)
 
 
-@pytest.mark.parametrize("is_sparse", [True, False])
-def test_balanced_batch_generator_function_sparse(is_sparse):
+@pytest.mark.parametrize("keep_sparse", [True, False])
+def test_balanced_batch_generator_function_sparse(keep_sparse):
     training_generator, steps_per_epoch = balanced_batch_generator(
-        sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10,
+        sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
         random_state=42)
     for idx in range(steps_per_epoch):
         X_batch, y_batch = next(training_generator)
-        if is_sparse:
+        if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
             assert not sparse.issparse(X_batch)
diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py
@@ -16,7 +16,8 @@
 
 @Substitution(random_state=_random_state_docstring)
 def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
-                             batch_size=32, sparse=False, random_state=None):
+                             batch_size=32, keep_sparse=False,
+                             random_state=None):
     """Create a balanced batch generator to train keras model.
 
     Returns a generator --- as well as the number of steps per epoch --- which
@@ -43,7 +44,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
     batch_size : int, optional (default=32)
         Number of samples per gradient update.
 
-    sparse : bool, optional (default=False)
+    keep_sparse : bool, optional (default=False)
         Whether or not to conserve the sparsity of the input ``X``. By
         default, the returned batches will be dense.
 
@@ -137,7 +138,7 @@ def generator(X, y, sample_weight, indices, batch_size):
             for index in range(0, len(indices), batch_size):
                 X_res = safe_indexing(X, indices[index:index + batch_size])
                 y_res = safe_indexing(y, indices[index:index + batch_size])
-                if issparse(X_res) and not sparse:
+                if issparse(X_res) and not keep_sparse:
                     X_res = X_res.toarray()
                 if sample_weight is None:
                     yield X_res, y_res

diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py
@@ -72,18 +72,18 @@ def accuracy(y_true, y_pred):
                   .format(e, accuracy(y, predicts_train)))
 
 
-@pytest.mark.parametrize("is_sparse", [True, False])
-def test_balanced_batch_generator_function_sparse(is_sparse):
+@pytest.mark.parametrize("keep_sparse", [True, False])
+def test_balanced_batch_generator_function_sparse(keep_sparse):
     X, y = load_iris(return_X_y=True)
     X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40})
     X = X.astype(np.float32)
 
     training_generator, steps_per_epoch = balanced_batch_generator(
-        sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10,
+        sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
         random_state=42)
     for idx in range(steps_per_epoch):
         X_batch, y_batch = next(training_generator)
-        if is_sparse:
+        if keep_sparse:
             assert sparse.issparse(X_batch)
         else:
             assert not sparse.issparse(X_batch)