From 01dc5605d9edca89fc8a3224fe1068dd0a17da3e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 23 Aug 2018 16:27:53 +0200 Subject: [PATCH] FIX: rename sparse to keep_sparse for keras and tensorflow --- imblearn/keras/_generator.py | 15 ++++++++------- imblearn/keras/tests/test_generator.py | 16 ++++++++-------- imblearn/tensorflow/_generator.py | 7 ++++--- imblearn/tensorflow/tests/test_generator.py | 8 ++++---- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/imblearn/keras/_generator.py b/imblearn/keras/_generator.py index c3f5a4af1..2ccff0759 100644 --- a/imblearn/keras/_generator.py +++ b/imblearn/keras/_generator.py @@ -52,7 +52,7 @@ class BalancedBatchGenerator(ParentClass): batch_size : int, optional (default=32) Number of samples per gradient update. - sparse : bool, optional (default=False) + keep_sparse : bool, optional (default=False) Either or not to conserve or not the sparsity of the input (i.e. ``X``, ``y``, ``sample_weight``). By default, the returned batches will be dense. @@ -98,7 +98,7 @@ class BalancedBatchGenerator(ParentClass): """ def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32, - sparse=False, random_state=None): + keep_sparse=False, random_state=None): if not HAS_KERAS: raise ImportError("'No module named 'keras'") self.X = X @@ -106,7 +106,7 @@ def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32, self.sample_weight = sample_weight self.sampler = sampler self.batch_size = batch_size - self.sparse = sparse + self.keep_sparse = keep_sparse self.random_state = random_state self._sample() @@ -138,7 +138,7 @@ def __getitem__(self, index): y_resampled = safe_indexing( self.y, self.indices_[index * self.batch_size: (index + 1) * self.batch_size]) - if issparse(X_resampled) and not self.sparse: + if issparse(X_resampled) and not self.keep_sparse: X_resampled = X_resampled.toarray() if self.sample_weight is not None: sample_weight_resampled = safe_indexing( @@ -154,7 +154,8 @@ def __getitem__(self, index): @Substitution(random_state=_random_state_docstring) def balanced_batch_generator(X, y, sample_weight=None, sampler=None, - batch_size=32, sparse=False, random_state=None): + batch_size=32, keep_sparse=False, + random_state=None): """Create a balanced batch generator to train keras model. Returns a generator --- as well as the number of step per epoch --- which @@ -181,7 +182,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, batch_size : int, optional (default=32) Number of samples per gradient update. - sparse : bool, optional (default=False) + keep_sparse : bool, optional (default=False) Either or not to conserve or not the sparsity of the input (i.e. ``X``, ``y``, ``sample_weight``). By default, the returned batches will be dense. @@ -226,4 +227,4 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, return tf_bbg(X=X, y=y, sample_weight=sample_weight, sampler=sampler, batch_size=batch_size, - sparse=sparse, random_state=random_state) + keep_sparse=keep_sparse, random_state=random_state) diff --git a/imblearn/keras/tests/test_generator.py b/imblearn/keras/tests/test_generator.py index 7b0491146..cbab74864 100644 --- a/imblearn/keras/tests/test_generator.py +++ b/imblearn/keras/tests/test_generator.py @@ -52,15 +52,15 @@ def test_balanced_batch_generator_class(sampler, sample_weight): epochs=10) -@pytest.mark.parametrize("is_sparse", [True, False]) -def test_balanced_batch_generator_class_sparse(is_sparse): +@pytest.mark.parametrize("keep_sparse", [True, False]) +def test_balanced_batch_generator_class_sparse(keep_sparse): training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y, batch_size=10, - sparse=is_sparse, + keep_sparse=keep_sparse, random_state=42) for idx in range(len(training_generator)): X_batch, y_batch = training_generator.__getitem__(idx) - if is_sparse: + if keep_sparse: assert sparse.issparse(X_batch) else: assert not sparse.issparse(X_batch) @@ -88,14 +88,14 @@ def test_balanced_batch_generator_function(sampler, sample_weight): epochs=10) -@pytest.mark.parametrize("is_sparse", [True, False]) -def test_balanced_batch_generator_function_sparse(is_sparse): +@pytest.mark.parametrize("keep_sparse", [True, False]) +def test_balanced_batch_generator_function_sparse(keep_sparse): training_generator, steps_per_epoch = balanced_batch_generator( - sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10, + sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10, random_state=42) for idx in range(steps_per_epoch): X_batch, y_batch = next(training_generator) - if is_sparse: + if keep_sparse: assert sparse.issparse(X_batch) else: assert not sparse.issparse(X_batch) diff --git a/imblearn/tensorflow/_generator.py b/imblearn/tensorflow/_generator.py index 9b0cb06d5..1a21c106c 100644 --- a/imblearn/tensorflow/_generator.py +++ b/imblearn/tensorflow/_generator.py @@ -16,7 +16,8 @@ @Substitution(random_state=_random_state_docstring) def balanced_batch_generator(X, y, sample_weight=None, sampler=None, - batch_size=32, sparse=False, random_state=None): + batch_size=32, keep_sparse=False, + random_state=None): """Create a balanced batch generator to train keras model. Returns a generator --- as well as the number of step per epoch --- which @@ -43,7 +44,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None, batch_size : int, optional (default=32) Number of samples per gradient update. - sparse : bool, optional (default=False) + keep_sparse : bool, optional (default=False) Either or not to conserve or not the sparsity of the input ``X``. By default, the returned batches will be dense. @@ -137,7 +138,7 @@ def generator(X, y, sample_weight, indices, batch_size): for index in range(0, len(indices), batch_size): X_res = safe_indexing(X, indices[index:index + batch_size]) y_res = safe_indexing(y, indices[index:index + batch_size]) - if issparse(X_res) and not sparse: + if issparse(X_res) and not keep_sparse: X_res = X_res.toarray() if sample_weight is None: yield X_res, y_res diff --git a/imblearn/tensorflow/tests/test_generator.py b/imblearn/tensorflow/tests/test_generator.py index 48bce2af6..78eda3b1d 100644 --- a/imblearn/tensorflow/tests/test_generator.py +++ b/imblearn/tensorflow/tests/test_generator.py @@ -72,18 +72,18 @@ def accuracy(y_true, y_pred): .format(e, accuracy(y, predicts_train))) -@pytest.mark.parametrize("is_sparse", [True, False]) -def test_balanced_batch_generator_function_sparse(is_sparse): +@pytest.mark.parametrize("keep_sparse", [True, False]) +def test_balanced_batch_generator_function_sparse(keep_sparse): X, y = load_iris(return_X_y=True) X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40}) X = X.astype(np.float32) training_generator, steps_per_epoch = balanced_batch_generator( - sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10, + sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10, random_state=42) for idx in range(steps_per_epoch): X_batch, y_batch = next(training_generator) - if is_sparse: + if keep_sparse: assert sparse.issparse(X_batch) else: assert not sparse.issparse(X_batch)