Skip to content

FIX: rename sparse to keep_sparse for keras and tensorflow #453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions imblearn/keras/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class BalancedBatchGenerator(ParentClass):
batch_size : int, optional (default=32)
Number of samples per gradient update.

sparse : bool, optional (default=False)
keep_sparse : bool, optional (default=False)
Whether or not to conserve the sparsity of the input (i.e. ``X``,
``y``, ``sample_weight``). By default, the returned batches will be
dense.
Expand Down Expand Up @@ -98,15 +98,15 @@ class BalancedBatchGenerator(ParentClass):

"""
def __init__(self, X, y, sample_weight=None, sampler=None, batch_size=32,
sparse=False, random_state=None):
keep_sparse=False, random_state=None):
if not HAS_KERAS:
raise ImportError("'No module named 'keras'")
self.X = X
self.y = y
self.sample_weight = sample_weight
self.sampler = sampler
self.batch_size = batch_size
self.sparse = sparse
self.keep_sparse = keep_sparse
self.random_state = random_state
self._sample()

Expand Down Expand Up @@ -138,7 +138,7 @@ def __getitem__(self, index):
y_resampled = safe_indexing(
self.y, self.indices_[index * self.batch_size:
(index + 1) * self.batch_size])
if issparse(X_resampled) and not self.sparse:
if issparse(X_resampled) and not self.keep_sparse:
X_resampled = X_resampled.toarray()
if self.sample_weight is not None:
sample_weight_resampled = safe_indexing(
Expand All @@ -154,7 +154,8 @@ def __getitem__(self, index):

@Substitution(random_state=_random_state_docstring)
def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
batch_size=32, sparse=False, random_state=None):
batch_size=32, keep_sparse=False,
random_state=None):
"""Create a balanced batch generator to train keras model.

Returns a generator --- as well as the number of steps per epoch --- which
Expand All @@ -181,7 +182,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
batch_size : int, optional (default=32)
Number of samples per gradient update.

sparse : bool, optional (default=False)
keep_sparse : bool, optional (default=False)
Whether or not to conserve the sparsity of the input (i.e. ``X``,
``y``, ``sample_weight``). By default, the returned batches will be
dense.
Expand Down Expand Up @@ -226,4 +227,4 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,

return tf_bbg(X=X, y=y, sample_weight=sample_weight,
sampler=sampler, batch_size=batch_size,
sparse=sparse, random_state=random_state)
keep_sparse=keep_sparse, random_state=random_state)
16 changes: 8 additions & 8 deletions imblearn/keras/tests/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ def test_balanced_batch_generator_class(sampler, sample_weight):
epochs=10)


@pytest.mark.parametrize("is_sparse", [True, False])
def test_balanced_batch_generator_class_sparse(is_sparse):
@pytest.mark.parametrize("keep_sparse", [True, False])
def test_balanced_batch_generator_class_sparse(keep_sparse):
training_generator = BalancedBatchGenerator(sparse.csr_matrix(X), y,
batch_size=10,
sparse=is_sparse,
keep_sparse=keep_sparse,
random_state=42)
for idx in range(len(training_generator)):
X_batch, y_batch = training_generator.__getitem__(idx)
if is_sparse:
if keep_sparse:
assert sparse.issparse(X_batch)
else:
assert not sparse.issparse(X_batch)
Expand Down Expand Up @@ -88,14 +88,14 @@ def test_balanced_batch_generator_function(sampler, sample_weight):
epochs=10)


@pytest.mark.parametrize("is_sparse", [True, False])
def test_balanced_batch_generator_function_sparse(is_sparse):
@pytest.mark.parametrize("keep_sparse", [True, False])
def test_balanced_batch_generator_function_sparse(keep_sparse):
training_generator, steps_per_epoch = balanced_batch_generator(
sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10,
sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
random_state=42)
for idx in range(steps_per_epoch):
X_batch, y_batch = next(training_generator)
if is_sparse:
if keep_sparse:
assert sparse.issparse(X_batch)
else:
assert not sparse.issparse(X_batch)
7 changes: 4 additions & 3 deletions imblearn/tensorflow/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

@Substitution(random_state=_random_state_docstring)
def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
batch_size=32, sparse=False, random_state=None):
batch_size=32, keep_sparse=False,
random_state=None):
"""Create a balanced batch generator to train keras model.

Returns a generator --- as well as the number of steps per epoch --- which
Expand All @@ -43,7 +44,7 @@ def balanced_batch_generator(X, y, sample_weight=None, sampler=None,
batch_size : int, optional (default=32)
Number of samples per gradient update.

sparse : bool, optional (default=False)
keep_sparse : bool, optional (default=False)
Whether or not to conserve the sparsity of the input ``X``. By
default, the returned batches will be dense.

Expand Down Expand Up @@ -137,7 +138,7 @@ def generator(X, y, sample_weight, indices, batch_size):
for index in range(0, len(indices), batch_size):
X_res = safe_indexing(X, indices[index:index + batch_size])
y_res = safe_indexing(y, indices[index:index + batch_size])
if issparse(X_res) and not sparse:
if issparse(X_res) and not keep_sparse:
X_res = X_res.toarray()
if sample_weight is None:
yield X_res, y_res
Expand Down
8 changes: 4 additions & 4 deletions imblearn/tensorflow/tests/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,18 @@ def accuracy(y_true, y_pred):
.format(e, accuracy(y, predicts_train)))


@pytest.mark.parametrize("is_sparse", [True, False])
def test_balanced_batch_generator_function_sparse(is_sparse):
@pytest.mark.parametrize("keep_sparse", [True, False])
def test_balanced_batch_generator_function_sparse(keep_sparse):
X, y = load_iris(return_X_y=True)
X, y = make_imbalance(X, y, {0: 30, 1: 50, 2: 40})
X = X.astype(np.float32)

training_generator, steps_per_epoch = balanced_batch_generator(
sparse.csr_matrix(X), y, sparse=is_sparse, batch_size=10,
sparse.csr_matrix(X), y, keep_sparse=keep_sparse, batch_size=10,
random_state=42)
for idx in range(steps_per_epoch):
X_batch, y_batch = next(training_generator)
if is_sparse:
if keep_sparse:
assert sparse.issparse(X_batch)
else:
assert not sparse.issparse(X_batch)