diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst index 7aac7eca4..8552b2ba7 100644 --- a/doc/whats_new/v0.0.4.rst +++ b/doc/whats_new/v0.0.4.rst @@ -78,6 +78,15 @@ Maintenance - Remove deprecated parameters in 0.2 - :issue:`331` by :user:`Guillaume Lemaitre `. +- Make some modules private. + :issue:`452` by :user:`Guillaume Lemaitre `. + +Documentation +............. + +- Remove some docstrings which are not necessary. + :issue:`454` by :user:`Guillaume Lemaitre `. + Deprecation ........... diff --git a/imblearn/base.py b/imblearn/base.py index eb2800b01..0cb7dfecf 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -95,7 +95,8 @@ def fit_sample(self, X, y): @abstractmethod def _sample(self, X, y): - """Resample the dataset. + """Base method defined in each sampler to define the sampling + strategy. Parameters ---------- diff --git a/imblearn/combine/_smote_enn.py b/imblearn/combine/_smote_enn.py index d1a1960f4..4618264c5 100644 --- a/imblearn/combine/_smote_enn.py +++ b/imblearn/combine/_smote_enn.py @@ -149,26 +149,6 @@ def fit(self, X, y): return self def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new) - The corresponding label of `X_resampled` - - """ self._validate_estimator() X_res, y_res = self.smote_.fit_sample(X, y) diff --git a/imblearn/combine/_smote_tomek.py b/imblearn/combine/_smote_tomek.py index f94788d1f..ce6143ab4 100644 --- a/imblearn/combine/_smote_tomek.py +++ b/imblearn/combine/_smote_tomek.py @@ -157,26 +157,6 @@ def fit(self, X, y): return self def _sample(self, X, y): - """Resample the dataset. 
- - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - """ self._validate_estimator() X_res, y_res = self.smote_.fit_sample(X, y) diff --git a/imblearn/ensemble/_balance_cascade.py b/imblearn/ensemble/_balance_cascade.py index 8eb2c6a38..7532446bf 100644 --- a/imblearn/ensemble/_balance_cascade.py +++ b/imblearn/ensemble/_balance_cascade.py @@ -152,30 +152,6 @@ def _validate_estimator(self): self.logger.debug(self.estimator_) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_subset, n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_subset, n_samples_new) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_subset, n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() random_state = check_random_state(self.random_state) diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index e42e0aeba..4ed517b17 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -102,31 +102,6 @@ def __init__(self, self.n_subsets = n_subsets def _sample(self, X, y): - """Resample the dataset. 
- - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_subset, n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_subset, n_samples_new) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_subset, n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ - random_state = check_random_state(self.random_state) X_resampled = [] diff --git a/imblearn/over_sampling/_adasyn.py b/imblearn/over_sampling/_adasyn.py index dab9ed40f..9ee6bdd04 100644 --- a/imblearn/over_sampling/_adasyn.py +++ b/imblearn/over_sampling/_adasyn.py @@ -107,27 +107,6 @@ def _validate_estimator(self): self.nn_.set_params(**{'n_jobs': self.n_jobs}) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. 
- - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - - """ self._validate_estimator() random_state = check_random_state(self.random_state) diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py index 73cca1c66..8d070f4c6 100644 --- a/imblearn/over_sampling/_random_over_sampler.py +++ b/imblearn/over_sampling/_random_over_sampler.py @@ -89,26 +89,6 @@ def _check_X_y(X, y): return X, y, binarize_y def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - """ random_state = check_random_state(self.random_state) target_stats = Counter(y) diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py index ed84cab79..94c49bbdc 100644 --- a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py +++ b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py @@ -136,26 +136,6 @@ def _generate_sample(self, X, y, centroids, target_class): return X_new, y_new def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. 
- - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - """ self._validate_estimator() if self.voting == 'auto': diff --git a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py index 8d275e72e..3624302cf 100644 --- a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py @@ -129,30 +129,6 @@ def _validate_estimator(self): ' Got {} instead.'.format(type(self.n_neighbors))) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() random_state = check_random_state(self.random_state) diff --git a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py index 9eac328cc..1949a20cf 100644 --- a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py @@ -139,30 +139,6 @@ def _validate_estimator(self): raise NotImplementedError def _sample(self, X, y): - """Resample the dataset. 
- - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() idx_under = np.empty((0, ), dtype=int) @@ -328,31 +304,6 @@ def _validate_estimator(self): ratio=self.ratio) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ - self._validate_estimator() X_, y_ = X, y @@ -539,30 +490,6 @@ def _validate_estimator(self): ratio=self.ratio) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. 
- - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() X_, y_ = X, y diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py index 6aa9f4b3b..1ecb8ec64 100644 --- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py @@ -126,26 +126,6 @@ def _validate_estimator(self): type(self.estimator))) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() target_stats = Counter(y) diff --git a/imblearn/under_sampling/_prototype_selection/_nearmiss.py b/imblearn/under_sampling/_prototype_selection/_nearmiss.py index 51fcf9f3d..4467784e3 100644 --- a/imblearn/under_sampling/_prototype_selection/_nearmiss.py +++ b/imblearn/under_sampling/_prototype_selection/_nearmiss.py @@ -212,30 +212,6 @@ def _validate_estimator(self): ' {}'.format(self.version)) def _sample(self, X, y): - """Resample the dataset. 
- - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() idx_under = np.empty((0, ), dtype=int) diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py index 4bcb46b11..22191d0e9 100644 --- a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py @@ -140,30 +140,6 @@ def _validate_estimator(self): " Got {} instead.".format(self.threshold_cleaning)) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. 
- - """ self._validate_estimator() enn = EditedNearestNeighbours( sampling_strategy=self.sampling_strategy, diff --git a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py index 046fa2a8e..aa2ba1464 100644 --- a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py +++ b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py @@ -123,29 +123,6 @@ def _validate_estimator(self): ' Got {} instead.'.format(type(self.n_neighbors))) def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : ndarray, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : ndarray, shape (n_samples, ) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : ndarray, shape (n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ self._validate_estimator() random_state = check_random_state(self.random_state) diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py index 3b3c7691d..80cd0aad0 100644 --- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py @@ -93,31 +93,6 @@ def _check_X_y(X, y): return X, y, binarize_y def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. 
- - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, an array will be returned - containing a boolean for each sample to represent whether - that sample was selected or not. - - """ random_state = check_random_state(self.random_state) idx_under = np.empty((0, ), dtype=int) diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py index 666bbcd16..52d1b1cf6 100644 --- a/imblearn/under_sampling/_prototype_selection/_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/_tomek_links.py @@ -135,30 +135,6 @@ def is_tomek(y, nn_index, class_type): return links def _sample(self, X, y): - """Resample the dataset. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Matrix containing the data which have to be sampled. - - y : array-like, shape (n_samples,) - Corresponding label for each sample in X. - - Returns - ------- - X_resampled : {ndarray, sparse matrix}, shape \ -(n_samples_new, n_features) - The array containing the resampled data. - - y_resampled : ndarray, shape (n_samples_new,) - The corresponding label of `X_resampled` - - idx_under : ndarray, shape (n_samples, ) - If `return_indices` is `True`, a boolean array will be returned - containing the which samples have been selected. - - """ # check for deprecated random_state if self.random_state is not None: deprecate_parameter(self, '0.4', 'random_state')