Skip to content

Commit 5b765e3

Browse files
authored
[MRG] Reorganise under-sampling methods (#277)
* MAINT change organisation under-sampling * FIX conflict api doc * DOC add entry in whats new * FIX indent in the documentation * FIX add current module for linking the documentation * FIX address Christos' comments * ENH Move the tests * FIX remove useless import
1 parent b874c87 commit 5b765e3

27 files changed

+81
-46
lines changed

doc/api.rst

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,34 @@ Under-sampling methods
1313
:no-members:
1414
:no-inherited-members:
1515

16-
Classes
17-
-------
16+
.. currentmodule:: imblearn
17+
18+
Prototype generation
19+
--------------------
20+
21+
.. automodule:: imblearn.under_sampling.prototype_generation
22+
:no-members:
23+
:no-inherited-members:
24+
1825
.. currentmodule:: imblearn
1926

2027
.. autosummary::
2128
:toctree: generated/
2229

2330
under_sampling.ClusterCentroids
31+
32+
Prototype selection
33+
-------------------
34+
35+
.. automodule:: imblearn.under_sampling.prototype_selection
36+
:no-members:
37+
:no-inherited-members:
38+
39+
.. currentmodule:: imblearn
40+
41+
.. autosummary::
42+
:toctree: generated/
43+
2444
under_sampling.CondensedNearestNeighbour
2545
under_sampling.EditedNearestNeighbours
2646
under_sampling.RepeatedEditedNearestNeighbours
@@ -32,7 +52,6 @@ Classes
3252
under_sampling.RandomUnderSampler
3353
under_sampling.TomekLinks
3454

35-
3655
.. _over_sampling_ref:
3756

3857
Over-sampling methods
@@ -42,8 +61,6 @@ Over-sampling methods
4261
:no-members:
4362
:no-inherited-members:
4463

45-
Classes
46-
-------
4764
.. currentmodule:: imblearn
4865

4966
.. autosummary::
@@ -63,8 +80,6 @@ Combination of over- and under-sampling methods
6380
:no-members:
6481
:no-inherited-members:
6582

66-
Classes
67-
-------
6883
.. currentmodule:: imblearn
6984

7085
.. autosummary::
@@ -83,8 +98,6 @@ Ensemble methods
8398
:no-members:
8499
:no-inherited-members:
85100

86-
Classes
87-
-------
88101
.. currentmodule:: imblearn
89102

90103
.. autosummary::
@@ -105,18 +118,10 @@ Pipeline
105118

106119
.. currentmodule:: imblearn
107120

108-
Classes
109-
-------
110121
.. autosummary::
111122
:toctree: generated/
112123

113124
pipeline.Pipeline
114-
115-
Functions
116-
---------
117-
.. autosummary::
118-
:toctree: generated/
119-
120125
pipeline.make_pipeline
121126

122127
.. _metrics_ref:
@@ -130,8 +135,6 @@ Metrics
130135

131136
.. currentmodule:: imblearn
132137

133-
Functions
134-
---------
135138
.. autosummary::
136139
:toctree: generated/
137140

@@ -152,8 +155,6 @@ Datasets
152155

153156
.. currentmodule:: imblearn
154157

155-
Functions
156-
---------
157158
.. autosummary::
158159
:toctree: generated/
159160

@@ -169,8 +170,6 @@ Utilities
169170

170171
.. currentmodule:: imblearn
171172

172-
Functions
173-
---------
174173
.. autosummary::
175174
:toctree: generated/
176175

doc/whats_new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ API changes summary
4242
errors. By `Guillaume Lemaitre`_.
4343
- creation of a module `utils.validation` to make checking of
4444
recurrent patterns. By `Guillaume Lemaitre`_.
45+
- move the under-sampling methods in `prototype_selection` and
46+
`prototype_generation` submodules to make a clearer distinction. By
47+
`Guillaume Lemaitre`_.
4548

4649

4750
.. _changes_0_2:

imblearn/under_sampling/__init__.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,18 @@
33
a dataset.
44
"""
55

6-
from .random_under_sampler import RandomUnderSampler
7-
from .tomek_links import TomekLinks
8-
from .cluster_centroids import ClusterCentroids
9-
from .nearmiss import NearMiss
10-
from .condensed_nearest_neighbour import CondensedNearestNeighbour
11-
from .one_sided_selection import OneSidedSelection
12-
from .neighbourhood_cleaning_rule import NeighbourhoodCleaningRule
13-
from .edited_nearest_neighbours import EditedNearestNeighbours
14-
from .edited_nearest_neighbours import RepeatedEditedNearestNeighbours
15-
from .edited_nearest_neighbours import AllKNN
16-
from .instance_hardness_threshold import InstanceHardnessThreshold
6+
from .prototype_generation import ClusterCentroids
7+
8+
from .prototype_selection import RandomUnderSampler
9+
from .prototype_selection import TomekLinks
10+
from .prototype_selection import NearMiss
11+
from .prototype_selection import CondensedNearestNeighbour
12+
from .prototype_selection import OneSidedSelection
13+
from .prototype_selection import NeighbourhoodCleaningRule
14+
from .prototype_selection import EditedNearestNeighbours
15+
from .prototype_selection import RepeatedEditedNearestNeighbours
16+
from .prototype_selection import AllKNN
17+
from .prototype_selection import InstanceHardnessThreshold
1718

1819
__all__ = [
1920
'RandomUnderSampler', 'TomekLinks', 'ClusterCentroids', 'NearMiss',
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""
2+
The :mod:`imblearn.under_sampling.prototype_generation` submodule contains
3+
methods that generate new samples in order to balance the dataset.
4+
"""
5+
6+
from .cluster_centroids import ClusterCentroids
7+
8+
__all__ = [
9+
'ClusterCentroids'
10+
]

imblearn/under_sampling/cluster_centroids.py renamed to imblearn/under_sampling/prototype_generation/cluster_centroids.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import numpy as np
1414
from sklearn.cluster import KMeans
1515

16-
from ..base import BaseMulticlassSampler
16+
from ...base import BaseMulticlassSampler
1717

1818

1919
class ClusterCentroids(BaseMulticlassSampler):
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""
2+
The :mod:`imblearn.under_sampling.prototype_selection` submodule contains
3+
methods that select samples in order to balance the dataset.
4+
"""
5+
6+
from .random_under_sampler import RandomUnderSampler
7+
from .tomek_links import TomekLinks
8+
from .nearmiss import NearMiss
9+
from .condensed_nearest_neighbour import CondensedNearestNeighbour
10+
from .one_sided_selection import OneSidedSelection
11+
from .neighbourhood_cleaning_rule import NeighbourhoodCleaningRule
12+
from .edited_nearest_neighbours import EditedNearestNeighbours
13+
from .edited_nearest_neighbours import RepeatedEditedNearestNeighbours
14+
from .edited_nearest_neighbours import AllKNN
15+
from .instance_hardness_threshold import InstanceHardnessThreshold
16+
17+
__all__ = [
18+
'RandomUnderSampler', 'TomekLinks', 'NearMiss',
19+
'CondensedNearestNeighbour', 'OneSidedSelection',
20+
'NeighbourhoodCleaningRule', 'EditedNearestNeighbours',
21+
'RepeatedEditedNearestNeighbours', 'AllKNN', 'InstanceHardnessThreshold'
22+
]

imblearn/under_sampling/condensed_nearest_neighbour.py renamed to imblearn/under_sampling/prototype_selection/condensed_nearest_neighbour.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from sklearn.neighbors import KNeighborsClassifier
1414
from sklearn.utils import check_random_state
1515

16-
from ..base import BaseMulticlassSampler
16+
from ...base import BaseMulticlassSampler
1717

1818

1919
class CondensedNearestNeighbour(BaseMulticlassSampler):

imblearn/under_sampling/edited_nearest_neighbours.py renamed to imblearn/under_sampling/prototype_selection/edited_nearest_neighbours.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import numpy as np
1515
from scipy.stats import mode
1616

17-
from ..base import BaseMulticlassSampler
18-
from ..utils import check_neighbors_object
17+
from ...base import BaseMulticlassSampler
18+
from ...utils import check_neighbors_object
1919

2020
SEL_KIND = ('all', 'mode')
2121

imblearn/under_sampling/instance_hardness_threshold.py renamed to imblearn/under_sampling/prototype_selection/instance_hardness_threshold.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from sklearn.ensemble import RandomForestClassifier
1818
from sklearn.externals.six import string_types
1919

20-
from ..base import BaseBinarySampler
20+
from ...base import BaseBinarySampler
2121

2222

2323
def _get_cv_splits(X, y, cv, random_state):

imblearn/under_sampling/nearmiss.py renamed to imblearn/under_sampling/prototype_selection/nearmiss.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
import numpy as np
1313

14-
from ..base import BaseMulticlassSampler
15-
from ..utils import check_neighbors_object
14+
from ...base import BaseMulticlassSampler
15+
from ...utils import check_neighbors_object
1616

1717

1818
class NearMiss(BaseMulticlassSampler):

imblearn/under_sampling/neighbourhood_cleaning_rule.py renamed to imblearn/under_sampling/prototype_selection/neighbourhood_cleaning_rule.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
import numpy as np
1212

13-
from ..base import BaseMulticlassSampler
14-
from ..utils import check_neighbors_object
13+
from ...base import BaseMulticlassSampler
14+
from ...utils import check_neighbors_object
1515

1616

1717
class NeighbourhoodCleaningRule(BaseMulticlassSampler):

imblearn/under_sampling/one_sided_selection.py renamed to imblearn/under_sampling/prototype_selection/one_sided_selection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors
1313
from sklearn.utils import check_random_state
1414

15-
from ..base import BaseBinarySampler
15+
from ...base import BaseBinarySampler
1616
from .tomek_links import TomekLinks
1717

1818

imblearn/under_sampling/random_under_sampler.py renamed to imblearn/under_sampling/prototype_selection/random_under_sampler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212
from sklearn.utils import check_random_state
1313

14-
from ..base import BaseMulticlassSampler
14+
from ...base import BaseMulticlassSampler
1515

1616

1717
class RandomUnderSampler(BaseMulticlassSampler):

imblearn/under_sampling/prototype_selection/tests/__init__.py

Whitespace-only changes.

imblearn/under_sampling/tomek_links.py renamed to imblearn/under_sampling/prototype_selection/tomek_links.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import numpy as np
1313
from sklearn.neighbors import NearestNeighbors
1414

15-
from ..base import BaseBinarySampler
15+
from ...base import BaseBinarySampler
1616

1717

1818
class TomekLinks(BaseBinarySampler):

0 commit comments

Comments
 (0)