diff --git a/imblearn/over_sampling/adasyn.py b/imblearn/over_sampling/adasyn.py index ea99efaac..98fe19a38 100644 --- a/imblearn/over_sampling/adasyn.py +++ b/imblearn/over_sampling/adasyn.py @@ -161,6 +161,11 @@ def _sample(self, X, y): ratio_nn /= np.sum(ratio_nn) n_samples_generate = np.rint(ratio_nn * n_samples).astype(int) + # the nearest neighbors need to be fitted only on the current class + # to find the class NN to generate new samples + self.nn_.fit(X_class) + _, nn_index = self.nn_.kneighbors(X_class) + x_class_gen = [] for x_i, x_i_nn, num_sample_i in zip(X_class, nn_index, n_samples_generate): diff --git a/imblearn/over_sampling/tests/test_adasyn.py b/imblearn/over_sampling/tests/test_adasyn.py index 81fdd6261..5eacb6cac 100644 --- a/imblearn/over_sampling/tests/test_adasyn.py +++ b/imblearn/over_sampling/tests/test_adasyn.py @@ -42,18 +42,30 @@ def test_ada_fit(): def test_ada_fit_sample(): ada = ADASYN(random_state=RND_SEED) X_resampled, y_resampled = ada.fit_sample(X, Y) - X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], - [1.25192108, -0.22367336], [0.53366841, -0.30312976], - [1.52091956, -0.49283504], [-0.28162401, -2.10400981], - [0.83680821, 1.72827342], [0.3084254, 0.33299982], - [0.70472253, -0.73309052], [0.28893132, -0.38761769], - [1.15514042, 0.0129463], [0.88407872, 0.35454207], - [1.31301027, -0.92648734], [-1.11515198, -0.93689695], - [-0.18410027, -0.45194484], [0.9281014, 0.53085498], - [-0.14374509, 0.27370049], [-0.41635887, -0.38299653], - [0.08711622, 0.93259929], [1.70580611, -0.11219234], - [-0.06182085, -0.28084828], [0.38614986, -0.35405599], - [0.39635544, 0.33629036], [-0.24027923, 0.04116021]]) + X_gt = np.array([[0.11622591, -0.0317206], + [0.77481731, 0.60935141], + [1.25192108, -0.22367336], + [0.53366841, -0.30312976], + [1.52091956, -0.49283504], + [-0.28162401, -2.10400981], + [0.83680821, 1.72827342], + [0.3084254, 0.33299982], + [0.70472253, -0.73309052], + [0.28893132, -0.38761769], + [1.15514042, 0.0129463], + [0.88407872, 0.35454207], + [1.31301027, -0.92648734], + [-1.11515198, -0.93689695], + [-0.18410027, -0.45194484], + [0.9281014, 0.53085498], + [-0.14374509, 0.27370049], + [-0.41635887, -0.38299653], + [0.08711622, 0.93259929], + [1.70580611, -0.11219234], + [0.36370445, -0.19262406], + [0.28204936, -0.13953426], + [0.39635544, 0.33629036], + [0.35301481, 0.25795516]]) y_gt = np.array([ 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0 ]) @@ -65,16 +77,26 @@ def test_ada_fit_sample_half(): ratio = 0.8 ada = ADASYN(ratio=ratio, random_state=RND_SEED) X_resampled, y_resampled = ada.fit_sample(X, Y) - X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], - [1.25192108, -0.22367336], [0.53366841, -0.30312976], - [1.52091956, -0.49283504], [-0.28162401, -2.10400981], - [0.83680821, 1.72827342], [0.3084254, 0.33299982], - [0.70472253, -0.73309052], [0.28893132, -0.38761769], - [1.15514042, 0.0129463], [0.88407872, 0.35454207], - [1.31301027, -0.92648734], [-1.11515198, -0.93689695], - [-0.18410027, -0.45194484], [0.9281014, 0.53085498], - [-0.14374509, 0.27370049], [-0.41635887, -0.38299653], - [0.08711622, 0.93259929], [1.70580611, -0.11219234]]) + X_gt = np.array([[0.11622591, -0.0317206], + [0.77481731, 0.60935141], + [1.25192108, -0.22367336], + [0.53366841, -0.30312976], + [1.52091956, -0.49283504], + [-0.28162401, -2.10400981], + [0.83680821, 1.72827342], + [0.3084254, 0.33299982], + [0.70472253, -0.73309052], + [0.28893132, -0.38761769], + [1.15514042, 0.0129463], + [0.88407872, 0.35454207], + [1.31301027, -0.92648734], + [-1.11515198, -0.93689695], + [-0.18410027, -0.45194484], + [0.9281014, 0.53085498], + [-0.14374509, 0.27370049], + [-0.41635887, -0.38299653], + [0.08711622, 0.93259929], + [1.70580611, -0.11219234]]) y_gt = np.array( [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0]) assert_allclose(X_resampled, X_gt, rtol=R_TOL) @@ -85,18 +107,30 @@ def test_ada_fit_sample_nn_obj(): nn = NearestNeighbors(n_neighbors=6) ada = ADASYN(random_state=RND_SEED, n_neighbors=nn) X_resampled, y_resampled = ada.fit_sample(X, Y) - X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], - [1.25192108, -0.22367336], [0.53366841, -0.30312976], - [1.52091956, -0.49283504], [-0.28162401, -2.10400981], - [0.83680821, 1.72827342], [0.3084254, 0.33299982], - [0.70472253, -0.73309052], [0.28893132, -0.38761769], - [1.15514042, 0.0129463], [0.88407872, 0.35454207], - [1.31301027, -0.92648734], [-1.11515198, -0.93689695], - [-0.18410027, -0.45194484], [0.9281014, 0.53085498], - [-0.14374509, 0.27370049], [-0.41635887, -0.38299653], - [0.08711622, 0.93259929], [1.70580611, -0.11219234], - [-0.06182085, -0.28084828], [0.38614986, -0.35405599], - [0.39635544, 0.33629036], [-0.24027923, 0.04116021]]) + X_gt = np.array([[0.11622591, -0.0317206], + [0.77481731, 0.60935141], + [1.25192108, -0.22367336], + [0.53366841, -0.30312976], + [1.52091956, -0.49283504], + [-0.28162401, -2.10400981], + [0.83680821, 1.72827342], + [0.3084254, 0.33299982], + [0.70472253, -0.73309052], + [0.28893132, -0.38761769], + [1.15514042, 0.0129463], + [0.88407872, 0.35454207], + [1.31301027, -0.92648734], + [-1.11515198, -0.93689695], + [-0.18410027, -0.45194484], + [0.9281014, 0.53085498], + [-0.14374509, 0.27370049], + [-0.41635887, -0.38299653], + [0.08711622, 0.93259929], + [1.70580611, -0.11219234], + [0.36370445, -0.19262406], + [0.28204936, -0.13953426], + [0.39635544, 0.33629036], + [0.35301481, 0.25795516]]) y_gt = np.array([ 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0 ])