@@ -284,6 +284,10 @@ class BorderlineSMOTE(BaseSMOTE):
 
     SVMSMOTE : Over-sample using SVM-SMOTE variant.
 
+    KMeansSMOTE : Over-sample using KMeans-SMOTE variant.
+
+    SafeLevelSMOTE : Over-sample using SafeLevel-SMOTE variant.
+
     ADASYN : Over-sample using ADASYN.
 
     References
@@ -484,6 +489,10 @@ class SVMSMOTE(BaseSMOTE):
 
     BorderlineSMOTE : Over-sample using Borderline-SMOTE.
 
+    KMeansSMOTE : Over-sample using KMeans-SMOTE variant.
+
+    SafeLevelSMOTE : Over-sample using SafeLevel-SMOTE variant.
+
     ADASYN : Over-sample using ADASYN.
 
     References
@@ -695,6 +704,10 @@ class SMOTE(BaseSMOTE):
 
     SVMSMOTE : Over-sample using the SVM-SMOTE variant.
 
+    KMeansSMOTE : Over-sample using KMeans-SMOTE variant.
+
+    SafeLevelSMOTE : Over-sample using SafeLevel-SMOTE variant.
+
     ADASYN : Over-sample using ADASYN.
 
     References
@@ -864,6 +877,10 @@ class SMOTENC(SMOTE):
 
     BorderlineSMOTE : Over-sample using Borderline-SMOTE variant.
 
+    KMeansSMOTE : Over-sample using KMeans-SMOTE variant.
+
+    SafeLevelSMOTE : Over-sample using SafeLevel-SMOTE variant.
+
     ADASYN : Over-sample using ADASYN.
 
     References
@@ -1318,7 +1335,7 @@ def _fit_resample(self, X, y):
     sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
     random_state=_random_state_docstring,
 )
-class SLSMOTE(BaseSMOTE):
+class SafeLevelSMOTE(BaseSMOTE):
     """Class to perform over-sampling using safe-level SMOTE.
     This is an implementation of the Safe-level-SMOTE described in [2]_.
 
@@ -1389,13 +1406,13 @@ class SLSMOTE(BaseSMOTE):
     >>> from collections import Counter
     >>> from sklearn.datasets import make_classification
     >>> from imblearn.over_sampling import \
-SLSMOTE # doctest: +NORMALIZE_WHITESPACE
+SafeLevelSMOTE # doctest: +NORMALIZE_WHITESPACE
     >>> X, y = make_classification(n_classes=2, class_sep=2,
     ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
     ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
     >>> print('Original dataset shape %s' % Counter(y))
     Original dataset shape Counter({{1: 900, 0: 100}})
-    >>> sm = SLSMOTE(random_state=42)
+    >>> sm = SafeLevelSMOTE(random_state=42)
     >>> X_res, y_res = sm.fit_resample(X, y)
     >>> print('Resampled dataset shape %s' % Counter(y_res))
     Resampled dataset shape Counter({{0: 900, 1: 900}})
@@ -1415,7 +1432,7 @@ def __init__(self,
 
         self.m_neighbors = m_neighbors
 
-    def _assign_sl(self, nn_estimator, samples, target_class, y):
+    def _assign_safe_levels(self, nn_estimator, samples, target_class, y):
         '''
         Assign the safe levels to the instances in the target class.
 
@@ -1444,8 +1461,8 @@ def _assign_sl(self, nn_estimator, samples, target_class, y):
 
         x = nn_estimator.kneighbors(samples, return_distance=False)[:, 1:]
         nn_label = (y[x] == target_class).astype(int)
-        sl = np.sum(nn_label, axis=1)
-        return sl
+        safe_levels = np.sum(nn_label, axis=1)
+        return safe_levels
 
     def _validate_estimator(self):
         super()._validate_estimator()
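The safe level of a minority instance is simply the count of same-class points among its m nearest neighbors, with the point itself excluded. A minimal standalone sketch of that computation, assuming scikit-learn's NearestNeighbors; the toy data and names below are illustrative and not part of the patch:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.array([[0.0], [0.2], [0.4], [0.6], [0.3], [3.0], [3.1], [3.2]])
y = np.array([1, 1, 1, 1, 0, 0, 0, 0])  # class 0 is the minority class

# Ask for 4 neighbors: each query point plus its 3 true neighbors.
nn = NearestNeighbors(n_neighbors=4).fit(X)
minority = X[y == 0]
# Column 0 is the query point itself, hence the [:, 1:] as in the patch.
neighbors = nn.kneighbors(minority, return_distance=False)[:, 1:]
# Safe level = number of neighbors that belong to the minority class.
safe_levels = np.sum(y[neighbors] == 0, axis=1)
print(safe_levels)  # [0 2 2 2] -- the point at 0.3 sits inside the majority
                    # region, so _fit_resample filters it out below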
@@ -1466,28 +1483,30 @@ def _fit_resample(self, X, y):
             X_class = _safe_indexing(X, target_class_indices)
 
             self.nn_m_.fit(X)
-            sl = self._assign_sl(self.nn_m_, X_class, class_sample, y)
+            safe_levels = self._assign_safe_levels(
+                self.nn_m_, X_class, class_sample, y)
 
             # filter the points in X_class that have safe level > 0
             # if safe level = 0, the point is not used to
             # generate synthetic instances
-            X_safe_indices = np.flatnonzero(sl != 0)
+            X_safe_indices = np.flatnonzero(safe_levels != 0)
             X_safe_class = _safe_indexing(X_class, X_safe_indices)
 
             self.nn_k_.fit(X_class)
             nns = self.nn_k_.kneighbors(X_safe_class,
                                         return_distance=False)[:, 1:]
 
-            sl_safe_class = sl[X_safe_indices]
-            sl_nns = sl[nns]
+            sl_safe_class = safe_levels[X_safe_indices]
+            sl_nns = safe_levels[nns]
             sl_safe_t = np.array([sl_safe_class]).transpose()
             with np.errstate(divide='ignore'):
-                sl_ratio = np.divide(sl_safe_t, sl_nns)
+                safe_level_ratio = np.divide(sl_safe_t, sl_nns)
 
-            X_new, y_new = self._make_samples_sl(X_safe_class, y.dtype,
-                                                 class_sample, X_class,
-                                                 nns, n_samples, sl_ratio,
-                                                 1.0)
+            X_new, y_new = self._make_samples_safelevel(X_safe_class, y.dtype,
+                                                        class_sample, X_class,
+                                                        nns, n_samples,
+                                                        safe_level_ratio,
+                                                        1.0)
 
             if sparse.issparse(X_new):
                 X_resampled = sparse.vstack([X_resampled, X_new])
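Each retained sample's safe level is divided by the safe level of each of its k minority-class neighbors; np.errstate(divide='ignore') is needed because a neighbor's safe level can be 0, which yields an infinite ratio. A hand-worked example of this step, with made-up safe levels and neighbor indices:

import numpy as np

safe_levels = np.array([3, 0, 2, 1])     # one safe level per minority sample
X_safe_indices = np.flatnonzero(safe_levels != 0)  # -> [0, 2, 3]
nns = np.array([[2, 1], [3, 0], [0, 2]])  # k=2 neighbor indices, made up
# Column vector of the retained samples' safe levels: shape (3, 1).
sl_safe_t = np.array([safe_levels[X_safe_indices]]).transpose()
with np.errstate(divide='ignore'):       # a neighbor's safe level may be 0
    safe_level_ratio = np.divide(sl_safe_t, safe_levels[nns])
print(safe_level_ratio)  # rows: [1.5, inf], [2.0, 0.667], [0.333, 0.5]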
@@ -1497,8 +1516,8 @@ def _fit_resample(self, X, y):
 
         return X_resampled, y_resampled
 
-    def _make_samples_sl(self, X, y_dtype, y_type, nn_data, nn_num,
-                         n_samples, sl_ratio, step_size=1.):
+    def _make_samples_safelevel(self, X, y_dtype, y_type, nn_data, nn_num,
+                                n_samples, safe_level_ratio, step_size=1.):
         """A support function that returns artificial samples using
         safe-level SMOTE. It is similar to _make_samples method for SMOTE.
 
@@ -1524,7 +1543,7 @@ def _make_samples_sl(self, X, y_dtype, y_type, nn_data, nn_num,
         n_samples : int
             The number of samples to generate.
 
-        sl_ratio : ndarray, shape (n_samples_safe, k_nearest_neighbours)
+        safe_level_ratio : ndarray, shape (n_samples_safe, k_nearest_neighbours)
 
         step_size : float, optional (default=1.)
             The step size to create samples.
@@ -1546,8 +1565,8 @@ def _make_samples_sl(self, X, y_dtype, y_type, nn_data, nn_num,
                                                size=n_samples)
         rows = np.floor_divide(samples_indices, nn_num.shape[1])
         cols = np.mod(samples_indices, nn_num.shape[1])
-        gap_arr = step_size * self._vgenerate_gap(sl_ratio)
-        gaps = gap_arr.flatten()[samples_indices]
+        gap_array = step_size * self._vgenerate_gap(safe_level_ratio)
+        gaps = gap_array.flatten()[samples_indices]
 
         y_new = np.array([y_type] * n_samples, dtype=y_dtype)
 
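_make_samples_safelevel picks synthetic points by drawing flat indices into the (safe sample, neighbor) grid and decoding them with floor_divide and mod, exactly as above. A tiny illustration of that decoding, with a made-up neighbor table:

import numpy as np

rng = np.random.RandomState(42)
nn_num = np.array([[2, 1], [3, 0], [0, 2]])  # 3 safe samples, k=2 neighbors
samples_indices = rng.randint(low=0, high=nn_num.size, size=4)
rows = np.floor_divide(samples_indices, nn_num.shape[1])  # which safe sample
cols = np.mod(samples_indices, nn_num.shape[1])           # which of its neighbors
# Each synthetic point is then X[rows[i]] + gaps[i] * (neighbor - X[rows[i]]).
for idx, r, c in zip(samples_indices, rows, cols):
    print(idx, '->', (r, c), 'neighbor index', nn_num[r, c])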
@@ -1578,12 +1597,12 @@ def _make_samples_sl(self, X, y_dtype, y_type, nn_data, nn_num,
         return X_new, y_new
 
     def _generate_gap(self, a_ratio, rand_state=None):
-        """ generate gap according to sl_ratio, non-vectorized version.
+        """Generate gap according to safe_level_ratio; non-vectorized version.
 
         Parameters
         ----------
         a_ratio: float
-            sl_ratio of a single data point
+            safe_level_ratio of a single data point
 
         rand_state: random state object or int
 
@@ -1603,28 +1622,30 @@ def _generate_gap(self, a_ratio, rand_state=None):
         elif 0 < a_ratio < 1:
             gap = random_state.uniform(1 - a_ratio, 1)
         else:
-            raise ValueError('sl_ratio should be nonegative')
+            raise ValueError('safe_level_ratio should be non-negative')
         return gap
 
-    def _vgenerate_gap(self, sl_ratio):
+    def _vgenerate_gap(self, safe_level_ratio):
         """
-        generate gap according to sl_ratio, vectorized version of _generate_gap
+        Generate gap according to safe_level_ratio; vectorized version
+        of _generate_gap.
 
         Parameters
         ----------
-        sl_ratio : ndarray shape (n_samples_safe, k_nearest_neighbours)
-            sl_ratio of all instances with safe_level>0 in the specified
-            class
+        safe_level_ratio : ndarray, shape (n_samples_safe, k_nearest_neighbours)
+            safe_level_ratio of all instances with safe_level > 0 in the
+            specified class
 
         Returns
         -------
-        gap_arr : ndarray shape (n_samples_safe, k_nearest_neighbours)
+        gap_array : ndarray, shape (n_samples_safe, k_nearest_neighbours)
             the gap for all instances with safe_level > 0 in the specified
             class
 
         """
         prng = check_random_state(self.random_state)
-        rand_state = prng.randint(sl_ratio.size + 1, size=sl_ratio.shape)
+        rand_state = prng.randint(
+            safe_level_ratio.size + 1, size=safe_level_ratio.shape)
         vgap = np.vectorize(self._generate_gap)
-        gap_arr = vgap(sl_ratio, rand_state)
-        return gap_arr
+        gap_array = vgap(safe_level_ratio, rand_state)
+        return gap_array
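The hunk above shows only the tail of _generate_gap, so for context here is a sketch of the full gap rule from the Safe-level-SMOTE paper; the branches for an infinite ratio and for a ratio >= 1 are inferred from the paper, not copied from this diff:

import numpy as np
from sklearn.utils import check_random_state

def generate_gap(a_ratio, rand_state=None):
    """Sketch of the safe-level gap rule; not the patch's exact code."""
    rng = check_random_state(rand_state)
    if np.isinf(a_ratio):
        # Neighbor has safe level 0: keep the safe instance itself (gap = 0).
        return 0.0
    if a_ratio >= 1:
        # The instance is at least as safe as its neighbor:
        # place the synthetic point closer to the instance.
        return rng.uniform(0, 1 / a_ratio)
    if 0 < a_ratio < 1:
        # The neighbor is safer: place the synthetic point closer to it.
        return rng.uniform(1 - a_ratio, 1)
    raise ValueError('safe_level_ratio should be non-negative')

print(generate_gap(np.inf))    # 0.0
print(generate_gap(2.0, 0))    # drawn uniformly from [0, 0.5)
print(generate_gap(0.25, 0))   # drawn uniformly from [0.75, 1)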