5
5
import numpy as np
6
6
import warnings
7
7
from six .moves import xrange
8
- from scipy .sparse import coo_matrix
9
8
from sklearn .utils import check_random_state
10
9
11
10
__all__ = ['Constraints' ]
@@ -20,21 +19,7 @@ class Constraints(object):
20
19
def __init__(self, partial_labels):
    """Store the partial labelling that constraints are generated from.

    Parameters
    ----------
    partial_labels : array-like of int
        One label per point. Non-negative entries are treated as known
        labels; negative entries (conventionally -1) mark points whose
        label is unknown.
    """
    # Only the coerced label array is kept. Derived quantities such as the
    # indices of the known labels are recomputed from it by the methods
    # that need them, so they cannot drift out of sync with the labels.
    self.partial_labels = np.asanyarray(partial_labels, dtype=int)
38
23
39
24
def positive_negative_pairs (self , num_constraints , same_length = False ,
40
25
random_state = None ):
@@ -50,17 +35,19 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
50
35
51
36
def _pairs (self , num_constraints , same_label = True , max_iter = 10 ,
52
37
random_state = np .random ):
53
- num_labels = len (self .known_labels )
38
+ known_label_idx , = np .where (self .partial_labels >= 0 )
39
+ known_labels = self .partial_labels [known_label_idx ]
40
+ num_labels = len (known_labels )
54
41
ab = set ()
55
42
it = 0
56
43
while it < max_iter and len (ab ) < num_constraints :
57
44
nc = num_constraints - len (ab )
58
45
for aidx in random_state .randint (num_labels , size = nc ):
59
46
if same_label :
60
- mask = self . known_labels [aidx ] == self . known_labels
47
+ mask = known_labels [aidx ] == known_labels
61
48
mask [aidx ] = False # avoid identity pairs
62
49
else :
63
- mask = self . known_labels [aidx ] != self . known_labels
50
+ mask = known_labels [aidx ] != known_labels
64
51
b_choices , = np .where (mask )
65
52
if len (b_choices ) > 0 :
66
53
ab .add ((aidx , random_state .choice (b_choices )))
@@ -69,16 +56,18 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10,
69
56
warnings .warn ("Only generated %d %s constraints (requested %d)" % (
70
57
len (ab ), 'positive' if same_label else 'negative' , num_constraints ))
71
58
ab = np .array (list (ab )[:num_constraints ], dtype = int )
72
- return self . known_label_idx [ab .T ]
59
+ return known_label_idx [ab .T ]
73
60
74
61
def chunks (self , num_chunks = 100 , chunk_size = 2 , random_state = None ):
75
62
"""
76
63
the random state object to be passed must be a numpy random seed
77
64
"""
78
65
random_state = check_random_state (random_state )
79
- chunks = - np .ones_like (self .known_label_idx , dtype = int )
80
- uniq , lookup = np .unique (self .known_labels , return_inverse = True )
81
- all_inds = [set (np .where (lookup == c )[0 ]) for c in xrange (len (uniq ))]
66
+ chunks = - np .ones_like (self .partial_labels , dtype = int )
67
+ uniq , lookup = np .unique (self .partial_labels , return_inverse = True )
68
+ unknown_uniq = np .where (uniq < 0 )[0 ]
69
+ all_inds = [set (np .where (lookup == c )[0 ]) for c in xrange (len (uniq ))
70
+ if c not in unknown_uniq ]
82
71
max_chunks = int (np .sum ([len (s ) // chunk_size for s in all_inds ]))
83
72
if max_chunks < num_chunks :
84
73
raise ValueError (('Not enough possible chunks of %d elements in each'
0 commit comments