You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Teach the dynamic rules file reader to look for the
alltoall_algorithm_max_requests tuning parameter. To keep the dynamic rules
file format backward compatible, the alltoall_algorithm_max_requests parameter
is optional. When it is not present in a rule definition, the value of the
corresponding MCA variable is used instead.
Resolves #12589
Signed-off-by: Burlen Loring <bloring@nvidia.com>
longNCOL=0, /* number of collectives for which rules are provided */
73
+
COLID=0, /* identifies the collective type to associate the rules with */
74
+
NCOMSIZES=0, /* number of sets of message size rules. the key is communicator size */
75
+
COMSIZE=0, /* communicator size, the key identifying a specific set of message size rules. */
76
+
NMSGSIZES=0, /* number of message size rules in the set. */
77
+
MSGSIZE=0, /* message size, the key identifying a specific rule in the set. */
78
+
ALG=0, /* the collective specific algorithm to use */
79
+
FANINOUT=0, /* algorithm specific tuning parameter */
80
+
SEGSIZE=0, /* algorithm specific tuning parameter */
81
+
MAXREQ=0; /* algorithm specific tuning parameter */
61
82
FILE*fptr= (FILE*) NULL;
62
83
intx, ncs, nms;
63
84
@@ -103,106 +124,126 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
103
124
goto on_file_error;
104
125
}
105
126
106
-
if( (getnext(fptr, &X) <0) || (X<0) ) {
127
+
/* get the number of collectives for which rules are provided in the file */
128
+
if( (getnext(fptr, &NCOL) <0) || (NCOL<0) ) {
107
129
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
108
130
goto on_file_error;
109
131
}
110
-
if (X>n_collectives) {
111
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
132
+
if (NCOL>n_collectives) {
133
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", NCOL, n_collectives, fileline));
112
134
goto on_file_error;
113
135
}
114
136
115
-
for (x=0;x<X;x++) { /* for each collective */
137
+
for (x=0;x<NCOL;x++) { /* for each collective */
116
138
117
-
if( (getnext(fptr, &CI) <0) || (CI<0) ) {
139
+
/* get the collective for which rules are being provided */
140
+
if( (getnext(fptr, &COLID) <0) || (COLID<0) ) {
118
141
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
119
142
goto on_file_error;
120
143
}
121
-
if (CI>=n_collectives) {
122
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
144
+
if (COLID>=n_collectives) {
145
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", COLID, n_collectives, fileline));
123
146
goto on_file_error;
124
147
}
125
148
126
-
if (alg_rules[CI].alg_rule_id!=CI) {
127
-
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", CI));
149
+
if (alg_rules[COLID].alg_rule_id!=COLID) {
150
+
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", COLID));
128
151
goto on_file_error;
129
152
}
130
-
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", CI));
131
-
alg_p=&alg_rules[CI];
153
+
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", COLID));
154
+
alg_p=&alg_rules[COLID];
132
155
133
-
alg_p->alg_rule_id=CI;
156
+
alg_p->alg_rule_id=COLID;
134
157
alg_p->n_com_sizes=0;
135
158
alg_p->com_rules= (ompi_coll_com_rule_t*) NULL;
136
159
137
-
if( (getnext (fptr, &NCS) <0) || (NCS<0) ) {
138
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", CI, fileline));
160
+
/* get the number of communicator sizes for which a set of rules are to be provided */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
155
180
goto on_file_error;
156
181
}
157
182
158
-
com_p->mpi_comsize=CS;
183
+
com_p->mpi_comsize=COMSIZE;
159
184
160
-
if( (getnext (fptr, &NMS) <0) || (NMS<0) ) {
161
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline));
185
+
/* get the number of message sizes to specify rules for. inner set size */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
162
188
goto on_file_error;
163
189
}
164
190
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %ld for dynamic rule for collective ID %ld and comm size %ld\n",
OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate msg rules for file [%s]\n", fname));
170
196
goto on_file_error;
171
197
}
172
198
173
199
msg_p=com_p->msg_rules;
174
200
175
-
for (nms=0;nms<NMS;nms++) { /* for each msg size */
201
+
for (nms=0;nms<NMSGSIZES;nms++) { /* for each msg size */
176
202
177
203
msg_p=&(com_p->msg_rules[nms]);
178
204
179
-
if( (getnext (fptr, &MS) <0) || (MS<0) ) {
180
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
205
+
/* read the message size to associate the rule with */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
181
208
goto on_file_error;
182
209
}
183
-
msg_p->msg_size= (size_t)MS;
210
+
msg_p->msg_size= (size_t)MSGSIZE;
184
211
212
+
/* read the collective specific algorithm identifier */
185
213
if( (getnext (fptr, &ALG) <0) || (ALG<0) ) {
186
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
214
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
221
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
193
222
goto on_file_error;
194
223
}
195
224
msg_p->result_topo_faninout=FANINOUT;
196
225
197
-
if( (getnext (fptr, &SS) <0) || (SS<0) ) {
198
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
199
229
goto on_file_error;
200
230
}
201
-
msg_p->result_segsize=SS;
231
+
msg_p->result_segsize=SEGSIZE;
232
+
233
+
/* read the max requests tuning parameter. optional */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
238
+
goto on_file_error;
239
+
}
240
+
msg_p->result_max_requests=MAXREQ;
241
+
}
202
242
203
-
if (!nms&&MS) {
243
+
/* check the first rule is for 0 size. look-up depends on this */
244
+
if (!nms&&MSGSIZE) {
204
245
OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
205
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
246
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MSGSIZE, COLID, ncs, nms, fileline));
206
247
goto on_file_error;
207
248
}
208
249
@@ -219,7 +260,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
219
260
} /* comm size */
220
261
221
262
total_alg_count++;
222
-
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", CI));
263
+
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", COLID));
0 commit comments