@@ -425,13 +425,20 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
425
425
static int openib_btl_size_queues (struct mca_btl_openib_module_t * openib_btl )
426
426
{
427
427
uint32_t send_cqes , recv_cqes ;
428
- int rc = OPAL_SUCCESS , qp ;
428
+ int rc = OPAL_SUCCESS ;
429
429
mca_btl_openib_device_t * device = openib_btl -> device ;
430
+ uint32_t requested [BTL_OPENIB_MAX_CQ ];
431
+ bool need_resize = false;
430
432
431
433
opal_mutex_lock (& openib_btl -> ib_lock );
434
+
435
+ for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++ cq ) {
436
+ requested [cq ] = 0 ;
437
+ }
438
+
432
439
/* figure out reasonable sizes for completion queues */
433
- for ( qp = 0 ; qp < mca_btl_openib_component .num_qps ; qp ++ ) {
434
- if (BTL_OPENIB_QP_TYPE_SRQ (qp )) {
440
+ for ( int qp = 0 ; qp < mca_btl_openib_component .num_qps ; qp ++ ) {
441
+ if (BTL_OPENIB_QP_TYPE_SRQ (qp )) {
435
442
send_cqes = mca_btl_openib_component .qp_infos [qp ].u .srq_qp .sd_max ;
436
443
recv_cqes = mca_btl_openib_component .qp_infos [qp ].rd_num ;
437
444
} else {
@@ -440,24 +447,30 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl)
440
447
recv_cqes = send_cqes ;
441
448
}
442
449
443
- opal_mutex_lock (& openib_btl -> device -> device_lock );
444
- openib_btl -> device -> cq_size [qp_cq_prio (qp )] += recv_cqes ;
445
- openib_btl -> device -> cq_size [BTL_OPENIB_LP_CQ ] += send_cqes ;
446
- opal_mutex_unlock (& openib_btl -> device -> device_lock );
450
+ requested [qp_cq_prio (qp )] += recv_cqes ;
451
+ requested [BTL_OPENIB_LP_CQ ] += send_cqes ;
447
452
}
448
453
449
- rc = adjust_cq (device , BTL_OPENIB_HP_CQ );
450
- if (OPAL_SUCCESS != rc ) {
451
- goto out ;
452
- }
454
+ opal_mutex_lock (& openib_btl -> device -> device_lock );
455
+ for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++ cq ) {
456
+ if (requested [cq ] < mca_btl_openib_component .ib_cq_size [cq ]) {
457
+ requested [cq ] = mca_btl_openib_component .ib_cq_size [cq ];
458
+ } else if (requested [cq ] > openib_btl -> device -> ib_dev_attr .max_cqe ) {
459
+ requested [cq ] = openib_btl -> device -> ib_dev_attr .max_cqe ;
460
+ }
453
461
454
- rc = adjust_cq (device , BTL_OPENIB_LP_CQ );
455
- if (OPAL_SUCCESS != rc ) {
456
- goto out ;
457
- }
462
+ if (openib_btl -> device -> cq_size [cq ] < requested [cq ]) {
463
+ openib_btl -> device -> cq_size [cq ] = requested [cq ];
458
464
459
- out :
465
+ rc = adjust_cq (device , cq );
466
+ if (OPAL_SUCCESS != rc ) {
467
+ break ;
468
+ }
469
+ }
470
+ }
471
+ opal_mutex_unlock (& openib_btl -> device -> device_lock );
460
472
opal_mutex_unlock (& openib_btl -> ib_lock );
473
+
461
474
return rc ;
462
475
}
463
476
@@ -1107,7 +1120,7 @@ int mca_btl_openib_add_procs(
1107
1120
}
1108
1121
1109
1122
if (nprocs_new ) {
1110
- OPAL_THREAD_ADD32 (& openib_btl -> num_peers , nprocs_new );
1123
+ opal_atomic_add_32 (& openib_btl -> num_peers , nprocs_new );
1111
1124
1112
1125
/* adjust cq sizes given the new procs */
1113
1126
rc = openib_btl_size_queues (openib_btl );
@@ -1217,7 +1230,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
1217
1230
1218
1231
/* this is a new process to this openib btl
1219
1232
* account for this proc if needed */
1220
- OPAL_THREAD_ADD32 (& openib_btl -> num_peers , 1 );
1233
+ opal_atomic_add_32 (& openib_btl -> num_peers , 1 );
1221
1234
rc = openib_btl_size_queues (openib_btl );
1222
1235
if (OPAL_SUCCESS != rc ) {
1223
1236
BTL_ERROR (("error creating cqs" ));
0 commit comments