Skip to content

Commit d42e096

Browse files
committed
coll/libnbc: rewrite parts of libnbc
This commit rewrites parts of libnbc to fix issues identified by coverity and myself. The changes are as follows: - libnbc function would return invalid error codes (internal to libnbc) to the mpi layer. These codes names are of the form NBC_. They do not match up with the error codes expected by the mpi layer. I purged the use of all these error codes with the exception of NBC_OK and NBC_CONTINUE in progress. These codes are used to identify when a request handle is complete. - Handles and schedules were leaked by all collective routines on error. A new routine was added to return a collective handle (NBC_Return_handle). - Temporary buffers containting in/out neighbors for neighborhood collectives were always leaked. - Neigborhood collectives contained code to handle MPI_IN_PLACE which is never a valid input for the send or receive buffer. Stipped this code out. - Files were inconsistently named. Most are nbc_isomething.c but one was named coll_libnbc_ireduce_scatter_block.c. - Made the NBC_Schedule "structure" and object so it can be retained/released. This may enable the use of schedule caching at a later time. More testing will be needed to ensure the caching code works. If it doesn't the code should be stripped out completely. - Added code to simply common case of scheduling send/recv + barrier. - Code cleanup for readability. The code now passes the clang static analyzer. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
1 parent ae0de54 commit d42e096

30 files changed

+3901
-2876
lines changed

ompi/mca/coll/libnbc/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
sources = \
2323
coll_libnbc.h \
2424
coll_libnbc_component.c \
25-
coll_libnbc_ireduce_scatter_block.c \
2625
nbc.c \
2726
nbc_internal.h \
2827
libdict/dict.h \
@@ -49,6 +48,7 @@ sources = \
4948
nbc_ineighbor_alltoallw.c \
5049
nbc_ireduce.c \
5150
nbc_ireduce_scatter.c \
51+
nbc_ireduce_scatter_block.c \
5252
nbc_iscan.c \
5353
nbc_iscatter.c \
5454
nbc_iscatterv.c \

ompi/mca/coll/libnbc/coll_libnbc.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,16 @@ OBJ_CLASS_DECLARATION(ompi_coll_libnbc_module_t);
9898

9999
typedef ompi_coll_libnbc_module_t NBC_Comminfo;
100100

101-
/* a schedule is basically a pointer to some memory location where the
102-
* schedule array resides */
103-
typedef void* NBC_Schedule;
101+
struct NBC_Schedule {
102+
opal_object_t super;
103+
volatile int size;
104+
volatile int current_round_offset;
105+
char *data;
106+
};
107+
108+
typedef struct NBC_Schedule NBC_Schedule;
109+
110+
OBJ_CLASS_DECLARATION(NBC_Schedule);
104111

105112
struct ompi_coll_libnbc_request_t {
106113
ompi_request_t super;
@@ -110,7 +117,7 @@ struct ompi_coll_libnbc_request_t {
110117
volatile int req_count;
111118
ompi_request_t **req_array;
112119
NBC_Comminfo *comminfo;
113-
volatile NBC_Schedule *schedule;
120+
NBC_Schedule *schedule;
114121
void *tmpbuf; /* temporary buffer e.g. used for Reduce */
115122
/* TODO: we should make a handle pointer to a state later (that the user
116123
* can move request handles) */
@@ -134,9 +141,9 @@ typedef ompi_coll_libnbc_request_t NBC_Handle;
134141

135142
#define OMPI_COLL_LIBNBC_REQUEST_RETURN(req) \
136143
do { \
137-
OMPI_REQUEST_FINI(&request->super); \
144+
OMPI_REQUEST_FINI(&(req)->super); \
138145
opal_free_list_return (&mca_coll_libnbc_component.requests, \
139-
(opal_free_list_item_t*) req); \
146+
(opal_free_list_item_t*) (req)); \
140147
} while (0)
141148

142149
int ompi_coll_libnbc_progress(void);

ompi/mca/coll/libnbc/coll_libnbc_component.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ ompi_coll_libnbc_progress(void)
239239

240240
OPAL_LIST_FOREACH_SAFE(request, next, &mca_coll_libnbc_component.active_requests,
241241
ompi_coll_libnbc_request_t) {
242-
if (NBC_OK == NBC_Progress(request)) {
242+
if (OMPI_SUCCESS == NBC_Progress(request)) {
243243
/* done, remove and complete */
244244
opal_list_remove_item(&mca_coll_libnbc_component.active_requests,
245245
&request->super.super.super);

ompi/mca/coll/libnbc/coll_libnbc_ireduce_scatter_block.c

Lines changed: 0 additions & 243 deletions
This file was deleted.

0 commit comments

Comments
 (0)