Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit dfcf58e

Browse files
author
rhc54
committed
Merge pull request #979 from yburette/topic/merge_v1.10
mtl/ofi: merge commits from master
2 parents 6bbdd2f + b079a5e commit dfcf58e

File tree

6 files changed

+46
-19
lines changed

6 files changed

+46
-19
lines changed

ompi/mca/mtl/ofi/mtl_ofi.c

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,15 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
110110
*/
111111
for (i = 0; i < nprocs; ++i) {
112112
endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t);
113+
if (NULL == endpoint) {
114+
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
115+
"%s:%d: mtl/ofi: could not allocate endpoint"
116+
" structure\n",
117+
__FILE__, __LINE__);
118+
ret = OMPI_ERROR;
119+
goto bail;
120+
}
121+
113122
endpoint->mtl_ofi_module = &ompi_mtl_ofi;
114123
endpoint->peer_fiaddr = fi_addrs[i];
115124

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
33
*
44
* $COPYRIGHT$
55
*
@@ -50,10 +50,6 @@ BEGIN_C_DECLS
5050
extern mca_mtl_ofi_module_t ompi_mtl_ofi;
5151
extern mca_base_framework_t ompi_mtl_base_framework;
5252

53-
extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
54-
size_t nprocs,
55-
struct ompi_proc_t **procs);
56-
5753
extern int ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl,
5854
size_t nprocs,
5955
struct ompi_proc_t **procs);
@@ -235,7 +231,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
235231
ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
236232

237233
ompi_proc = ompi_comm_peer_lookup(comm, dest);
238-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
234+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
239235

240236
ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
241237
if (OMPI_SUCCESS != ompi_ret) return ompi_ret;
@@ -266,6 +262,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
266262
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
267263
"%s:%d: fi_trecv failed: %s(%zd)",
268264
__FILE__, __LINE__, fi_strerror(-ret), ret);
265+
free(ack_req);
269266
return ompi_mtl_ofi_get_error(ret);
270267
}
271268
} else {
@@ -284,6 +281,10 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
284281
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
285282
"%s:%d: fi_tinject failed: %s(%zd)",
286283
__FILE__, __LINE__, fi_strerror(-ret), ret);
284+
if (ack_req) {
285+
fi_cancel((fid_t)ompi_mtl_ofi.ep, &ack_req->ctx);
286+
free(ack_req);
287+
}
287288
return ompi_mtl_ofi_get_error(ret);
288289
}
289290

@@ -460,7 +461,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
460461
if (ompi_mtl_ofi.any_addr == ofi_req->remote_addr) {
461462
src = MTL_OFI_GET_SOURCE(wc->tag);
462463
ompi_proc = ompi_comm_peer_lookup(ofi_req->comm, src);
463-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
464+
endpoint = ompi_mtl_ofi_get_endpoint(ofi_req->mtl, ompi_proc);
464465
ofi_req->remote_addr = endpoint->peer_fiaddr;
465466
}
466467
MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep,
@@ -532,7 +533,7 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
532533

533534
if (MPI_ANY_SOURCE != src) {
534535
ompi_proc = ompi_comm_peer_lookup(comm, src);
535-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
536+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
536537
remote_addr = endpoint->peer_fiaddr;
537538
} else {
538539
remote_addr = ompi_mtl_ofi.any_addr;
@@ -744,7 +745,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl,
744745
*/
745746
if (MPI_ANY_SOURCE != src) {
746747
ompi_proc = ompi_comm_peer_lookup( comm, src );
747-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
748+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
748749
remote_proc = endpoint->peer_fiaddr;
749750
}
750751

@@ -829,7 +830,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
829830
*/
830831
if (MPI_ANY_SOURCE != src) {
831832
ompi_proc = ompi_comm_peer_lookup( comm, src );
832-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
833+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
833834
remote_proc = endpoint->peer_fiaddr;
834835
}
835836

@@ -864,11 +865,13 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
864865
* The search request completed but no matching message was found.
865866
*/
866867
*matched = 0;
868+
free(ofi_req);
867869
return OMPI_SUCCESS;
868870
} else if (OPAL_UNLIKELY(0 > ret)) {
869871
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
870872
"%s:%d: fi_trecvmsg failed: %s(%zd)",
871873
__FILE__, __LINE__, fi_strerror(-ret), ret);
874+
free(ofi_req);
872875
return ompi_mtl_ofi_get_error(ret);
873876
}
874877

@@ -894,6 +897,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
894897

895898
} else {
896899
(*message) = MPI_MESSAGE_NULL;
900+
free(ofi_req);
897901
}
898902

899903
return OMPI_SUCCESS;
@@ -961,7 +965,6 @@ ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl,
961965
return OMPI_SUCCESS;
962966
}
963967

964-
965968
END_C_DECLS
966969

967970
#endif /* MTL_OFI_H_HAS_BEEN_INCLUDED */

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
3+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
44
*
55
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
66
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
@@ -241,6 +241,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
241241
hints->domain_attr->threading = FI_THREAD_UNSPEC;
242242
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
243243
hints->domain_attr->resource_mgmt = FI_RM_ENABLED;
244+
hints->domain_attr->av_type = FI_AV_MAP;
244245

245246
/**
246247
* FI_VERSION provides binary backward and forward compatibility support

ompi/mca/mtl/ofi/mtl_ofi_endpoint.h

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
33
*
44
* $COPYRIGHT$
55
*
@@ -11,10 +11,12 @@
1111
#ifndef OMPI_MTL_OFI_ENDPOINT_H
1212
#define OMPI_MTL_OFI_ENDPOINT_H
1313

14-
#include "mtl_ofi.h"
15-
1614
BEGIN_C_DECLS
1715

16+
extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
17+
size_t nprocs,
18+
struct ompi_proc_t **procs);
19+
1820
OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint_t);
1921

2022
/**
@@ -35,7 +37,15 @@ struct mca_mtl_ofi_endpoint_t {
3537
};
3638

3739
typedef struct mca_mtl_ofi_endpoint_t mca_mtl_ofi_endpoint_t;
38-
OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint);
40+
41+
static inline mca_mtl_ofi_endpoint_t *ompi_mtl_ofi_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
42+
{
43+
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
44+
ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc);
45+
}
46+
47+
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
48+
}
3949

4050
END_C_DECLS
4151
#endif

ompi/mca/mtl/ofi/mtl_ofi_request.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
33
*
44
* $COPYRIGHT$
55
*
@@ -55,6 +55,10 @@ struct ompi_mtl_ofi_request_t {
5555
/* lookup source of an ANY_SOURCE Recv */
5656
struct ompi_communicator_t *comm;
5757

58+
/** Reference to the MTL used to lookup */
59+
/* source of an ANY_SOURCE Recv */
60+
struct mca_mtl_base_module_t* mtl;
61+
5862
/** Pack buffer */
5963
void *buffer;
6064

ompi/mca/mtl/ofi/mtl_ofi_types.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ typedef struct mca_mtl_ofi_component_t {
8484
{ \
8585
match_bits = contextid; \
8686
match_bits = (match_bits << 16); \
87-
match_bits |= source; \
87+
match_bits |= (uint64_t)source; \
8888
match_bits = (match_bits << 32); \
8989
match_bits |= (MTL_OFI_TAG_MASK & tag) | type; \
9090
}
@@ -106,7 +106,7 @@ typedef struct mca_mtl_ofi_component_t {
106106
match_bits = (match_bits << 32); \
107107
mask_bits |= MTL_OFI_SOURCE_MASK; \
108108
} else { \
109-
match_bits |= source; \
109+
match_bits |= (uint64_t)source; \
110110
match_bits = (match_bits << 32); \
111111
} \
112112
\

0 commit comments

Comments
 (0)