Skip to content

Commit 9427287

Browse files
committed
[OpenMP][NFC] Unify target API with other by passing a __tgt_async_info pointer
Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D96430
1 parent 44f3022 commit 9427287

File tree

4 files changed

+35
-28
lines changed

4 files changed

+35
-28
lines changed

openmp/libomptarget/src/interface.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
336336

337337
DeviceTy &Device = PM->Devices[device_id];
338338
int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
339-
arg_types, arg_names, arg_mappers);
339+
arg_types, arg_names, arg_mappers, nullptr);
340340
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
341341
}
342342

@@ -408,8 +408,9 @@ EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
408408
#endif
409409

410410
DeviceTy &Device = PM->Devices[device_id];
411-
int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
412-
arg_types, arg_names, arg_mappers, 0, 0, false /*team*/);
411+
int rc =
412+
target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
413+
arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, nullptr);
413414
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
414415
return rc;
415416
}
@@ -491,7 +492,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
491492
DeviceTy &Device = PM->Devices[device_id];
492493
int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
493494
arg_types, arg_names, arg_mappers, team_num, thread_limit,
494-
true /*team*/);
495+
true /*team*/, nullptr);
495496
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
496497
return rc;
497498
}

openmp/libomptarget/src/omptarget.cpp

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14+
#include "omptarget.h"
1415
#include "device.h"
1516
#include "private.h"
1617
#include "rtl.h"
@@ -159,8 +160,9 @@ static int InitLibrary(DeviceTy &Device) {
159160
DP("Has pending ctors... call now\n");
160161
for (auto &entry : lib.second.PendingCtors) {
161162
void *ctor = entry;
162-
int rc = target(nullptr, Device, ctor, 0, nullptr, nullptr, nullptr,
163-
nullptr, nullptr, nullptr, 1, 1, true /*team*/);
163+
int rc =
164+
target(nullptr, Device, ctor, 0, nullptr, nullptr, nullptr,
165+
nullptr, nullptr, nullptr, 1, 1, true /*team*/, nullptr);
164166
if (rc != OFFLOAD_SUCCESS) {
165167
REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
166168
Device.PendingGlobalsMtx.unlock();
@@ -255,7 +257,7 @@ int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
255257
int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
256258
void **args_base, void **args, int64_t *arg_sizes,
257259
int64_t *arg_types, map_var_info_t *arg_names,
258-
void **arg_mappers, __tgt_async_info *async_info_ptr) {
260+
void **arg_mappers, __tgt_async_info *AsyncInfo) {
259261
// process each input.
260262
for (int32_t i = 0; i < arg_num; ++i) {
261263
// Ignore private variables and arrays - there is no mapping for them.
@@ -401,8 +403,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
401403
if (copy && !IsHostPtr) {
402404
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
403405
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
404-
int rt = Device.submitData(TgtPtrBegin, HstPtrBegin, data_size,
405-
async_info_ptr);
406+
int rt =
407+
Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo);
406408
if (rt != OFFLOAD_SUCCESS) {
407409
REPORT("Copying data to device failed.\n");
408410
return OFFLOAD_FAIL;
@@ -416,7 +418,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
416418
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
417419
void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
418420
int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
419-
sizeof(void *), async_info_ptr);
421+
sizeof(void *), AsyncInfo);
420422
if (rt != OFFLOAD_SUCCESS) {
421423
REPORT("Copying data to device failed.\n");
422424
return OFFLOAD_FAIL;
@@ -791,12 +793,12 @@ static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
791793
}
792794

793795
/// Internal function to pass data to/from the target.
794-
// async_info_ptr is currently unused, added here so targetDataUpdate has the
796+
// AsyncInfo is currently unused, added here so targetDataUpdate has the
795797
// same signature as targetDataBegin and targetDataEnd.
796798
int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
797799
void **ArgsBase, void **Args, int64_t *ArgSizes,
798800
int64_t *ArgTypes, map_var_info_t *ArgNames,
799-
void **ArgMappers, __tgt_async_info *AsyncInfoPtr) {
801+
void **ArgMappers, __tgt_async_info *AsyncInfo) {
800802
// process each input.
801803
for (int32_t I = 0; I < ArgNum; ++I) {
802804
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -1240,7 +1242,8 @@ static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
12401242
int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
12411243
void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
12421244
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
1243-
int32_t ThreadLimit, int IsTeamConstruct) {
1245+
int32_t ThreadLimit, int IsTeamConstruct,
1246+
__tgt_async_info *AsyncInfo) {
12441247
int32_t DeviceId = Device.DeviceID;
12451248

12461249
TableMap *TM = getTableMap(HostPtr);
@@ -1261,19 +1264,23 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
12611264
}
12621265
assert(TargetTable && "Global data has not been mapped\n");
12631266

1264-
__tgt_async_info AsyncInfo;
1267+
// TODO: This will go away as soon as we consistently pass in async info
1268+
// objects (as references).
1269+
__tgt_async_info InternalAsyncInfo;
1270+
if (!AsyncInfo)
1271+
AsyncInfo = &InternalAsyncInfo;
12651272

12661273
std::vector<void *> TgtArgs;
12671274
std::vector<ptrdiff_t> TgtOffsets;
12681275

1269-
PrivateArgumentManagerTy PrivateArgumentManager(Device, &AsyncInfo);
1276+
PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
12701277

12711278
int Ret;
12721279
if (ArgNum) {
12731280
// Process data, such as data mapping, before launching the kernel
12741281
Ret = processDataBefore(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
12751282
ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
1276-
TgtOffsets, PrivateArgumentManager, &AsyncInfo);
1283+
TgtOffsets, PrivateArgumentManager, AsyncInfo);
12771284
if (Ret != OFFLOAD_SUCCESS) {
12781285
REPORT("Failed to process data before launching the kernel.\n");
12791286
return OFFLOAD_FAIL;
@@ -1294,10 +1301,10 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
12941301
if (IsTeamConstruct)
12951302
Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
12961303
TgtArgs.size(), TeamNum, ThreadLimit,
1297-
LoopTripCount, &AsyncInfo);
1304+
LoopTripCount, AsyncInfo);
12981305
else
12991306
Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
1300-
TgtArgs.size(), &AsyncInfo);
1307+
TgtArgs.size(), AsyncInfo);
13011308
}
13021309

13031310
if (Ret != OFFLOAD_SUCCESS) {
@@ -1310,16 +1317,16 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
13101317
// variables
13111318
Ret = processDataAfter(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
13121319
ArgSizes, ArgTypes, ArgNames, ArgMappers,
1313-
PrivateArgumentManager, &AsyncInfo);
1320+
PrivateArgumentManager, AsyncInfo);
13141321
if (Ret != OFFLOAD_SUCCESS) {
13151322
REPORT("Failed to process data after launching the kernel.\n");
13161323
return OFFLOAD_FAIL;
13171324
}
1318-
} else if (AsyncInfo.Queue) {
1325+
} else if (AsyncInfo->Queue) {
13191326
// If ArgNum is zero, but AsyncInfo->Queue is valid, then the kernel doesn't
13201327
// have any arguments, and the device supports async operations, so we need a
13211328
// sync at this point.
1322-
return syncDevice(Device, &AsyncInfo);
1329+
return syncDevice(Device, AsyncInfo);
13231330
}
13241331

13251332
return OFFLOAD_SUCCESS;

openmp/libomptarget/src/private.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
2424
void **args_base, void **args, int64_t *arg_sizes,
2525
int64_t *arg_types, map_var_info_t *arg_names,
26-
void **arg_mappers,
27-
__tgt_async_info *async_info_ptr);
26+
void **arg_mappers, __tgt_async_info *AsyncInfo);
2827

2928
extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
3029
void **ArgBases, void **Args, int64_t *ArgSizes,
@@ -34,14 +33,13 @@ extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
3433
extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num,
3534
void **args_base, void **args, int64_t *arg_sizes,
3635
int64_t *arg_types, map_var_info_t *arg_names,
37-
void **arg_mappers,
38-
__tgt_async_info *async_info_ptr = nullptr);
36+
void **arg_mappers, __tgt_async_info *AsyncInfo);
3937

4038
extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
4139
void **ArgBases, void **Args, int64_t *ArgSizes,
4240
int64_t *ArgTypes, map_var_info_t *arg_names,
4341
void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit,
44-
int IsTeamConstruct);
42+
int IsTeamConstruct, __tgt_async_info *AsyncInfo);
4543

4644
extern int CheckDeviceAndCtors(int64_t device_id);
4745

openmp/libomptarget/src/rtl.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,9 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
401401
Device.PendingGlobalsMtx.lock();
402402
if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
403403
for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
404-
int rc = target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr,
405-
nullptr, nullptr, nullptr, 1, 1, true /*team*/);
404+
int rc =
405+
target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr,
406+
nullptr, nullptr, nullptr, 1, 1, true /*team*/, nullptr);
406407
if (rc != OFFLOAD_SUCCESS) {
407408
DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
408409
}

0 commit comments

Comments
 (0)