11
11
//
12
12
// ===----------------------------------------------------------------------===//
13
13
14
+ #include " omptarget.h"
14
15
#include " device.h"
15
16
#include " private.h"
16
17
#include " rtl.h"
@@ -159,8 +160,9 @@ static int InitLibrary(DeviceTy &Device) {
159
160
DP (" Has pending ctors... call now\n " );
160
161
for (auto &entry : lib.second .PendingCtors ) {
161
162
void *ctor = entry;
162
- int rc = target (nullptr , Device, ctor, 0 , nullptr , nullptr , nullptr ,
163
- nullptr , nullptr , nullptr , 1 , 1 , true /* team*/ );
163
+ int rc =
164
+ target (nullptr , Device, ctor, 0 , nullptr , nullptr , nullptr ,
165
+ nullptr , nullptr , nullptr , 1 , 1 , true /* team*/ , nullptr );
164
166
if (rc != OFFLOAD_SUCCESS) {
165
167
REPORT (" Running ctor " DPxMOD " failed.\n " , DPxPTR (ctor));
166
168
Device.PendingGlobalsMtx .unlock ();
@@ -255,7 +257,7 @@ int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
255
257
int targetDataBegin (ident_t *loc, DeviceTy &Device, int32_t arg_num,
256
258
void **args_base, void **args, int64_t *arg_sizes,
257
259
int64_t *arg_types, map_var_info_t *arg_names,
258
- void **arg_mappers, __tgt_async_info *async_info_ptr ) {
260
+ void **arg_mappers, __tgt_async_info *AsyncInfo ) {
259
261
// process each input.
260
262
for (int32_t i = 0 ; i < arg_num; ++i) {
261
263
// Ignore private variables and arrays - there is no mapping for them.
@@ -401,8 +403,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
401
403
if (copy && !IsHostPtr) {
402
404
DP (" Moving %" PRId64 " bytes (hst:" DPxMOD " ) -> (tgt:" DPxMOD " )\n " ,
403
405
data_size, DPxPTR (HstPtrBegin), DPxPTR (TgtPtrBegin));
404
- int rt = Device. submitData (TgtPtrBegin, HstPtrBegin, data_size,
405
- async_info_ptr );
406
+ int rt =
407
+ Device. submitData (TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo );
406
408
if (rt != OFFLOAD_SUCCESS) {
407
409
REPORT (" Copying data to device failed.\n " );
408
410
return OFFLOAD_FAIL;
@@ -416,7 +418,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
416
418
uint64_t Delta = (uint64_t )HstPtrBegin - (uint64_t )HstPtrBase;
417
419
void *TgtPtrBase = (void *)((uint64_t )TgtPtrBegin - Delta);
418
420
int rt = Device.submitData (PointerTgtPtrBegin, &TgtPtrBase,
419
- sizeof (void *), async_info_ptr );
421
+ sizeof (void *), AsyncInfo );
420
422
if (rt != OFFLOAD_SUCCESS) {
421
423
REPORT (" Copying data to device failed.\n " );
422
424
return OFFLOAD_FAIL;
@@ -791,12 +793,12 @@ static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
791
793
}
792
794
793
795
/// Internal function to pass data to/from the target.
794
- // async_info_ptr is currently unused, added here so targetDataUpdate has the
796
+ // AsyncInfo is currently unused, added here so targetDataUpdate has the
795
797
// same signature as targetDataBegin and targetDataEnd.
796
798
int targetDataUpdate (ident_t *loc, DeviceTy &Device, int32_t ArgNum,
797
799
void **ArgsBase, void **Args, int64_t *ArgSizes,
798
800
int64_t *ArgTypes, map_var_info_t *ArgNames,
799
- void **ArgMappers, __tgt_async_info *AsyncInfoPtr ) {
801
+ void **ArgMappers, __tgt_async_info *AsyncInfo ) {
800
802
// process each input.
801
803
for (int32_t I = 0 ; I < ArgNum; ++I) {
802
804
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -1240,7 +1242,8 @@ static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
1240
1242
int target (ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1241
1243
void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
1242
1244
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
1243
- int32_t ThreadLimit, int IsTeamConstruct) {
1245
+ int32_t ThreadLimit, int IsTeamConstruct,
1246
+ __tgt_async_info *AsyncInfo) {
1244
1247
int32_t DeviceId = Device.DeviceID ;
1245
1248
1246
1249
TableMap *TM = getTableMap (HostPtr);
@@ -1261,19 +1264,23 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1261
1264
}
1262
1265
assert (TargetTable && " Global data has not been mapped\n " );
1263
1266
1264
- __tgt_async_info AsyncInfo;
1267
+ // TODO: This will go away as soon as we consistently pass in async info
1268
+ // objects (as references).
1269
+ __tgt_async_info InternalAsyncInfo;
1270
+ if (!AsyncInfo)
1271
+ AsyncInfo = &InternalAsyncInfo;
1265
1272
1266
1273
std::vector<void *> TgtArgs;
1267
1274
std::vector<ptrdiff_t > TgtOffsets;
1268
1275
1269
- PrivateArgumentManagerTy PrivateArgumentManager (Device, & AsyncInfo);
1276
+ PrivateArgumentManagerTy PrivateArgumentManager (Device, AsyncInfo);
1270
1277
1271
1278
int Ret;
1272
1279
if (ArgNum) {
1273
1280
// Process data, such as data mapping, before launching the kernel
1274
1281
Ret = processDataBefore (loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
1275
1282
ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
1276
- TgtOffsets, PrivateArgumentManager, & AsyncInfo);
1283
+ TgtOffsets, PrivateArgumentManager, AsyncInfo);
1277
1284
if (Ret != OFFLOAD_SUCCESS) {
1278
1285
REPORT (" Failed to process data before launching the kernel.\n " );
1279
1286
return OFFLOAD_FAIL;
@@ -1294,10 +1301,10 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1294
1301
if (IsTeamConstruct)
1295
1302
Ret = Device.runTeamRegion (TgtEntryPtr, &TgtArgs[0 ], &TgtOffsets[0 ],
1296
1303
TgtArgs.size (), TeamNum, ThreadLimit,
1297
- LoopTripCount, & AsyncInfo);
1304
+ LoopTripCount, AsyncInfo);
1298
1305
else
1299
1306
Ret = Device.runRegion (TgtEntryPtr, &TgtArgs[0 ], &TgtOffsets[0 ],
1300
- TgtArgs.size (), & AsyncInfo);
1307
+ TgtArgs.size (), AsyncInfo);
1301
1308
}
1302
1309
1303
1310
if (Ret != OFFLOAD_SUCCESS) {
@@ -1310,16 +1317,16 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1310
1317
// variables
1311
1318
Ret = processDataAfter (loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
1312
1319
ArgSizes, ArgTypes, ArgNames, ArgMappers,
1313
- PrivateArgumentManager, & AsyncInfo);
1320
+ PrivateArgumentManager, AsyncInfo);
1314
1321
if (Ret != OFFLOAD_SUCCESS) {
1315
1322
REPORT (" Failed to process data after launching the kernel.\n " );
1316
1323
return OFFLOAD_FAIL;
1317
1324
}
1318
- } else if (AsyncInfo. Queue ) {
1325
+ } else if (AsyncInfo-> Queue ) {
1319
1326
// If ArgNum is zero, but AsyncInfo->Queue is valid, then the kernel doesn't
1320
1327
// have any arguments, and the device supports async operations, so we need a
1321
1328
// sync at this point.
1322
- return syncDevice (Device, & AsyncInfo);
1329
+ return syncDevice (Device, AsyncInfo);
1323
1330
}
1324
1331
1325
1332
return OFFLOAD_SUCCESS;
0 commit comments