19
19
#include < cassert>
20
20
#include < vector>
21
21
22
/// Synchronize the queue held by this object's AsyncInfo, if there is one.
///
/// When AsyncInfo.Queue is null, nothing is synchronized and OFFLOAD_SUCCESS
/// is returned. Otherwise the value returned by Device.synchronize(&AsyncInfo)
/// is passed back to the caller unchanged. After that call the queue is
/// asserted to be null again.
+ int AsyncInfoTy::synchronize () {
23
+ int Result = OFFLOAD_SUCCESS;
24
+ if (AsyncInfo.Queue ) {
25
+ // If we have a queue we need to synchronize it now.
26
+ Result = Device.synchronize (&AsyncInfo);
27
+ assert (AsyncInfo.Queue == nullptr &&
28
+ " The device plugin should have nulled the queue to indicate there "
29
+ " are no outstanding actions!" );
30
+ }
31
+ return Result;
32
+ }
33
+
22
34
/* All begin addresses for partially mapped structs must be 8-aligned in order
23
35
* to ensure proper alignment of members. E.g.
24
36
*
@@ -248,7 +260,7 @@ int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
248
260
MapperArgsBase.data (), MapperArgs.data (),
249
261
MapperArgSizes.data (), MapperArgTypes.data (),
250
262
MapperArgNames.data (), /* arg_mappers*/ nullptr ,
251
- /* __tgt_async_info */ nullptr );
263
+ /* AsyncInfoTy */ nullptr );
252
264
253
265
return rc;
254
266
}
@@ -257,7 +269,7 @@ int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
257
269
int targetDataBegin (ident_t *loc, DeviceTy &Device, int32_t arg_num,
258
270
void **args_base, void **args, int64_t *arg_sizes,
259
271
int64_t *arg_types, map_var_info_t *arg_names,
260
- void **arg_mappers, __tgt_async_info *AsyncInfo) {
272
+ void **arg_mappers, AsyncInfoTy *AsyncInfo) {
261
273
// process each input.
262
274
for (int32_t i = 0 ; i < arg_num; ++i) {
263
275
// Ignore private variables and arrays - there is no mapping for them.
@@ -404,7 +416,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
404
416
DP (" Moving %" PRId64 " bytes (hst:" DPxMOD " ) -> (tgt:" DPxMOD " )\n " ,
405
417
data_size, DPxPTR (HstPtrBegin), DPxPTR (TgtPtrBegin));
406
418
int rt =
407
- Device.submitData (TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo);
419
+ Device.submitData (TgtPtrBegin, HstPtrBegin, data_size, * AsyncInfo);
408
420
if (rt != OFFLOAD_SUCCESS) {
409
421
REPORT (" Copying data to device failed.\n " );
410
422
return OFFLOAD_FAIL;
@@ -418,7 +430,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
418
430
uint64_t Delta = (uint64_t )HstPtrBegin - (uint64_t )HstPtrBase;
419
431
void *TgtPtrBase = (void *)((uint64_t )TgtPtrBegin - Delta);
420
432
int rt = Device.submitData (PointerTgtPtrBegin, &TgtPtrBase,
421
- sizeof (void *), AsyncInfo);
433
+ sizeof (void *), * AsyncInfo);
422
434
if (rt != OFFLOAD_SUCCESS) {
423
435
REPORT (" Copying data to device failed.\n " );
424
436
return OFFLOAD_FAIL;
@@ -452,24 +464,13 @@ struct DeallocTgtPtrInfo {
452
464
: HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete),
453
465
HasCloseModifier (HasCloseModifier) {}
454
466
};
455
-
456
- // / Synchronize device
457
- static int syncDevice (DeviceTy &Device, __tgt_async_info *AsyncInfo) {
458
- assert (AsyncInfo && AsyncInfo->Queue && " Invalid AsyncInfo" );
459
- if (Device.synchronize (AsyncInfo) != OFFLOAD_SUCCESS) {
460
- REPORT (" Failed to synchronize device.\n " );
461
- return OFFLOAD_FAIL;
462
- }
463
-
464
- return OFFLOAD_SUCCESS;
465
- }
466
467
} // namespace
467
468
468
469
// / Internal function to undo the mapping and retrieve the data from the device.
469
470
int targetDataEnd (ident_t *loc, DeviceTy &Device, int32_t ArgNum,
470
471
void **ArgBases, void **Args, int64_t *ArgSizes,
471
472
int64_t *ArgTypes, map_var_info_t *ArgNames,
472
- void **ArgMappers, __tgt_async_info *AsyncInfo) {
473
+ void **ArgMappers, AsyncInfoTy *AsyncInfo) {
473
474
int Ret;
474
475
std::vector<DeallocTgtPtrInfo> DeallocTgtPtrs;
475
476
// process each input.
@@ -584,7 +585,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
584
585
DP (" Moving %" PRId64 " bytes (tgt:" DPxMOD " ) -> (hst:" DPxMOD " )\n " ,
585
586
DataSize, DPxPTR (TgtPtrBegin), DPxPTR (HstPtrBegin));
586
587
Ret = Device.retrieveData (HstPtrBegin, TgtPtrBegin, DataSize,
587
- AsyncInfo);
588
+ * AsyncInfo);
588
589
if (Ret != OFFLOAD_SUCCESS) {
589
590
REPORT (" Copying data from device failed.\n " );
590
591
return OFFLOAD_FAIL;
@@ -642,8 +643,8 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
642
643
// nullptr, there is no data transfer happened because once there is,
643
644
// AsyncInfo->Queue will not be nullptr, so again, we don't need to
644
645
// synchronize.
645
- if (AsyncInfo && AsyncInfo-> Queue ) {
646
- Ret = syncDevice (Device, AsyncInfo);
646
+ if (AsyncInfo) {
647
+ Ret = AsyncInfo-> synchronize ( );
647
648
if (Ret != OFFLOAD_SUCCESS)
648
649
return OFFLOAD_FAIL;
649
650
}
@@ -798,7 +799,7 @@ static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
798
799
int targetDataUpdate (ident_t *loc, DeviceTy &Device, int32_t ArgNum,
799
800
void **ArgsBase, void **Args, int64_t *ArgSizes,
800
801
int64_t *ArgTypes, map_var_info_t *ArgNames,
801
- void **ArgMappers, __tgt_async_info *AsyncInfo) {
802
+ void **ArgMappers, AsyncInfoTy *AsyncInfo) {
802
803
// process each input.
803
804
for (int32_t I = 0 ; I < ArgNum; ++I) {
804
805
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -948,8 +949,8 @@ class PrivateArgumentManagerTy {
948
949
949
950
// / A reference to the \p DeviceTy object
950
951
DeviceTy &Device;
951
- // / A pointer to a \p __tgt_async_info object
952
- __tgt_async_info *AsyncInfo;
952
+ // / A pointer to a \p AsyncInfoTy object
953
+ AsyncInfoTy *AsyncInfo;
953
954
954
955
// TODO: What would be the best value here? Should we make it configurable?
955
956
// If the size is larger than this threshold, we will allocate and transfer it
@@ -958,7 +959,7 @@ class PrivateArgumentManagerTy {
958
959
959
960
public:
960
961
// / Constructor
961
- PrivateArgumentManagerTy (DeviceTy &Dev, __tgt_async_info *AsyncInfo)
962
+ PrivateArgumentManagerTy (DeviceTy &Dev, AsyncInfoTy *AsyncInfo)
962
963
: Device(Dev), AsyncInfo(AsyncInfo) {}
963
964
964
965
// / Add a private argument
@@ -985,7 +986,7 @@ class PrivateArgumentManagerTy {
985
986
#endif
986
987
// If first-private, copy data from host
987
988
if (IsFirstPrivate) {
988
- int Ret = Device.submitData (TgtPtr, HstPtr, ArgSize, AsyncInfo);
989
+ int Ret = Device.submitData (TgtPtr, HstPtr, ArgSize, * AsyncInfo);
989
990
if (Ret != OFFLOAD_SUCCESS) {
990
991
DP (" Copying data to device failed, failed.\n " );
991
992
return OFFLOAD_FAIL;
@@ -1041,7 +1042,7 @@ class PrivateArgumentManagerTy {
1041
1042
FirstPrivateArgSize, DPxPTR (TgtPtr));
1042
1043
// Transfer data to target device
1043
1044
int Ret = Device.submitData (TgtPtr, FirstPrivateArgBuffer.data (),
1044
- FirstPrivateArgSize, AsyncInfo);
1045
+ FirstPrivateArgSize, * AsyncInfo);
1045
1046
if (Ret != OFFLOAD_SUCCESS) {
1046
1047
DP (" Failed to submit data of private arguments.\n " );
1047
1048
return OFFLOAD_FAIL;
@@ -1089,7 +1090,7 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
1089
1090
std::vector<void *> &TgtArgs,
1090
1091
std::vector<ptrdiff_t > &TgtOffsets,
1091
1092
PrivateArgumentManagerTy &PrivateArgumentManager,
1092
- __tgt_async_info *AsyncInfo) {
1093
+ AsyncInfoTy *AsyncInfo) {
1093
1094
TIMESCOPE_WITH_NAME_AND_IDENT (" mappingBeforeTargetRegion" , loc);
1094
1095
DeviceTy &Device = PM->Devices [DeviceId];
1095
1096
int Ret = targetDataBegin (loc, Device, ArgNum, ArgBases, Args, ArgSizes,
@@ -1140,7 +1141,7 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
1140
1141
DP (" Update lambda reference (" DPxMOD " ) -> [" DPxMOD " ]\n " ,
1141
1142
DPxPTR (PointerTgtPtrBegin), DPxPTR (TgtPtrBegin));
1142
1143
Ret = Device.submitData (TgtPtrBegin, &PointerTgtPtrBegin,
1143
- sizeof (void *), AsyncInfo);
1144
+ sizeof (void *), * AsyncInfo);
1144
1145
if (Ret != OFFLOAD_SUCCESS) {
1145
1146
REPORT (" Copying data to device failed.\n " );
1146
1147
return OFFLOAD_FAIL;
@@ -1210,7 +1211,7 @@ static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
1210
1211
int64_t *ArgSizes, int64_t *ArgTypes,
1211
1212
map_var_info_t *ArgNames, void **ArgMappers,
1212
1213
PrivateArgumentManagerTy &PrivateArgumentManager,
1213
- __tgt_async_info *AsyncInfo) {
1214
+ AsyncInfoTy *AsyncInfo) {
1214
1215
TIMESCOPE_WITH_NAME_AND_IDENT (" mappingAfterTargetRegion" , loc);
1215
1216
DeviceTy &Device = PM->Devices [DeviceId];
1216
1217
@@ -1242,8 +1243,7 @@ static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
1242
1243
int target (ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1243
1244
void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
1244
1245
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
1245
- int32_t ThreadLimit, int IsTeamConstruct,
1246
- __tgt_async_info *AsyncInfo) {
1246
+ int32_t ThreadLimit, int IsTeamConstruct, AsyncInfoTy *AsyncInfo) {
1247
1247
int32_t DeviceId = Device.DeviceID ;
1248
1248
1249
1249
TableMap *TM = getTableMap (HostPtr);
@@ -1266,7 +1266,7 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1266
1266
1267
1267
// TODO: This will go away as soon as we consistently pass in async info
1268
1268
// objects (as references).
1269
- __tgt_async_info InternalAsyncInfo;
1269
+ AsyncInfoTy InternalAsyncInfo (Device) ;
1270
1270
if (!AsyncInfo)
1271
1271
AsyncInfo = &InternalAsyncInfo;
1272
1272
@@ -1301,10 +1301,10 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1301
1301
if (IsTeamConstruct)
1302
1302
Ret = Device.runTeamRegion (TgtEntryPtr, &TgtArgs[0 ], &TgtOffsets[0 ],
1303
1303
TgtArgs.size (), TeamNum, ThreadLimit,
1304
- LoopTripCount, AsyncInfo);
1304
+ LoopTripCount, * AsyncInfo);
1305
1305
else
1306
1306
Ret = Device.runRegion (TgtEntryPtr, &TgtArgs[0 ], &TgtOffsets[0 ],
1307
- TgtArgs.size (), AsyncInfo);
1307
+ TgtArgs.size (), * AsyncInfo);
1308
1308
}
1309
1309
1310
1310
if (Ret != OFFLOAD_SUCCESS) {
@@ -1322,11 +1322,13 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1322
1322
REPORT (" Failed to process data after launching the kernel.\n " );
1323
1323
return OFFLOAD_FAIL;
1324
1324
}
1325
- } else if (AsyncInfo->Queue ) {
1325
+ } else {
1326
+ // TODO: We should not synchronize here but on the outer level once we pass
1327
+ // in a reference AsyncInfo object.
1326
1328
// If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
1327
1329
// have any arguments, and the device supports async operations, so we need a
1328
1330
// sync at this point.
1329
- return syncDevice (Device, AsyncInfo);
1331
+ return AsyncInfo-> synchronize ( );
1330
1332
}
1331
1333
1332
1334
return OFFLOAD_SUCCESS;
0 commit comments