#include <cassert>
#include <vector>
+int AsyncInfoTy::synchronize() { // Returns Device.synchronize's result, or OFFLOAD_SUCCESS if no queue is set.
+ int Result = OFFLOAD_SUCCESS;
+ if (AsyncInfo.Queue) {
+ /* If we have a queue we need to synchronize it now. With a null queue the
+  * call is skipped and Result stays OFFLOAD_SUCCESS. The assert below runs
+  * right after the call whatever Result is; like any <cassert> assert it is
+  * compiled out when NDEBUG is defined. */
+ Result = Device.synchronize(&AsyncInfo);
+ assert(AsyncInfo.Queue == nullptr &&
+ "The device plugin should have nulled the queue to indicate there "
+ "are no outstanding actions!");
+ }
+ return Result;
+}
+
/* All begin addresses for partially mapped structs must be 8-aligned in order
* to ensure proper alignment of members. E.g.
*
MapperArgsBase.data(), MapperArgs.data(),
MapperArgSizes.data(), MapperArgTypes.data(),
MapperArgNames.data(), /*arg_mappers*/ nullptr,
- /*__tgt_async_info*/ nullptr);
+ /* AsyncInfoTy */ nullptr);
return rc;
}
int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes,
int64_t *arg_types, map_var_info_t *arg_names,
- void **arg_mappers, __tgt_async_info *AsyncInfo) {
+ void **arg_mappers, AsyncInfoTy *AsyncInfo) {
// process each input.
for (int32_t i = 0; i < arg_num; ++i) {
// Ignore private variables and arrays - there is no mapping for them.
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
int rt =
- Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, AsyncInfo);
+ Device.submitData(TgtPtrBegin, HstPtrBegin, data_size, *AsyncInfo);
if (rt != OFFLOAD_SUCCESS) {
REPORT("Copying data to device failed.\n");
return OFFLOAD_FAIL;
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
- sizeof(void *), AsyncInfo);
+ sizeof(void *), *AsyncInfo);
if (rt != OFFLOAD_SUCCESS) {
REPORT("Copying data to device failed.\n");
return OFFLOAD_FAIL;
: HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete),
HasCloseModifier(HasCloseModifier) {}
};
-
-/// Synchronize device
-static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) {
- assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo");
- if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) {
- REPORT("Failed to synchronize device.\n");
- return OFFLOAD_FAIL;
- }
-
- return OFFLOAD_SUCCESS;
-}
} // namespace
/// Internal function to undo the mapping and retrieve the data from the device.
int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
- void **ArgMappers, __tgt_async_info *AsyncInfo) {
+ void **ArgMappers, AsyncInfoTy *AsyncInfo) {
int Ret;
std::vector<DeallocTgtPtrInfo> DeallocTgtPtrs;
// process each input.
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize,
- AsyncInfo);
+ *AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Copying data from device failed.\n");
return OFFLOAD_FAIL;
// nullptr, there is no data transfer happened because once there is,
// AsyncInfo->Queue will not be nullptr, so again, we don't need to
// synchronize.
- if (AsyncInfo && AsyncInfo->Queue) {
- Ret = syncDevice(Device, AsyncInfo);
+ if (AsyncInfo) {
+ Ret = AsyncInfo->synchronize();
if (Ret != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
}
int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
- void **ArgMappers, __tgt_async_info *AsyncInfo) {
+ void **ArgMappers, AsyncInfoTy *AsyncInfo) {
// process each input.
for (int32_t I = 0; I < ArgNum; ++I) {
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
/// A reference to the \p DeviceTy object
DeviceTy &Device;
- /// A pointer to a \p __tgt_async_info object
- __tgt_async_info *AsyncInfo;
+ /// A pointer to a \p AsyncInfoTy object
+ AsyncInfoTy *AsyncInfo;
// TODO: What would be the best value here? Should we make it configurable?
// If the size is larger than this threshold, we will allocate and transfer it
public:
/// Constructor
- PrivateArgumentManagerTy(DeviceTy &Dev, __tgt_async_info *AsyncInfo)
+ PrivateArgumentManagerTy(DeviceTy &Dev, AsyncInfoTy *AsyncInfo)
: Device(Dev), AsyncInfo(AsyncInfo) {}
/// Add a private argument
#endif
// If first-private, copy data from host
if (IsFirstPrivate) {
- int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo);
+ int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, *AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
DP("Copying data to device failed, failed.\n");
return OFFLOAD_FAIL;
FirstPrivateArgSize, DPxPTR(TgtPtr));
// Transfer data to target device
int Ret = Device.submitData(TgtPtr, FirstPrivateArgBuffer.data(),
- FirstPrivateArgSize, AsyncInfo);
+ FirstPrivateArgSize, *AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
DP("Failed to submit data of private arguments.\n");
return OFFLOAD_FAIL;
std::vector<void *> &TgtArgs,
std::vector<ptrdiff_t> &TgtOffsets,
PrivateArgumentManagerTy &PrivateArgumentManager,
- __tgt_async_info *AsyncInfo) {
+ AsyncInfoTy *AsyncInfo) {
TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", loc);
DeviceTy &Device = PM->Devices[DeviceId];
int Ret = targetDataBegin(loc, Device, ArgNum, ArgBases, Args, ArgSizes,
DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin,
- sizeof(void *), AsyncInfo);
+ sizeof(void *), *AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Copying data to device failed.\n");
return OFFLOAD_FAIL;
int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers,
PrivateArgumentManagerTy &PrivateArgumentManager,
- __tgt_async_info *AsyncInfo) {
+ AsyncInfoTy *AsyncInfo) {
TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", loc);
DeviceTy &Device = PM->Devices[DeviceId];
int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
- int32_t ThreadLimit, int IsTeamConstruct,
- __tgt_async_info *AsyncInfo) {
+ int32_t ThreadLimit, int IsTeamConstruct, AsyncInfoTy *AsyncInfo) {
int32_t DeviceId = Device.DeviceID;
TableMap *TM = getTableMap(HostPtr);
// TODO: This will go away as soon as we consistently pass in async info
// objects (as references).
- __tgt_async_info InternalAsyncInfo;
+ AsyncInfoTy InternalAsyncInfo(Device);
if (!AsyncInfo)
AsyncInfo = &InternalAsyncInfo;
if (IsTeamConstruct)
Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
TgtArgs.size(), TeamNum, ThreadLimit,
- LoopTripCount, AsyncInfo);
+ LoopTripCount, *AsyncInfo);
else
Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
- TgtArgs.size(), AsyncInfo);
+ TgtArgs.size(), *AsyncInfo);
}
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Failed to process data after launching the kernel.\n");
return OFFLOAD_FAIL;
}
- } else if (AsyncInfo->Queue) {
+ } else {
+ // TODO: We should not synchronize here but on the outer level once we pass
+ // in a reference AsyncInfo object.
// If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
// have any argument, and the device supports async operations, so we need a
// sync at this point.
- return syncDevice(Device, AsyncInfo);
+ return AsyncInfo->synchronize();
}
return OFFLOAD_SUCCESS;
extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes,
int64_t *arg_types, map_var_info_t *arg_names,
- void **arg_mappers, __tgt_async_info *AsyncInfo);
+ void **arg_mappers, AsyncInfoTy *AsyncInfo);
extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *arg_names,
- void **ArgMappers, __tgt_async_info *AsyncInfo);
+ void **ArgMappers, AsyncInfoTy *AsyncInfo);
extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes,
int64_t *arg_types, map_var_info_t *arg_names,
- void **arg_mappers, __tgt_async_info *AsyncInfo);
+ void **arg_mappers, AsyncInfoTy *AsyncInfo);
extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *arg_names,
void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit,
- int IsTeamConstruct, __tgt_async_info *AsyncInfo);
+ int IsTeamConstruct, AsyncInfoTy *AsyncInfo);
extern int CheckDeviceAndCtors(int64_t device_id);
// targetDataEnd and targetDataUpdate).
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
void **, int64_t *, int64_t *,
- map_var_info_t *, void **,
- __tgt_async_info *);
+ map_var_info_t *, void **, AsyncInfoTy *);
// Implemented in libomp, they are called from within __tgt_* functions.
#ifdef __cplusplus