// Print the device information
void __tgt_rtl_print_device_info(int32_t ID);
+// Event related interfaces. It is expected to use the interfaces in the
+// following way:
+// 1) Create an event on the target device (__tgt_rtl_create_event).
+// 2) Record the event based on the status of \p AsyncInfo->Queue at the moment
+// of function call to __tgt_rtl_record_event. An event becomes "meaningful"
+// once it is recorded, such that others can depend on it.
+// 3) Call __tgt_rtl_wait_event to set dependence on the event. Whether the
+// operation is blocking or non-blocking depends on the target. It is expected
+// to be non-blocking, just set dependence and return.
+// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the
+// thread calling the function.
+// 5) Destroy the event (__tgt_rtl_destroy_event).
+// {
+int32_t __tgt_rtl_create_event(int32_t ID, void **Event);
+
+int32_t __tgt_rtl_record_event(int32_t ID, void *Event,
+ __tgt_async_info *AsyncInfo);
+
+int32_t __tgt_rtl_wait_event(int32_t ID, void *Event,
+ __tgt_async_info *AsyncInfo);
+
+int32_t __tgt_rtl_sync_event(int32_t ID, void *Event);
+
+int32_t __tgt_rtl_destroy_event(int32_t ID, void *Event);
+// }
+
#ifdef __cplusplus
}
#endif
DLWRAP(cuCtxGetLimit, 2);
DLWRAP(cuCtxSetLimit, 2);
+DLWRAP(cuEventCreate, 2);
+DLWRAP(cuEventRecord, 2);
+DLWRAP(cuStreamWaitEvent, 3);
+DLWRAP(cuEventSynchronize, 1);
+DLWRAP(cuEventDestroy, 1);
+
DLWRAP_FINALIZE();
#ifndef DYNAMIC_CUDA_PATH
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef struct CUstream_st *CUstream;
+typedef struct CUevent_st *CUevent;
typedef enum cudaError_enum {
CUDA_SUCCESS = 0,
CUresult cuCtxGetLimit(size_t *, CUlimit);
CUresult cuCtxSetLimit(CUlimit, size_t);
+CUresult cuEventCreate(CUevent *, unsigned int);
+CUresult cuEventRecord(CUevent, CUstream);
+CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
+CUresult cuEventSynchronize(CUevent);
+CUresult cuEventDestroy(CUevent);
+
#endif
return OFFLOAD_SUCCESS;
}
+/// Create a CUDA event with CU_EVENT_DEFAULT and store the handle in \p P as
+/// an opaque pointer. \p P is written only when creation succeeds.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int createEvent(void **P) {
+  CUevent Event = nullptr;
+
+  CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT);
+  if (Err == CUDA_SUCCESS) {
+    *P = Event;
+    return OFFLOAD_SUCCESS;
+  }
+
+  // Creation failed: report it and leave the caller's pointer untouched.
+  DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event));
+  CUDA_ERR_STRING(Err);
+  return OFFLOAD_FAIL;
+}
+
+/// Record the event \p EventPtr on the stream held in \p AsyncInfo->Queue.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
+  CUevent Event = static_cast<CUevent>(EventPtr);
+  CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
+
+  CUresult Err = cuEventRecord(Event, Stream);
+  if (Err == CUDA_SUCCESS)
+    return OFFLOAD_SUCCESS;
+
+  DP("Error when recording event. stream = " DPxMOD ", event = " DPxMOD "\n",
+     DPxPTR(Stream), DPxPTR(Event));
+  CUDA_ERR_STRING(Err);
+  return OFFLOAD_FAIL;
+}
+
+/// Synchronize on the event \p EventPtr via cuEventSynchronize.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int syncEvent(void *EventPtr) {
+  CUevent Event = static_cast<CUevent>(EventPtr);
+
+  CUresult Err = cuEventSynchronize(Event);
+  if (Err == CUDA_SUCCESS)
+    return OFFLOAD_SUCCESS;
+
+  DP("Error when syncing event = " DPxMOD "\n", DPxPTR(Event));
+  CUDA_ERR_STRING(Err);
+  return OFFLOAD_FAIL;
+}
+
+/// Destroy the event \p EventPtr via cuEventDestroy.
+/// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+int destroyEvent(void *EventPtr) {
+  CUevent Event = static_cast<CUevent>(EventPtr);
+
+  CUresult Err = cuEventDestroy(Event);
+  if (Err == CUDA_SUCCESS)
+    return OFFLOAD_SUCCESS;
+
+  DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event));
+  CUDA_ERR_STRING(Err);
+  return OFFLOAD_FAIL;
+}
+
// Structure contains per-device data
struct DeviceDataTy {
/// List that contains all the kernels.
"Error returned from cuDeviceGetAttribute\n");
printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
}
+
+ /// Make the stream returned by getStream(\p DeviceId, \p AsyncInfo) wait on
+ /// the event \p EventPtr.
+ /// \returns OFFLOAD_SUCCESS on success, OFFLOAD_FAIL otherwise.
+ int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo,
+               void *EventPtr) const {
+   CUevent Event = static_cast<CUevent>(EventPtr);
+   CUstream Stream = getStream(DeviceId, AsyncInfo);
+
+   // The flags argument is a literal 0 instead of CU_EVENT_WAIT_DEFAULT: the
+   // macro is only available from a specific CUDA version, where it is defined
+   // as 0x0, and per the CUDA API document earlier versions require 0x0 too.
+   CUresult Err = cuStreamWaitEvent(Stream, Event, 0);
+   if (Err == CUDA_SUCCESS)
+     return OFFLOAD_SUCCESS;
+
+   DP("Error when waiting event. stream = " DPxMOD ", event = " DPxMOD "\n",
+      DPxPTR(Stream), DPxPTR(Event));
+   CUDA_ERR_STRING(Err);
+   return OFFLOAD_FAIL;
+ }
};
DeviceRTLTy DeviceRTL;
DeviceRTL.printDeviceInfo(device_id);
}
+// Plugin entry point: create an event and return it through \p event.
+// \p device_id is not used by this implementation.
+int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
+  assert(event && "event is nullptr");
+
+  const int32_t Status = createEvent(event);
+  return Status;
+}
+
+// Plugin entry point: record \p event_ptr on the queue of \p async_info_ptr.
+// The queue must already exist. \p device_id is not used by this
+// implementation.
+int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
+                               __tgt_async_info *async_info_ptr) {
+  // Check the pointer before the field it guards.
+  assert(async_info_ptr && "async_info_ptr is nullptr");
+  assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
+  assert(event_ptr && "event_ptr is nullptr");
+
+  const int32_t Status = recordEvent(event_ptr, async_info_ptr);
+  return Status;
+}
+
+// Plugin entry point: make the queue of \p async_info_ptr on device
+// \p device_id depend on \p event_ptr.
+int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr,
+                             __tgt_async_info *async_info_ptr) {
+  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
+  assert(async_info_ptr && "async_info_ptr is nullptr");
+  assert(event_ptr && "event is nullptr");
+
+  const int32_t Status =
+      DeviceRTL.waitEvent(device_id, async_info_ptr, event_ptr);
+  return Status;
+}
+
+// Plugin entry point: synchronize on \p event_ptr.
+// \p device_id is not used by this implementation.
+int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) {
+  assert(event_ptr && "event is nullptr");
+
+  const int32_t Status = syncEvent(event_ptr);
+  return Status;
+}
+
+// Plugin entry point: destroy \p event_ptr.
+// \p device_id is not used by this implementation.
+int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
+  assert(event_ptr && "event is nullptr");
+
+  const int32_t Status = destroyEvent(event_ptr);
+  return Status;
+}
+
#ifdef __cplusplus
}
#endif
__tgt_rtl_supports_empty_images;
__tgt_rtl_set_info_flag;
__tgt_rtl_print_device_info;
+ __tgt_rtl_create_event;
+ __tgt_rtl_record_event;
+ __tgt_rtl_wait_event;
+ __tgt_rtl_sync_event;
+ __tgt_rtl_destroy_event;
local:
*;
};
return OFFLOAD_SUCCESS;
}
+/// Forward event creation to the RTL's create_event entry point. If the RTL
+/// does not provide one, nothing is done and OFFLOAD_SUCCESS is returned.
+int32_t DeviceTy::createEvent(void **Event) {
+  if (!RTL->create_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->create_event(RTLDeviceID, Event);
+}
+
+/// Forward event recording to the RTL's record_event entry point. If the RTL
+/// does not provide one, nothing is done and OFFLOAD_SUCCESS is returned.
+int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
+  if (!RTL->record_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
+}
+
+/// Forward the event wait to the RTL's wait_event entry point. If the RTL
+/// does not provide one, nothing is done and OFFLOAD_SUCCESS is returned.
+int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
+  if (!RTL->wait_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
+}
+
+/// Forward event synchronization to the RTL's sync_event entry point. If the
+/// RTL does not provide one, nothing is done and OFFLOAD_SUCCESS is returned.
+int32_t DeviceTy::syncEvent(void *Event) {
+  if (!RTL->sync_event)
+    return OFFLOAD_SUCCESS;
+
+  return RTL->sync_event(RTLDeviceID, Event);
+}
+
+/// Forward event destruction to the RTL's destroy_event entry point. If the
+/// RTL does not provide one, nothing is done and OFFLOAD_SUCCESS is returned.
+int32_t DeviceTy::destroyEvent(void *Event) {
+  // Test the entry point that is actually called. Checking create_event here
+  // would call a null pointer for an RTL that exports create but not destroy,
+  // and would skip destruction for one that exports destroy but not create.
+  if (RTL->destroy_event)
+    return RTL->destroy_event(RTLDeviceID, Event);
+
+  return OFFLOAD_SUCCESS;
+}
+
/// Check whether a device has an associated RTL and initialize it if it's not
/// already initialized.
bool device_is_ready(int device_num) {
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
int32_t synchronize(AsyncInfoTy &AsyncInfo);
- /// Calls the corresponding print in the \p RTLDEVID
+ /// Calls the corresponding print in the \p RTLDEVID
/// device RTL to obtain the information of the specific device.
bool printDeviceInfo(int32_t RTLDevID);
+ /// Event related interfaces.
+ /// {
+ /// Create an event.
+ int32_t createEvent(void **Event);
+
+ /// Record the event based on status in AsyncInfo->Queue at the moment the
+ /// function is called.
+ int32_t recordEvent(void *Event, AsyncInfoTy &AsyncInfo);
+
+ /// Wait for an event. This function can be blocking or non-blocking,
+ /// depending on the implementation. It is expected to set a dependence on the
+ /// event such that corresponding operations shall only start once the event
+ /// is fulfilled.
+ int32_t waitEvent(void *Event, AsyncInfoTy &AsyncInfo);
+
+ /// Synchronize the event. It is expected to block the thread.
+ int32_t syncEvent(void *Event);
+
+ /// Destroy the event.
+ int32_t destroyEvent(void *Event);
+ /// }
+
private:
// Call to RTL
void init(); // To be called only via DeviceTy::initOnce()
dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
*((void **)&R.print_device_info) =
dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
+ *((void **)&R.create_event) =
+ dlsym(dynlib_handle, "__tgt_rtl_create_event");
+ *((void **)&R.record_event) =
+ dlsym(dynlib_handle, "__tgt_rtl_record_event");
+ *((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event");
+ *((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event");
+ *((void **)&R.destroy_event) =
+ dlsym(dynlib_handle, "__tgt_rtl_destroy_event");
}
#if OMPT_SUPPORT
typedef int32_t(supports_empty_images_ty)();
typedef void(print_device_info_ty)(int32_t);
typedef void(set_info_flag_ty)(uint32_t);
+ typedef int32_t(create_event_ty)(int32_t, void **);
+ typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
+ typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
+ typedef int32_t(sync_event_ty)(int32_t, void *);
+ typedef int32_t(destroy_event_ty)(int32_t, void *);
int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
print_device_info_ty *print_device_info = nullptr;
+ create_event_ty *create_event = nullptr;
+ record_event_ty *record_event = nullptr;
+ wait_event_ty *wait_event = nullptr;
+ sync_event_ty *sync_event = nullptr;
+ destroy_event_ty *destroy_event = nullptr;
// Are there images associated with this RTL.
bool isUsed = false;