From: Shilei Tian Date: Sat, 28 Aug 2021 20:24:06 +0000 (-0400) Subject: [OpenMP][Offloading] Add support for event related interfaces X-Git-Tag: upstream/15.0.7~32790 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=29df4ab3f3c9bf37529ee04795abfd90b7691857;p=platform%2Fupstream%2Fllvm.git [OpenMP][Offloading] Add support for event related interfaces This patch adds support for event related interfaces, which will be used later to fix a data race. See D104418 for more details. Reviewed By: jdoerfert, ye-luo Differential Revision: https://reviews.llvm.org/D108528 --- diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h index b7b3eb80..aefad9e 100644 --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -145,6 +145,32 @@ void __tgt_rtl_set_info_flag(uint32_t); // Print the device information void __tgt_rtl_print_device_info(int32_t ID); +// Event related interfaces. It is expected to use the interfaces in the +// following way: +// 1) Create an event on the target device (__tgt_rtl_create_event). +// 2) Record the event based on the status of \p AsyncInfo->Queue at the moment +// of function call to __tgt_rtl_record_event. An event becomes "meaningful" +// once it is recorded, such that others can depend on it. +// 3) Call __tgt_rtl_wait_event to set dependence on the event. Whether the +// operation is blocking or non-blocking depends on the target. It is expected +// to be non-blocking, just set dependence and return. +// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the +// thread calling the function. +// 5) Destroy the event (__tgt_rtl_destroy_event). 
+// { +int32_t __tgt_rtl_create_event(int32_t ID, void **Event); + +int32_t __tgt_rtl_record_event(int32_t ID, void *Event, + __tgt_async_info *AsyncInfo); + +int32_t __tgt_rtl_wait_event(int32_t ID, void *Event, + __tgt_async_info *AsyncInfo); + +int32_t __tgt_rtl_sync_event(int32_t ID, void *Event); + +int32_t __tgt_rtl_destroy_event(int32_t ID, void *Event); +// } + #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp index fb776f7..9e9cea0 100644 --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp @@ -69,6 +69,12 @@ DLWRAP(cuMemcpyPeerAsync, 6); DLWRAP(cuCtxGetLimit, 2); DLWRAP(cuCtxSetLimit, 2); +DLWRAP(cuEventCreate, 2); +DLWRAP(cuEventRecord, 2); +DLWRAP(cuStreamWaitEvent, 3); +DLWRAP(cuEventSynchronize, 1); +DLWRAP(cuEventDestroy, 1); + DLWRAP_FINALIZE(); #ifndef DYNAMIC_CUDA_PATH diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h index 14049e1f..c6aeafe 100644 --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h @@ -22,6 +22,7 @@ typedef struct CUmod_st *CUmodule; typedef struct CUctx_st *CUcontext; typedef struct CUfunc_st *CUfunction; typedef struct CUstream_st *CUstream; +typedef struct CUevent_st *CUevent; typedef enum cudaError_enum { CUDA_SUCCESS = 0, @@ -248,4 +249,10 @@ CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, CUresult cuCtxGetLimit(size_t *, CUlimit); CUresult cuCtxSetLimit(CUlimit, size_t); +CUresult cuEventCreate(CUevent *, unsigned int); +CUresult cuEventRecord(CUevent, CUstream); +CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int); +CUresult cuEventSynchronize(CUevent); +CUresult cuEventDestroy(CUevent); + #endif diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp 
b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 44fc672..c6f51a5 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -129,6 +129,62 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size, return OFFLOAD_SUCCESS; } +int createEvent(void **P) { + CUevent Event = nullptr; + + CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT); + if (Err != CUDA_SUCCESS) { + DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event)); + CUDA_ERR_STRING(Err); + return OFFLOAD_FAIL; + } + + *P = Event; + + return OFFLOAD_SUCCESS; +} + +int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) { + CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue); + CUevent Event = reinterpret_cast<CUevent>(EventPtr); + + CUresult Err = cuEventRecord(Event, Stream); + if (Err != CUDA_SUCCESS) { + DP("Error when recording event. stream = " DPxMOD ", event = " DPxMOD "\n", + DPxPTR(Stream), DPxPTR(Event)); + CUDA_ERR_STRING(Err); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + +int syncEvent(void *EventPtr) { + CUevent Event = reinterpret_cast<CUevent>(EventPtr); + + CUresult Err = cuEventSynchronize(Event); + if (Err != CUDA_SUCCESS) { + DP("Error when syncing event = " DPxMOD "\n", DPxPTR(Event)); + CUDA_ERR_STRING(Err); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + +int destroyEvent(void *EventPtr) { + CUevent Event = reinterpret_cast<CUevent>(EventPtr); + + CUresult Err = cuEventDestroy(Event); + if (Err != CUDA_SUCCESS) { + DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event)); + CUDA_ERR_STRING(Err); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + // Structure contains per-device data struct DeviceDataTy { /// List that contains all the kernels. 
@@ -1332,6 +1388,25 @@ public: "Error returned from cuDeviceGetAttribute\n"); printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2); } + + int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo, + void *EventPtr) const { + CUstream Stream = getStream(DeviceId, AsyncInfo); + CUevent Event = reinterpret_cast<CUevent>(EventPtr); + + // We don't use CU_EVENT_WAIT_DEFAULT here as it is only available from + // specific CUDA version, and defined as 0x0. In previous version, per CUDA + // API document, that argument has to be 0x0. + CUresult Err = cuStreamWaitEvent(Stream, Event, 0); + if (Err != CUDA_SUCCESS) { + DP("Error when waiting event. stream = " DPxMOD ", event = " DPxMOD "\n", + DPxPTR(Stream), DPxPTR(Event)); + CUDA_ERR_STRING(Err); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; + } }; DeviceRTLTy DeviceRTL; @@ -1537,6 +1612,41 @@ void __tgt_rtl_print_device_info(int32_t device_id) { DeviceRTL.printDeviceInfo(device_id); } +int32_t __tgt_rtl_create_event(int32_t device_id, void **event) { + assert(event && "event is nullptr"); + return createEvent(event); +} + +int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr, + __tgt_async_info *async_info_ptr) { + assert(async_info_ptr && "async_info_ptr is nullptr"); + assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr"); + assert(event_ptr && "event_ptr is nullptr"); + + return recordEvent(event_ptr, async_info_ptr); +} + +int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr, + __tgt_async_info *async_info_ptr) { + assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); + assert(async_info_ptr && "async_info_ptr is nullptr"); + assert(event_ptr && "event is nullptr"); + + return DeviceRTL.waitEvent(device_id, async_info_ptr, event_ptr); +} + +int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) { + assert(event_ptr && "event is nullptr"); + + return syncEvent(event_ptr); +} + +int32_t __tgt_rtl_destroy_event(int32_t device_id, void 
*event_ptr) { + assert(event_ptr && "event is nullptr"); + + return destroyEvent(event_ptr); +} + #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports index 61cc674..0a3dc8a 100644 --- a/openmp/libomptarget/plugins/exports +++ b/openmp/libomptarget/plugins/exports @@ -24,6 +24,11 @@ VERS1.0 { __tgt_rtl_supports_empty_images; __tgt_rtl_set_info_flag; __tgt_rtl_print_device_info; + __tgt_rtl_create_event; + __tgt_rtl_record_event; + __tgt_rtl_wait_event; + __tgt_rtl_sync_event; + __tgt_rtl_destroy_event; local: *; }; diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index f660d23..fd7c73d 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -553,6 +553,42 @@ int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { return OFFLOAD_SUCCESS; } +int32_t DeviceTy::createEvent(void **Event) { + if (RTL->create_event) + return RTL->create_event(RTLDeviceID, Event); + + return OFFLOAD_SUCCESS; +} + +int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { + if (RTL->record_event) + return RTL->record_event(RTLDeviceID, Event, AsyncInfo); + + return OFFLOAD_SUCCESS; +} + +int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { + if (RTL->wait_event) + return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); + + return OFFLOAD_SUCCESS; +} + +int32_t DeviceTy::syncEvent(void *Event) { + if (RTL->sync_event) + return RTL->sync_event(RTLDeviceID, Event); + + return OFFLOAD_SUCCESS; +} + +int32_t DeviceTy::destroyEvent(void *Event) { + // Check destroy_event, the entry point actually called below. + if (RTL->destroy_event) + return RTL->destroy_event(RTLDeviceID, Event); + + return OFFLOAD_SUCCESS; +} + /// Check whether a device has an associated RTL and initialize it if it's not /// already initialized. 
bool device_is_ready(int device_num) { diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h index 21cce35..58c6316 100644 --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -275,10 +275,32 @@ struct DeviceTy { /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails. int32_t synchronize(AsyncInfoTy &AsyncInfo); - /// Calls the corresponding print in the \p RTLDEVID + /// Calls the corresponding print in the \p RTLDEVID /// device RTL to obtain the information of the specific device. bool printDeviceInfo(int32_t RTLDevID); + /// Event related interfaces. + /// { + /// Create an event. + int32_t createEvent(void **Event); + + /// Record the event based on status in AsyncInfo->Queue at the moment the + /// function is called. + int32_t recordEvent(void *Event, AsyncInfoTy &AsyncInfo); + + /// Wait for an event. This function can be blocking or non-blocking, + /// depending on the implementation. It is expected to set a dependence on the + /// event such that corresponding operations shall only start once the event + /// is fulfilled. + int32_t waitEvent(void *Event, AsyncInfoTy &AsyncInfo); + + /// Synchronize the event. It is expected to block the thread. + int32_t syncEvent(void *Event); + + /// Destroy the event. 
+ int32_t destroyEvent(void *Event); + /// } + private: // Call to RTL void init(); // To be called only via DeviceTy::initOnce() diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp index 47d3a0f..264b1d4 100644 --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -183,6 +183,14 @@ void RTLsTy::LoadRTLs() { dlsym(dynlib_handle, "__tgt_rtl_set_info_flag"); *((void **)&R.print_device_info) = dlsym(dynlib_handle, "__tgt_rtl_print_device_info"); + *((void **)&R.create_event) = + dlsym(dynlib_handle, "__tgt_rtl_create_event"); + *((void **)&R.record_event) = + dlsym(dynlib_handle, "__tgt_rtl_record_event"); + *((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event"); + *((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event"); + *((void **)&R.destroy_event) = + dlsym(dynlib_handle, "__tgt_rtl_destroy_event"); } #if OMPT_SUPPORT diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h index db13927..88328d23 100644 --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -57,6 +57,11 @@ struct RTLInfoTy { typedef int32_t(supports_empty_images_ty)(); typedef void(print_device_info_ty)(int32_t); typedef void(set_info_flag_ty)(uint32_t); + typedef int32_t(create_event_ty)(int32_t, void **); + typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *); + typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *); + typedef int32_t(sync_event_ty)(int32_t, void *); + typedef int32_t(destroy_event_ty)(int32_t, void *); int32_t Idx = -1; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -95,6 +100,11 @@ struct RTLInfoTy { supports_empty_images_ty *supports_empty_images = nullptr; set_info_flag_ty *set_info_flag = nullptr; print_device_info_ty *print_device_info = nullptr; + create_event_ty *create_event = nullptr; + record_event_ty *record_event = nullptr; + wait_event_ty *wait_event = nullptr; + 
sync_event_ty *sync_event = nullptr; + destroy_event_ty *destroy_event = nullptr; // Are there images associated with this RTL. bool isUsed = false;