From 78b0630b72a9742d62b07cef912b72f1743bfae9 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Sat, 23 Jan 2021 20:33:12 +0000 Subject: [PATCH] [libomptarget][cuda] Call v2 functions explicitly [libomptarget][cuda] Call v2 functions explicitly rtl.cpp calls functions like cuMemFree that are replaced by a macro in cuda.h with cuMemFree_v2. This patch changes the source to use the v2 names consistently. See also D95104, D95155 for the idea. Alternatives are to use a mixture, e.g. call the macro names and explicitly dlopen the _v2 names, or to keep the current status where the symbols are replaced by macros in both files. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95274 --- .../libomptarget/plugins/cuda/dynamic_cuda/cuda.h | 34 ++++++------------ openmp/libomptarget/plugins/cuda/src/rtl.cpp | 41 +++++++++++----------- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h index 832c269..dd579a1 100644 --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h @@ -48,18 +48,6 @@ typedef enum CUctx_flags_enum { CU_CTX_SCHED_MASK = 0x07, } CUctx_flags; -#define cuMemFree cuMemFree_v2 -#define cuMemAlloc cuMemAlloc_v2 -#define cuMemcpyDtoH cuMemcpyDtoH_v2 -#define cuMemcpyHtoD cuMemcpyHtoD_v2 -#define cuStreamDestroy cuStreamDestroy_v2 -#define cuModuleGetGlobal cuModuleGetGlobal_v2 -#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2 -#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2 -#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2 -#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2 -#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2 - CUresult cuCtxGetDevice(CUdevice *); CUresult cuDeviceGet(CUdevice *, int); CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice); @@ -72,26 +60,26 @@ CUresult cuLaunchKernel(CUfunction, unsigned, 
unsigned, unsigned, unsigned, unsigned, unsigned, unsigned, CUstream, void **, void **); -CUresult cuMemAlloc(CUdeviceptr *, size_t); -CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); +CUresult cuMemAlloc_v2(CUdeviceptr *, size_t); +CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr, CUdeviceptr, size_t, CUstream); -CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t); -CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream); -CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t); -CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream); +CUresult cuMemcpyDtoH_v2(void *, CUdeviceptr, size_t); +CUresult cuMemcpyDtoHAsync_v2(void *, CUdeviceptr, size_t, CUstream); +CUresult cuMemcpyHtoD_v2(CUdeviceptr, const void *, size_t); +CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr, const void *, size_t, CUstream); -CUresult cuMemFree(CUdeviceptr); +CUresult cuMemFree_v2(CUdeviceptr); CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *); -CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *); +CUresult cuModuleGetGlobal_v2(CUdeviceptr *, size_t *, CUmodule, const char *); CUresult cuModuleUnload(CUmodule); CUresult cuStreamCreate(CUstream *, unsigned); -CUresult cuStreamDestroy(CUstream); +CUresult cuStreamDestroy_v2(CUstream); CUresult cuStreamSynchronize(CUstream); CUresult cuCtxSetCurrent(CUcontext); -CUresult cuDevicePrimaryCtxRelease(CUdevice); +CUresult cuDevicePrimaryCtxRelease_v2(CUdevice); CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *); -CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned); +CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice, unsigned); CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice); CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *, void **); diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index e4ac1e0..f83c9df 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ 
b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -110,8 +110,8 @@ bool checkResult(CUresult Err, const char *ErrMsg) { int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size, CUstream Stream) { - CUresult Err = - cuMemcpyDtoDAsync((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, Size, Stream); + CUresult Err = cuMemcpyDtoDAsync_v2((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, + Size, Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from device to device. Pointers: src " @@ -207,8 +207,8 @@ public: for (CUstream &S : StreamPool[I]) { if (S) - checkResult(cuStreamDestroy(S), - "Error returned from cuStreamDestroy\n"); + checkResult(cuStreamDestroy_v2(S), + "Error returned from cuStreamDestroy_v2\n"); } } } @@ -311,8 +311,8 @@ class DeviceRTLTy { return nullptr; CUdeviceptr DevicePtr; - Err = cuMemAlloc(&DevicePtr, Size); - if (!checkResult(Err, "Error returned from cuMemAlloc\n")) + Err = cuMemAlloc_v2(&DevicePtr, Size); + if (!checkResult(Err, "Error returned from cuMemAlloc_v2\n")) return nullptr; return (void *)DevicePtr; @@ -323,8 +323,8 @@ class DeviceRTLTy { if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n")) return OFFLOAD_FAIL; - Err = cuMemFree((CUdeviceptr)TgtPtr); - if (!checkResult(Err, "Error returned from cuMemFree\n")) + Err = cuMemFree_v2((CUdeviceptr)TgtPtr); + if (!checkResult(Err, "Error returned from cuMemFree_v2\n")) return OFFLOAD_FAIL; return OFFLOAD_SUCCESS; @@ -466,8 +466,8 @@ public: CUdevice Device; checkResult(cuCtxGetDevice(&Device), "Error returned from cuCtxGetDevice\n"); - checkResult(cuDevicePrimaryCtxRelease(Device), - "Error returned from cuDevicePrimaryCtxRelease\n"); + checkResult(cuDevicePrimaryCtxRelease_v2(Device), + "Error returned from cuDevicePrimaryCtxRelease_v2\n"); } } } @@ -506,8 +506,9 @@ public: } else { DP("The primary context is inactive, set its flags to " "CU_CTX_SCHED_BLOCKING_SYNC\n"); - Err = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC); - if (!checkResult(Err, 
"Error returned from cuDevicePrimaryCtxSetFlags\n")) + Err = cuDevicePrimaryCtxSetFlags_v2(Device, CU_CTX_SCHED_BLOCKING_SYNC); + if (!checkResult(Err, + "Error returned from cuDevicePrimaryCtxSetFlags_v2\n")) return OFFLOAD_FAIL; } @@ -656,7 +657,7 @@ public: __tgt_offload_entry Entry = *E; CUdeviceptr CUPtr; size_t CUSize; - Err = cuModuleGetGlobal(&CUPtr, &CUSize, Module, E->name); + Err = cuModuleGetGlobal_v2(&CUPtr, &CUSize, Module, E->name); // We keep this style here because we need the name if (Err != CUDA_SUCCESS) { REPORT("Loading global '%s' Failed\n", E->name); @@ -688,7 +689,7 @@ public: // If unified memory is present any target link or to variables // can access host addresses directly. There is no longer a // need for device copies. - cuMemcpyHtoD(CUPtr, E->addr, sizeof(void *)); + cuMemcpyHtoD_v2(CUPtr, E->addr, sizeof(void *)); DP("Copy linked variable host address (" DPxMOD ") to device address (" DPxMOD ")\n", DPxPTR(*((void **)E->addr)), DPxPTR(CUPtr)); @@ -719,7 +720,7 @@ public: CUdeviceptr ExecModePtr; size_t CUSize; - Err = cuModuleGetGlobal(&ExecModePtr, &CUSize, Module, ExecModeName); + Err = cuModuleGetGlobal_v2(&ExecModePtr, &CUSize, Module, ExecModeName); if (Err == CUDA_SUCCESS) { if (CUSize != sizeof(int8_t)) { DP("Loading global exec_mode '%s' - size mismatch (%zd != %zd)\n", @@ -727,7 +728,7 @@ public: return nullptr; } - Err = cuMemcpyDtoH(&ExecModeVal, ExecModePtr, CUSize); + Err = cuMemcpyDtoH_v2(&ExecModeVal, ExecModePtr, CUSize); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from device to host. 
Pointers: " "host = " DPxMOD ", device = " DPxMOD ", size = %zd\n", @@ -768,7 +769,7 @@ public: CUdeviceptr DeviceEnvPtr; size_t CUSize; - Err = cuModuleGetGlobal(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName); + Err = cuModuleGetGlobal_v2(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName); if (Err == CUDA_SUCCESS) { if (CUSize != sizeof(DeviceEnv)) { REPORT( @@ -778,7 +779,7 @@ public: return nullptr; } - Err = cuMemcpyHtoD(DeviceEnvPtr, &DeviceEnv, CUSize); + Err = cuMemcpyHtoD_v2(DeviceEnvPtr, &DeviceEnv, CUSize); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from host to device. Pointers: " "host = " DPxMOD ", device = " DPxMOD ", size = %zu\n", @@ -816,7 +817,7 @@ public: CUstream Stream = getStream(DeviceId, AsyncInfoPtr); - Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream); + Err = cuMemcpyHtoDAsync_v2((CUdeviceptr)TgtPtr, HstPtr, Size, Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from host to device. Pointers: host " "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n", @@ -838,7 +839,7 @@ public: CUstream Stream = getStream(DeviceId, AsyncInfoPtr); - Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream); + Err = cuMemcpyDtoHAsync_v2(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when copying data from device to host. Pointers: host " "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n", -- 2.7.4