From 6da348569cd20632d8ee2213fbab59850e133eb0 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Mon, 10 May 2021 15:27:49 +0100 Subject: [PATCH] [libomptarget] Add support for target allocators to dynamic cuda RTL [libomptarget] Add support for target allocators to dynamic cuda RTL Follow-on to D102000, which introduced new calls into libcuda. This patch adds the corresponding entry points to dynamic_cuda, fixing the build for systems that do not have the CUDA toolkit installed. Function types and enum from https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html Reviewed By: pdhaliwal Differential Revision: https://reviews.llvm.org/D102169 --- openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp | 5 ++++- openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp index bb817b7..c84b381 100644 --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp @@ -32,14 +32,17 @@ DLWRAP(cuGetErrorString, 2); DLWRAP(cuLaunchKernel, 11); DLWRAP(cuMemAlloc, 2); -DLWRAP(cuMemcpyDtoDAsync, 4); +DLWRAP(cuMemAllocHost, 2); +DLWRAP(cuMemAllocManaged, 3); +DLWRAP(cuMemcpyDtoDAsync, 4); DLWRAP(cuMemcpyDtoH, 3); DLWRAP(cuMemcpyDtoHAsync, 4); DLWRAP(cuMemcpyHtoD, 3); DLWRAP(cuMemcpyHtoDAsync, 4); DLWRAP(cuMemFree, 1); +DLWRAP(cuMemFreeHost, 1); DLWRAP(cuModuleGetFunction, 3); DLWRAP(cuModuleGetGlobal, 4); diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h index 7885b0e..045c39c 100644 --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h @@ -49,6 +49,12 @@ typedef enum CUctx_flags_enum { CU_CTX_SCHED_MASK = 0x07, } CUctx_flags; +typedef enum CUmemAttach_flags_enum { + CU_MEM_ATTACH_GLOBAL = 
0x1, + CU_MEM_ATTACH_HOST = 0x2, + CU_MEM_ATTACH_SINGLE = 0x4, +} CUmemAttach_flags; + CUresult cuCtxGetDevice(CUdevice *); CUresult cuDeviceGet(CUdevice *, int); CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice); @@ -62,14 +68,18 @@ CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned, void **); CUresult cuMemAlloc(CUdeviceptr *, size_t); -CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); +CUresult cuMemAllocHost(void **, size_t); +CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int); +CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t); CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream); CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t); CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream); CUresult cuMemFree(CUdeviceptr); +CUresult cuMemFreeHost(void *); + CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *); CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *); -- 2.7.4