From d4ba4c6af7100ffbdd740ff8e4c26ad19065d485 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 17 Mar 2023 14:23:42 -0700 Subject: [PATCH] Revert unintentionally committed "Use nvptxcompile library." This reverts commit 5f66348e59aa7ce5e5780a972b3875268c45d57c. --- mlir/lib/Dialect/GPU/CMakeLists.txt | 7 +- .../Dialect/GPU/Transforms/SerializeToCubin.cpp | 96 ++++++++++------------ mlir/lib/ExecutionEngine/CMakeLists.txt | 6 +- 3 files changed, 50 insertions(+), 59 deletions(-) diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index d5c2949..94f3ab5 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -91,8 +91,7 @@ if(MLIR_ENABLE_CUDA_RUNNER) message(SEND_ERROR "Building mlir with cuda support requires the NVPTX backend") endif() - - find_package(CUDAToolkit) + # Configure CUDA language support. Using check_language first allows us to # give a custom error message. include(CheckLanguage) @@ -116,12 +115,12 @@ if(MLIR_ENABLE_CUDA_RUNNER) ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ) - #find_library(CUDA_NVPTX_LIBRARY nvptxcompiler_static) + find_library(CUDA_DRIVER_LIBRARY cuda) target_link_libraries(MLIRGPUTransforms PRIVATE MLIRNVVMToLLVMIRTranslation - CUDA::nvptxcompiler_static + ${CUDA_DRIVER_LIBRARY} ) endif() diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp index 1850e23..44a1402 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp @@ -20,63 +20,24 @@ #include "llvm/Support/TargetSelect.h" #include -#include using namespace mlir; -static void emitNvptxError(const llvm::Twine &expr, - nvPTXCompilerHandle compiler, - nvPTXCompileResult result, Location loc) { +static void emitCudaError(const llvm::Twine &expr, const char *buffer, + CUresult result, Location loc) { const char *error; - auto GetErrMsg = [](nvPTXCompileResult result) -> const char * { - switch (result) { - case NVPTXCOMPILE_SUCCESS: - return "Success"; - case NVPTXCOMPILE_ERROR_INVALID_COMPILER_HANDLE: - return "Invalid compiler handle"; - case NVPTXCOMPILE_ERROR_INVALID_INPUT: - return "Invalid input"; - case NVPTXCOMPILE_ERROR_COMPILATION_FAILURE: - return "Compilation failure"; - case NVPTXCOMPILE_ERROR_INTERNAL: - return "Internal error"; - case NVPTXCOMPILE_ERROR_OUT_OF_MEMORY: - return "Out of memory"; - case NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE: - return "Invocation incomplete"; - case NVPTXCOMPILE_ERROR_UNSUPPORTED_PTX_VERSION: - return "Unsupported PTX version"; - } - }; - size_t errorSize; - auto status = nvPTXCompilerGetErrorLogSize(compiler, &errorSize); - std::string error_log; - if (status == NVPTXCOMPILE_SUCCESS) { - error_log.resize(errorSize); - status = nvPTXCompilerGetErrorLog(compiler, error_log.data()); - if (status != NVPTXCOMPILE_SUCCESS) - error_log = ""; - } + cuGetErrorString(result, &error); emitError(loc, expr.concat(" failed with error code ") - .concat(llvm::Twine{GetErrMsg(result)}) + .concat(llvm::Twine{error}) .concat("[") - .concat(error_log) + .concat(buffer) .concat("]")); } #define RETURN_ON_CUDA_ERROR(expr) \ do { \ if (auto status = (expr)) { \ - emitNvptxError(#expr, compiler, status, loc); \ - return {}; \ - } \ - } while (false) - -#define RETURN_ON_NVPTX_ERROR(expr) \ - do { \ - nvPTXCompileResult result = (expr); \ - if (result != NVPTXCOMPILE_SUCCESS) { \ - emitNvptxError(#expr, compiler, result, loc); \ + emitCudaError(#expr, jitErrorBuffer, status, loc); \ return {}; \ } \ } while (false) @@ -127,17 +88,46 @@ std::unique_ptr> SerializeToCubinPass::serializeISA(const std::string &isa) { Location loc = getOperation().getLoc(); char jitErrorBuffer[4096] = {0}; - nvPTXCompilerHandle compiler; - nvPTXCompilerCreate(&compiler, isa.length(), isa.c_str()); - - nvPTXCompilerCompile(compiler, 0, nullptr); + RETURN_ON_CUDA_ERROR(cuInit(0)); + + // Linking requires a device context. + CUdevice device; + RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); + CUcontext context; + RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device)); + CUlinkState linkState; + + CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, + CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; + void *jitOptionsVals[] = {jitErrorBuffer, + reinterpret_cast(sizeof(jitErrorBuffer))}; + + RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ + jitOptions, /* jit options */ + jitOptionsVals, /* jit option values */ + &linkState)); + + auto kernelName = getOperation().getName().str(); + RETURN_ON_CUDA_ERROR(cuLinkAddData( + linkState, CUjitInputType::CU_JIT_INPUT_PTX, + const_cast(static_cast(isa.c_str())), isa.length(), + kernelName.c_str(), 0, /* number of jit options */ + nullptr, /* jit options */ + nullptr /* jit option values */ + )); + + void *cubinData; size_t cubinSize; - nvPTXCompilerGetCompiledProgramSize(compiler, &cubinSize); + RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); + + char *cubinAsChar = static_cast(cubinData); + auto result = + std::make_unique>(cubinAsChar, cubinAsChar + cubinSize); - auto result = std::make_unique>(cubinSize); - nvPTXCompilerGetCompiledProgram(compiler, result->data()); - nvPTXCompilerDestroy(&compiler); + // This will also destroy the cubin data. + RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); + RETURN_ON_CUDA_ERROR(cuCtxDestroy(context)); return result; } diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index 454ec4e..a212e1a 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -169,7 +169,6 @@ if(LLVM_ENABLE_PIC) target_compile_definitions(mlir_async_runtime PRIVATE mlir_async_runtime_EXPORTS) if(MLIR_ENABLE_CUDA_RUNNER) - find_package(CUDAToolkit) # Configure CUDA support. Using check_language first allows us to give a # custom error message. include(CheckLanguage) @@ -181,6 +180,9 @@ if(LLVM_ENABLE_PIC) "Building the mlir cuda runner requires a working CUDA install") endif() + # We need the libcuda.so library. + find_library(CUDA_RUNTIME_LIBRARY cuda) + add_mlir_library(mlir_cuda_runtime SHARED CudaRuntimeWrappers.cpp @@ -194,7 +196,7 @@ if(LLVM_ENABLE_PIC) ) target_link_libraries(mlir_cuda_runtime PRIVATE - CUDA::cuda_driver + ${CUDA_RUNTIME_LIBRARY} ) endif() -- 2.7.4