+++ /dev/null
-set(LLVM_OPTIONAL_SOURCES
- mlir-cuda-runner.cpp
- )
-set(LLVM_LINK_COMPONENTS
- Core
- Support
-)
-
-if(MLIR_CUDA_RUNNER_ENABLED)
- if (NOT ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD))
- message(SEND_ERROR
- "Building the mlir cuda runner requires the NVPTX backend")
- endif()
-
- # Configure CUDA runner support. Using check_language first allows us to give
- # a custom error message.
- include(CheckLanguage)
- check_language(CUDA)
- if (CMAKE_CUDA_COMPILER)
- enable_language(CUDA)
- else()
- message(SEND_ERROR
- "Building the mlir cuda runner requires a working CUDA install")
- endif()
-
- # We need the libcuda.so library.
- find_library(CUDA_RUNTIME_LIBRARY cuda)
-
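- # Link the runner against every registered MLIR conversion library, the
- # MLIR/LLVM components it needs, and the CUDA driver library.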
- get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
- set(LIBS
- ${conversion_libs}
- MLIRJitRunner
- MLIRAnalysis
- MLIRAsync
- MLIREDSC
- MLIRExecutionEngine
- MLIRGPU
- MLIRIR
- MLIRLLVMIR
- MLIRLLVMToLLVMIRTranslation
- MLIRNVVMIR
- MLIRParser
- MLIRStandard
- MLIRSupport
- MLIRTargetLLVMIRExport
- MLIRNVVMToLLVMIRTranslation
- MLIRTransforms
- MLIRTranslation
- ${CUDA_RUNTIME_LIBRARY}
- )
-
- # Manually expand the target libraries, since our MLIR libraries
- # aren't plugged into the LLVM dependency tracking. If we don't
- # do this, we can't insert the CodeGen library after ourselves.
- llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens)
- # Prepend "LLVM" to every target name, since that is how the libraries
- # are named by CMake.
- set(targets_to_link)
- foreach(t ${TARGET_LIBS})
-   list(APPEND targets_to_link "LLVM${t}")
- endforeach()
-
- add_llvm_tool(mlir-cuda-runner
- mlir-cuda-runner.cpp
-
- DEPENDS
- mlir_cuda_runtime
- )
- target_include_directories(mlir-cuda-runner
- PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
- )
- llvm_update_compile_flags(mlir-cuda-runner)
- target_link_libraries(mlir-cuda-runner PRIVATE ${LIBS} ${targets_to_link})
-
-endif()
+++ /dev/null
-//===- mlir-cuda-runner.cpp - MLIR CUDA Execution Driver ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a command line utility that executes an MLIR file on the GPU by
-// translating MLIR to NVVM/LLVM IR before JIT-compiling and executing the
-// latter.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-
-#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
-#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
-#include "mlir/Conversion/Passes.h"
-#include "mlir/Dialect/Async/IR/Async.h"
-#include "mlir/Dialect/Async/Passes.h"
-#include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/GPU/Passes.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/ExecutionEngine/JitRunner.h"
-#include "mlir/ExecutionEngine/OptUtils.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/Passes.h"
-
-#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetSelect.h"
-
-#include "cuda.h"
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
- CUresult result, Location loc) {
- const char *error;
- cuGetErrorString(result, &error);
- emitError(loc, expr.concat(" failed with error code ")
- .concat(llvm::Twine{error})
- .concat("[")
- .concat(buffer)
- .concat("]"));
-}
-
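-// Evaluates `expr`; on failure, emits a CUDA error at `loc` (using the
-// `jitErrorBuffer` from the enclosing scope) and returns an empty result
-// from the enclosing function.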
-#define RETURN_ON_CUDA_ERROR(expr) \
- do { \
- if (auto status = (expr)) { \
- emitCudaError(#expr, jitErrorBuffer, status, loc); \
- return {}; \
- } \
- } while (false)
-
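-// JIT-compiles and links the given PTX string into a CUBIN blob using the
-// CUDA driver's linker API, reporting any errors at `loc`.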
-OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
- StringRef name) {
- char jitErrorBuffer[4096] = {0};
-
- // Initialize CUDA once in a thread-safe manner.
- static CUresult cuInitResult = [] { return cuInit(/*flags=*/0); }();
- RETURN_ON_CUDA_ERROR(cuInitResult);
-
- // Linking requires a device context.
- CUdevice device;
- RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
- CUcontext context;
- RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
- CUlinkState linkState;
-
- CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
- void *jitOptionsVals[] = {jitErrorBuffer,
- reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
- RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
- jitOptions, /* jit options */
- jitOptionsVals, /* jit option values */
- &linkState));
-
- RETURN_ON_CUDA_ERROR(
- cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
- const_cast<void *>(static_cast<const void *>(ptx.c_str())),
- ptx.length(), name.str().data(), /* kernel name */
- 0, /* number of jit options */
- nullptr, /* jit options */
- nullptr /* jit option values */
- ));
-
- void *cubinData;
- size_t cubinSize;
- RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
- char *cubinAsChar = static_cast<char *>(cubinData);
- OwnedBlob result =
- std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
- // This will also destroy the cubin data.
- RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
- RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
-
- return result;
-}
-
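-// Command-line options for the gpu-to-cubin pipeline registered in
-// registerCudaRunnerPasses() below.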
-struct GpuToCubinPipelineOptions
- : public mlir::PassPipelineOptions<GpuToCubinPipelineOptions> {
- Option<std::string> gpuBinaryAnnotation{
- *this, "gpu-binary-annotation",
- llvm::cl::desc("Annotation attribute string for GPU binary"),
- llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
-};
-
-// Register cuda-runner-specific passes.
-static void registerCudaRunnerPasses() {
- PassPipelineRegistration<GpuToCubinPipelineOptions> registerGpuToCubin(
- "gpu-to-cubin", "Generate CUBIN from gpu.launch regions",
- [&](OpPassManager &pm, const GpuToCubinPipelineOptions &options) {
- pm.addPass(createGpuKernelOutliningPass());
- auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
- kernelPm.addPass(createStripDebugInfoPass());
- kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
- kernelPm.addPass(createConvertGPUKernelToBlobPass(
- translateModuleToLLVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
- "sm_35", "+ptx60", options.gpuBinaryAnnotation));
- });
- registerGPUPasses();
- registerGpuToLLVMConversionPassPass();
- registerAsyncPasses();
- registerConvertAsyncToLLVMPass();
- registerConvertStandardToLLVMPass();
-}
-
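-// Builds the pass pipeline provided on the command line and runs it over the
-// given module.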
-static LogicalResult runMLIRPasses(ModuleOp module,
- PassPipelineCLParser &passPipeline) {
- PassManager pm(module.getContext(), PassManager::Nesting::Implicit);
- applyPassManagerCLOptions(pm);
-
- auto errorHandler = [&](const Twine &msg) {
- emitError(UnknownLoc::get(module.getContext())) << msg;
- return failure();
- };
-
- // Build the provided pipeline.
- if (failed(passPipeline.addToPipeline(pm, errorHandler)))
- return failure();
-
- // Run the pipeline.
- return pm.run(module);
-}
-
-int main(int argc, char **argv) {
- llvm::InitLLVM y(argc, argv);
- llvm::InitializeNativeTarget();
- llvm::InitializeNativeTargetAsmPrinter();
-
- // Initialize LLVM NVPTX backend.
- LLVMInitializeNVPTXTarget();
- LLVMInitializeNVPTXTargetInfo();
- LLVMInitializeNVPTXTargetMC();
- LLVMInitializeNVPTXAsmPrinter();
-
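- // Initialize LLVM's pass infrastructure so the JIT's optimization
- // pipelines are available.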
- mlir::initializeLLVMPasses();
-
- registerCudaRunnerPasses();
- PassPipelineCLParser passPipeline("", "Compiler passes to run");
- registerPassManagerCLOptions();
-
- auto mlirTransformer = [&](ModuleOp module) {
- return runMLIRPasses(module, passPipeline);
- };
-
- mlir::JitRunnerConfig jitRunnerConfig;
- jitRunnerConfig.mlirTransformer = mlirTransformer;
-
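- // Register the dialects the runner expects in its input, plus the LLVM IR
- // translations used when lowering for the ExecutionEngine.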
- mlir::DialectRegistry registry;
- registry.insert<mlir::LLVM::LLVMDialect, mlir::NVVM::NVVMDialect,
- mlir::async::AsyncDialect, mlir::gpu::GPUDialect,
- mlir::StandardOpsDialect>();
- mlir::registerLLVMDialectTranslation(registry);
- mlir::registerNVVMDialectTranslation(registry);
-
- return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
-}