void runOnOperation() final;
+protected:
+ void getDependentDialects(DialectRegistry &registry) const override;
+
private:
- // Creates the LLVM target machine to generate the ISA.
+ /// Creates the LLVM target machine to generate the ISA.
std::unique_ptr<llvm::TargetMachine> createTargetMachine();
- // Translates the 'getOperation()' result to an LLVM module.
+ /// Translates the 'getOperation()' result to an LLVM module.
virtual std::unique_ptr<llvm::Module>
- translateToLLVMIR(llvm::LLVMContext &llvmContext) = 0;
+ translateToLLVMIR(llvm::LLVMContext &llvmContext);
- // Serializes the target ISA to binary form.
+ /// Serializes the target ISA to binary form.
virtual std::unique_ptr<std::vector<char>>
serializeISA(const std::string &isa) = 0;
// Registration
//===----------------------------------------------------------------------===//
+/// Register pass to serialize GPU kernel functions to a CUBIN binary
+/// annotation.
+void registerGpuSerializeToCubinPass();
+
/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/GPU/Passes.h.inc"
+if (MLIR_CUDA_CONVERSIONS_ENABLED)
+ set(NVPTX_LIBS
+ NVPTXCodeGen
+ NVPTXDesc
+ NVPTXInfo
+ )
+endif()
+
add_mlir_dialect_library(MLIRGPU
IR/GPUDialect.cpp
Transforms/AllReduceLowering.cpp
Transforms/MemoryPromotion.cpp
Transforms/ParallelLoopMapper.cpp
Transforms/SerializeToBlob.cpp
+ Transforms/SerializeToCubin.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
LINK_COMPONENTS
Core
MC
+ ${NVPTX_LIBS}
DEPENDS
MLIRGPUOpsIncGen
MLIREDSC
MLIRIR
MLIRLLVMIR
+ MLIRLLVMToLLVMIRTranslation
MLIRSCF
MLIRPass
MLIRSideEffectInterfaces
MLIRSupport
MLIRTransformUtils
)
+
+if(MLIR_CUDA_RUNNER_ENABLED)
+ if(NOT MLIR_CUDA_CONVERSIONS_ENABLED)
+ message(SEND_ERROR
+ "Building mlir with cuda support requires the NVPTX backend")
+ endif()
+
+ # Configure CUDA language support. Using check_language first allows us to
+ # give a custom error message.
+ include(CheckLanguage)
+ check_language(CUDA)
+ if (CMAKE_CUDA_COMPILER)
+ enable_language(CUDA)
+ else()
+ message(SEND_ERROR
+ "Building mlir with cuda support requires a working CUDA install")
+ endif()
+
+ # Enable gpu-to-cubin pass.
+ target_compile_definitions(obj.MLIRGPU
+ PRIVATE
+ MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
+ )
+
+ # Add CUDA headers includes and the libcuda.so library.
+ target_include_directories(obj.MLIRGPU
+ PRIVATE
+ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+ )
+
+ find_library(CUDA_DRIVER_LIBRARY cuda)
+
+ target_link_libraries(MLIRGPU
+ PRIVATE
+ MLIRNVVMToLLVMIRTranslation
+ ${CUDA_DRIVER_LIBRARY}
+ )
+
+endif()
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
getOperation()->setAttr(gpuBinaryAnnotation, attr);
}
+/// Registers the LLVM dialect translation interface on `registry` (needed to
+/// translate the module to LLVM IR later) in addition to the base pass's
+/// dependent dialects.
+void gpu::SerializeToBlobPass::getDependentDialects(
+    DialectRegistry &registry) const {
+  registerLLVMDialectTranslation(registry);
+  OperationPass<gpu::GPUModuleOp>::getDependentDialects(registry);
+}
+
std::unique_ptr<llvm::TargetMachine>
gpu::SerializeToBlobPass::createTargetMachine() {
Location loc = getOperation().getLoc();
return std::unique_ptr<llvm::TargetMachine>{machine};
}
+
+/// Default implementation: translates the GPU module produced by
+/// getOperation() to an LLVM IR module in `llvmContext` using the registered
+/// dialect translation interfaces. Derived passes may override this.
+std::unique_ptr<llvm::Module>
+gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
+  return translateModuleToLLVMIR(getOperation(), llvmContext,
+                                 "LLVMDialectModule");
+}
--- /dev/null
+//===- SerializeToCubin.cpp - Convert GPU kernel to CUBIN blob ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that serializes a gpu module into CUBIN blob and
+// adds that blob as a string attribute of the module.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/GPU/Passes.h"
+
+#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include <cuda.h>
+
+using namespace mlir;
+
+/// Emits an MLIR error at `loc` describing a failed CUDA driver API call:
+/// `expr` is the textual form of the call, `result` the driver error code
+/// (expanded to a string via cuGetErrorString), and `buffer` the contents of
+/// the JIT error-log buffer (may be empty).
+static void emitCudaError(const llvm::Twine &expr, const char *buffer,
+                          CUresult result, Location loc) {
+  const char *error;
+  cuGetErrorString(result, &error);
+  emitError(loc, expr.concat(" failed with error code ")
+                     .concat(llvm::Twine{error})
+                     .concat("[")
+                     .concat(buffer)
+                     .concat("]"));
+}
+
+/// Evaluates `expr` and, on a non-zero CUresult, reports the failure via
+/// emitCudaError and returns a default-constructed value from the enclosing
+/// function. Relies on `jitErrorBuffer` and `loc` being in scope at the
+/// expansion site.
+#define RETURN_ON_CUDA_ERROR(expr)                                            \
+  do {                                                                        \
+    if (auto status = (expr)) {                                               \
+      emitCudaError(#expr, jitErrorBuffer, status, loc);                      \
+      return {};                                                              \
+    }                                                                         \
+  } while (false)
+
+namespace {
+/// Pass that serializes a GPU module to a CUBIN blob: the base class lowers
+/// the module to PTX, and this pass JIT-links the PTX into a CUBIN through
+/// the CUDA driver API.
+class SerializeToCubinPass
+    : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
+public:
+  SerializeToCubinPass();
+
+private:
+  // Registers the NVVM translation on top of the base class's dialects.
+  void getDependentDialects(DialectRegistry &registry) const override;
+
+  // Serializes PTX to CUBIN.
+  std::unique_ptr<std::vector<char>>
+  serializeISA(const std::string &isa) override;
+};
+} // namespace
+
+/// Sets the pass `option` to `value` unless the user already provided a
+/// value (e.g. on the command line).
+static void maybeSetOption(Pass::Option<std::string> &option,
+                           const char *value) {
+  if (!option.hasValue())
+    option = value;
+}
+
+/// Fills in default target options (64-bit NVPTX triple, sm_35 chip, PTX 6.0
+/// features) for any option the user did not set explicitly.
+SerializeToCubinPass::SerializeToCubinPass() {
+  maybeSetOption(this->triple, "nvptx64-nvidia-cuda");
+  maybeSetOption(this->chip, "sm_35");
+  maybeSetOption(this->features, "+ptx60");
+}
+
+/// Registers the NVVM dialect translation on `registry` (needed to lower
+/// NVVM ops to LLVM IR) in addition to the base class's LLVM translation.
+void SerializeToCubinPass::getDependentDialects(
+    DialectRegistry &registry) const {
+  registerNVVMDialectTranslation(registry);
+  gpu::SerializeToBlobPass::getDependentDialects(registry);
+}
+
+/// JIT-links the PTX string `isa` into a CUBIN using the CUDA driver's
+/// linker and returns the binary as a byte vector. On any driver failure an
+/// error is emitted at the module's location and an empty result is returned.
+std::unique_ptr<std::vector<char>>
+SerializeToCubinPass::serializeISA(const std::string &isa) {
+  Location loc = getOperation().getLoc();
+  char jitErrorBuffer[4096] = {0};
+
+  RETURN_ON_CUDA_ERROR(cuInit(0));
+
+  // Linking requires a device context.
+  CUdevice device;
+  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
+  CUcontext context;
+  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
+  CUlinkState linkState;
+
+  // Route linker diagnostics into jitErrorBuffer so failures are reportable.
+  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
+                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
+  void *jitOptionsVals[] = {jitErrorBuffer,
+                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
+
+  RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
+                                    jitOptions, /* jit options */
+                                    jitOptionsVals, /* jit option values */
+                                    &linkState));
+
+  // NOTE(review): the RETURN_ON_CUDA_ERROR early returns below leak `context`
+  // (and `linkState` once created) on failure — consider RAII-style cleanup;
+  // confirm whether this is acceptable for a pass that is signaling failure.
+  auto kernelName = getOperation().getName().str();
+  RETURN_ON_CUDA_ERROR(cuLinkAddData(
+      linkState, CUjitInputType::CU_JIT_INPUT_PTX,
+      const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
+      kernelName.c_str(), 0, /* number of jit options */
+      nullptr, /* jit options */
+      nullptr /* jit option values */
+      ));
+
+  void *cubinData;
+  size_t cubinSize;
+  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
+
+  // Copy the CUBIN out of the linker-owned buffer before destroying it.
+  char *cubinAsChar = static_cast<char *>(cubinData);
+  auto result =
+      std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
+
+  // This will also destroy the cubin data.
+  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
+  RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
+
+  return result;
+}
+
+/// Registers the "gpu-to-cubin" pass, which serializes GPU kernel functions
+/// to a CUBIN binary annotation on the GPU module.
+void mlir::registerGpuSerializeToCubinPass() {
+  PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
+      "gpu-to-cubin", "Lower GPU kernel function to CUBIN binary annotations",
+      [] {
+        // Initialize LLVM NVPTX backend so the serialization pipeline can
+        // create the NVPTX target machine when the pass runs.
+        LLVMInitializeNVPTXTarget();
+        LLVMInitializeNVPTXTargetInfo();
+        LLVMInitializeNVPTXTargetMC();
+        LLVMInitializeNVPTXAsmPrinter();
+
+        return std::make_unique<SerializeToCubinPass>();
+      });
+}
+#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
+void mlir::registerGpuSerializeToCubinPass() {}
+#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE