template <typename T> class OpPassBase;
using OwnedCubin = std::unique_ptr<std::vector<char>>;
-using CubinGenerator = std::function<OwnedCubin(const std::string &, FuncOp &)>;
+using CubinGenerator =
+ std::function<OwnedCubin(const std::string &, Location, StringRef)>;
/// Creates a pass to convert kernel functions into CUBIN blobs.
///
: cubinGenerator(cubinGenerator) {}
void runOnModule() override {
- if (!getModule().getAttrOfType<UnitAttr>(
- gpu::GPUDialect::getKernelModuleAttrName()))
+ ModuleOp module = getModule();
+ if (!module.getAttrOfType<UnitAttr>(
+ gpu::GPUDialect::getKernelModuleAttrName()) ||
+ !module.getName())
return;
// Make sure the NVPTX target is initialized.
LLVMInitializeNVPTXTargetMC();
LLVMInitializeNVPTXAsmPrinter();
- auto llvmModule = translateModuleToNVVMIR(getModule());
+ auto llvmModule = translateModuleToNVVMIR(module);
if (!llvmModule)
return signalPassFailure();
- for (auto function : getModule().getOps<FuncOp>()) {
- if (!gpu::GPUDialect::isKernel(function))
- continue;
- if (failed(translateGpuKernelToCubinAnnotation(*llvmModule, function)))
- signalPassFailure();
- }
+ // Translate the module to CUBIN and attach the result as an attribute to
+ // the module.
+ if (auto cubinAttr = translateGpuModuleToCubinAnnotation(
+ *llvmModule, module.getLoc(), *module.getName()))
+ module.setAttr(kCubinAnnotation, cubinAttr);
+ else
+ signalPassFailure();
}
private:
static OwnedCubin compilePtxToCubinForTesting(const std::string &ptx,
- FuncOp &function);
+ Location, StringRef);
std::string translateModuleToPtx(llvm::Module &module,
llvm::TargetMachine &target_machine);
- /// Converts llvmModule to cubin using the user-provded generator.
- OwnedCubin convertModuleToCubin(llvm::Module &llvmModule, FuncOp &function);
+ /// Converts llvmModule to cubin using the user-provided generator. Location is
+ /// used for error reporting and name is forwarded to the CUBIN generator to
+ /// use in its logging mechanisms.
+ OwnedCubin convertModuleToCubin(llvm::Module &llvmModule, Location loc,
+ StringRef name);
- /// Translates llvmModule to cubin and assigns it to attribute of function.
- LogicalResult translateGpuKernelToCubinAnnotation(llvm::Module &llvmModule,
- FuncOp &function);
+ /// Translates llvmModule to cubin and returns the result as an attribute.
+ StringAttr translateGpuModuleToCubinAnnotation(llvm::Module &llvmModule,
+ Location loc, StringRef name);
CubinGenerator cubinGenerator;
};
OwnedCubin
GpuKernelToCubinPass::compilePtxToCubinForTesting(const std::string &ptx,
- FuncOp &function) {
+ Location, StringRef) {
const char data[] = "CUBIN";
return std::make_unique<std::vector<char>>(data, data + sizeof(data) - 1);
}
OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
- FuncOp &function) {
+ Location loc,
+ StringRef name) {
std::unique_ptr<llvm::TargetMachine> targetMachine;
{
std::string error;
const llvm::Target *target =
llvm::TargetRegistry::lookupTarget("", triple, error);
if (target == nullptr) {
- function.emitError("cannot initialize target triple");
+ emitError(loc, "cannot initialize target triple");
return {};
}
targetMachine.reset(
auto ptx = translateModuleToPtx(llvmModule, *targetMachine);
- return cubinGenerator(ptx, function);
+ return cubinGenerator(ptx, loc, name);
}
-LogicalResult GpuKernelToCubinPass::translateGpuKernelToCubinAnnotation(
- llvm::Module &llvmModule, FuncOp &function) {
- auto cubin = convertModuleToCubin(llvmModule, function);
+StringAttr GpuKernelToCubinPass::translateGpuModuleToCubinAnnotation(
+ llvm::Module &llvmModule, Location loc, StringRef name) {
+ auto cubin = convertModuleToCubin(llvmModule, loc, name);
if (!cubin)
- return function.emitError("translation to CUDA binary failed.");
-
- Builder builder(function.getContext());
- function.setAttr(kCubinAnnotation,
- builder.getStringAttr({cubin->data(), cubin->size()}));
-
- // Remove the body of the kernel function now that it has been translated.
- // The main reason to do this is so that the resulting module no longer
- // contains the NVVM instructions (typically contained in the kernel bodies)
- // and hence can be compiled into host code by a separate pass.
- function.eraseBody();
-
- return success();
+ return {};
+ return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
}
std::unique_ptr<OpPassBase<ModuleOp>>
void declareCudaFunctions(Location loc);
Value *setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder);
- Value *generateKernelNameConstant(FuncOp kernelFunction, Location &loc,
+ Value *generateKernelNameConstant(StringRef name, Location &loc,
OpBuilder &builder);
void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp);
// %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*">
// }
Value *GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant(
- FuncOp kernelFunction, Location &loc, OpBuilder &builder) {
+ StringRef name, Location &loc, OpBuilder &builder) {
// Make sure the trailing zero is included in the constant.
- std::vector<char> kernelName(kernelFunction.getName().begin(),
- kernelFunction.getName().end());
+ std::vector<char> kernelName(name.begin(), name.end());
kernelName.push_back('\0');
- std::string globalName =
- llvm::formatv("{0}_kernel_name", kernelFunction.getName());
+ std::string globalName = llvm::formatv("{0}_kernel_name", name);
return LLVM::createGlobalString(
loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()),
llvmDialect);
auto kernelModule =
getModule().lookupSymbol<ModuleOp>(launchOp.getKernelModuleName());
assert(kernelModule && "expected a kernel module");
- auto kernelFunction = kernelModule.lookupSymbol<FuncOp>(launchOp.kernel());
- assert(kernelFunction && "expected a kernel function");
- auto cubinAttr = kernelFunction.getAttrOfType<StringAttr>(kCubinAnnotation);
+ auto cubinAttr = kernelModule.getAttrOfType<StringAttr>(kCubinAnnotation);
if (!cubinAttr) {
- kernelFunction.emitOpError()
+ kernelModule.emitOpError()
<< "missing " << kCubinAnnotation << " attribute";
return signalPassFailure();
}
// the kernel function.
auto cuOwningModuleRef =
builder.create<LLVM::LoadOp>(loc, getPointerType(), cuModule);
- auto kernelName = generateKernelNameConstant(kernelFunction, loc, builder);
+ auto kernelName = generateKernelNameConstant(launchOp.kernel(), loc, builder);
auto cuFunction = allocatePointer(builder, loc);
FuncOp cuModuleGetFunction =
getModule().lookupSymbol<FuncOp>(cuModuleGetFunctionName);
// CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00")
// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN")
- module @kernel_module attributes {gpu.kernel_module} {
+ module @kernel_module attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"} {
func @kernel(!llvm.float, !llvm<"float*">)
- attributes { gpu.kernel, nvvm.cubin = "CUBIN" }
+ attributes { gpu.kernel }
}
func @foo() {
// RUN: mlir-opt %s --test-kernel-to-cubin -split-input-file | FileCheck %s
-module attributes {gpu.kernel_module} {
+// CHECK: attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"}
+module @kernels attributes {gpu.kernel_module} {
func @kernel(%arg0 : !llvm.float, %arg1 : !llvm<"float*">)
- // CHECK: attributes {gpu.kernel, nvvm.cubin = "CUBIN"}
attributes { gpu.kernel } {
- // CHECK-NOT: llvm.return
llvm.return
}
}
using namespace mlir;
inline void emit_cuda_error(const llvm::Twine &message, const char *buffer,
- CUresult error, FuncOp &function) {
- function.emitError(message.concat(" failed with error code ")
- .concat(llvm::Twine{error})
- .concat("[")
- .concat(buffer)
- .concat("]"));
+ CUresult error, Location loc) {
+ emitError(loc, message.concat(" failed with error code ")
+ .concat(llvm::Twine{error})
+ .concat("[")
+ .concat(buffer)
+ .concat("]"));
}
#define RETURN_ON_CUDA_ERROR(expr, msg) \
{ \
auto _cuda_error = (expr); \
if (_cuda_error != CUDA_SUCCESS) { \
- emit_cuda_error(msg, jitErrorBuffer, _cuda_error, function); \
+ emit_cuda_error(msg, jitErrorBuffer, _cuda_error, loc); \
return {}; \
} \
}
-OwnedCubin compilePtxToCubin(const std::string ptx, FuncOp &function) {
+OwnedCubin compilePtxToCubin(const std::string ptx, Location loc,
+ StringRef name) {
char jitErrorBuffer[4096] = {0};
RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit");
RETURN_ON_CUDA_ERROR(
cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
const_cast<void *>(static_cast<const void *>(ptx.c_str())),
- ptx.length(), function.getName().data(), /* kernel name */
- 0, /* number of jit options */
- nullptr, /* jit options */
- nullptr /* jit option values */
+ ptx.length(), name.data(), /* kernel name */
+ 0, /* number of jit options */
+ nullptr, /* jit options */
+ nullptr /* jit option values */
),
"cuLinkAddData");