namespace llvm {
class Type;
class LLVMContext;
+namespace sys {
+template <bool mt_only>
+class SmartMutex;
+} // end namespace sys
} // end namespace llvm
namespace mlir {
/// function confirms that the Operation has the desired properties.
bool satisfiesLLVMModule(Operation *op);
+/// Clones the given module into the provided context. This is implemented by
+/// transforming the module into bitcode and then reparsing the bitcode in the
+/// provided context.
+std::unique_ptr<llvm::Module>
+cloneModuleIntoNewContext(llvm::LLVMContext *context, llvm::Module *module);
+
} // end namespace LLVM
} // end namespace mlir
~LLVMDialect();
llvm::LLVMContext &getLLVMContext();
llvm::Module &getLLVMModule();
+ llvm::sys::SmartMutex<true> &getLLVMContextMutex();
private:
friend LLVMType;
/// Original and translated module.
Operation *mlirModule;
std::unique_ptr<llvm::Module> llvmModule;
-
/// A converter for translating debug information.
std::unique_ptr<detail::DebugTranslation> debugTranslation;
std::unique_ptr<llvm::OpenMPIRBuilder> ompBuilder;
/// Precomputed pointer to OpenMP dialect.
const Dialect *ompDialect;
+ /// Precomputed pointer to the LLVM dialect.
+ LLVMDialect *llvmDialect;
/// Mappings between llvm.mlir.global definitions and corresponding globals.
DenseMap<Operation *, llvm::GlobalValue *> globalsMapping;
#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
llvm::Module &module, llvm::TargetMachine &target_machine) {
std::string ptx;
{
+ // Clone the llvm module into a new context to enable concurrent compilation
+ // with multiple threads.
+ // TODO(zinenko): Reevaluate model of ownership of LLVMContext in
+ // LLVMDialect.
+ llvm::LLVMContext llvmContext;
+ auto clone = LLVM::cloneModuleIntoNewContext(&llvmContext, &module);
+
llvm::raw_string_ostream stream(ptx);
llvm::buffer_ostream pstream(stream);
llvm::legacy::PassManager codegen_passes;
target_machine.addPassesToEmitFile(codegen_passes, pstream, nullptr,
llvm::CGFT_AssemblyFile);
- codegen_passes.run(module);
+ codegen_passes.run(*clone);
}
return ptx;
void addParamToList(OpBuilder &builder, Location loc, Value param, Value list,
unsigned pos, Value one);
Value setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder);
- Value generateKernelNameConstant(StringRef name, Location loc,
- OpBuilder &builder);
+ Value generateKernelNameConstant(StringRef moduleName, StringRef name,
+ Location loc, OpBuilder &builder);
void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp);
public:
// %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*">
// }
Value GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant(
- StringRef name, Location loc, OpBuilder &builder) {
+ StringRef moduleName, StringRef name, Location loc, OpBuilder &builder) {
// Make sure the trailing zero is included in the constant.
std::vector<char> kernelName(name.begin(), name.end());
kernelName.push_back('\0');
- std::string globalName = std::string(llvm::formatv("{0}_kernel_name", name));
+ std::string globalName =
+ std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name));
return LLVM::createGlobalString(
loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()),
LLVM::Linkage::Internal, llvmDialect);
// the kernel function.
auto cuOwningModuleRef =
builder.create<LLVM::LoadOp>(loc, getPointerType(), cuModule);
- auto kernelName = generateKernelNameConstant(launchOp.kernel(), loc, builder);
+ auto kernelName = generateKernelNameConstant(launchOp.getKernelModuleName(),
+ launchOp.kernel(), loc, builder);
auto cuFunction = allocatePointer(builder, loc);
auto cuModuleGetFunction =
getOperation().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleGetFunctionName);
target_link_libraries(MLIRLLVMIR
PUBLIC
LLVMAsmParser
+ LLVMBitReader
+ LLVMBitWriter
LLVMCore
LLVMSupport
LLVMFrontendOpenMP
#include "llvm/ADT/StringSwitch.h"
#include "llvm/AsmParser/Parser.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
llvm::LLVMContext &LLVMDialect::getLLVMContext() { return impl->llvmContext; }
llvm::Module &LLVMDialect::getLLVMModule() { return impl->module; }
+/// Returns the mutex stored in the dialect implementation. Callers lock it
+/// (e.g. through llvm::sys::SmartScopedLock) to serialize access to the LLVM
+/// context owned by this dialect.
+llvm::sys::SmartMutex<true> &LLVMDialect::getLLVMContextMutex() {
+ return impl->mutex;
+}
/// Parse a type registered to this dialect.
Type LLVMDialect::parseType(DialectAsmParser &parser) const {
return op->hasTrait<OpTrait::SymbolTable>() &&
op->hasTrait<OpTrait::IsIsolatedFromAbove>();
}
+
+/// Produces a copy of `module` that lives in `context`.
+///
+/// The module is first written out as bitcode into an in-memory buffer, and
+/// that bitcode is then parsed back using `context`. The parse result is
+/// unwrapped with `cantFail`, so no error is reported back to the caller.
+std::unique_ptr<llvm::Module>
+mlir::LLVM::cloneModuleIntoNewContext(llvm::LLVMContext *context,
+                                      llvm::Module *module) {
+  // Step 1: serialize the source module into `bitcode`.
+  SmallVector<char, 1> bitcode;
+  {
+    llvm::raw_svector_ostream bitcodeStream(bitcode);
+    WriteBitcodeToFile(*module, bitcodeStream);
+  }
+
+  // Step 2: wrap the serialized bytes in a non-owning, named buffer view.
+  StringRef bitcodeBytes(bitcode.data(), bitcode.size());
+  llvm::MemoryBufferRef serialized(bitcodeBytes, "cloned module buffer");
+
+  // Step 3: rebuild the module inside the destination context.
+  auto reparsed = parseBitcodeFile(serialized, *context);
+  return cantFail(std::move(reparsed));
+}
PUBLIC
MLIRLLVMIR
MLIRTargetLLVMIR
- LLVMBitReader
- LLVMBitWriter
LLVMExecutionEngine
LLVMObject
LLVMOrcJIT
//
//===----------------------------------------------------------------------===//
#include "mlir/ExecutionEngine/ExecutionEngine.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/LLVMIR.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
// Clone module in a new LLVMContext since translateModuleToLLVMIR buries
// ownership too deeply.
// TODO(zinenko): Reevaluate model of ownership of LLVMContext in LLVMDialect.
- SmallVector<char, 1> buffer;
- {
- llvm::raw_svector_ostream os(buffer);
- WriteBitcodeToFile(*llvmModule, os);
- }
- llvm::MemoryBufferRef bufferRef(StringRef(buffer.data(), buffer.size()),
- "cloned module buffer");
- auto expectedModule = parseBitcodeFile(bufferRef, *ctx);
- if (!expectedModule)
- return expectedModule.takeError();
- std::unique_ptr<Module> deserModule = std::move(*expectedModule);
+ std::unique_ptr<Module> deserModule =
+ LLVM::cloneModuleIntoNewContext(ctx.get(), llvmModule.get());
auto dataLayout = deserModule->getDataLayout();
// Callback to create the object layer with symbol resolution to current
debugTranslation(
std::make_unique<DebugTranslation>(module, *this->llvmModule)),
ompDialect(
- module->getContext()->getRegisteredDialect<omp::OpenMPDialect>()) {
+ module->getContext()->getRegisteredDialect<omp::OpenMPDialect>()),
+ llvmDialect(module->getContext()->getRegisteredDialect<LLVMDialect>()) {
assert(satisfiesLLVMModule(mlirModule) &&
"mlirModule should honor LLVM's module semantics.");
}
/// Create named global variables that correspond to llvm.mlir.global
/// definitions.
LogicalResult ModuleTranslation::convertGlobals() {
+ // Lock access to the llvm context.
+ llvm::sys::SmartScopedLock<true> scopedLock(
+ llvmDialect->getLLVMContextMutex());
for (auto op : getModuleBody(mlirModule).getOps<LLVM::GlobalOp>()) {
llvm::Type *type = op.getType().getUnderlyingType();
llvm::Constant *cst = llvm::UndefValue::get(type);
}
LogicalResult ModuleTranslation::convertFunctions() {
+ // Lock access to the llvm context.
+ llvm::sys::SmartScopedLock<true> scopedLock(
+ llvmDialect->getLLVMContextMutex());
// Declare all functions first because there may be function calls that form a
// call graph with cycles.
for (auto function : getModuleBody(mlirModule).getOps<LLVMFuncOp>()) {
ModuleTranslation::prepareLLVMModule(Operation *m) {
auto *dialect = m->getContext()->getRegisteredDialect<LLVM::LLVMDialect>();
assert(dialect && "LLVM dialect must be registered");
+ // Lock the LLVM context as we might create new types here.
+ llvm::sys::SmartScopedLock<true> scopedLock(dialect->getLLVMContextMutex());
auto llvmModule = llvm::CloneModule(dialect->getLLVMModule());
if (!llvmModule)
--- /dev/null
+// RUN: mlir-cuda-runner %s --print-ir-after-all --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s --dump-input=always
+
+// CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+// Test entry point: fills a 13-element i32 buffer from GPU threads and prints
+// it. The same kernel body is launched twice from this one function.
+// NOTE(review): presumably the duplicate launch exercises per-kernel-module
+// naming of the generated kernel-name globals; confirm against the pass.
+func @main() {
+ // Statically sized buffer, then a view of it with a dynamic dimension.
+ %arg = alloc() : memref<13xi32>
+ %dst = memref_cast %arg : memref<13xi32> to memref<?xi32>
+ %one = constant 1 : index
+ // Size of dimension 0 of the buffer; used below as the x block size.
+ %sx = dim %dst, 0 : memref<?xi32>
+ // NOTE(review): presumably registers the host buffer with the CUDA runtime
+ // wrappers so the kernels can access it; the callee is only declared here.
+ call @mcuMemHostRegisterMemRef1dInt32(%dst) : (memref<?xi32>) -> ()
+ // First launch: a 1x1x1 grid of %sx x 1 x 1 threads; each thread stores its
+ // x thread index (cast to i32) at that index in the buffer.
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
+ threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+ %t0 = index_cast %tx : index to i32
+ store %t0, %dst[%tx] : memref<?xi32>
+ gpu.terminator
+ }
+ // Second launch: same configuration and body as the first one.
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
+ threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+ %t0 = index_cast %tx : index to i32
+ store %t0, %dst[%tx] : memref<?xi32>
+ gpu.terminator
+ }
+ // Print the buffer through an unranked view; the expected output line above
+ // matches the values 0 through 12.
+ %U = memref_cast %dst : memref<?xi32> to memref<*xi32>
+ call @print_memref_i32(%U) : (memref<*xi32>) -> ()
+ return
+}
+
+func @mcuMemHostRegisterMemRef1dInt32(%ptr : memref<?xi32>)
+func @print_memref_i32(%ptr : memref<*xi32>)