From 5acd6e05221574643feb8477ad07e89fd766ec53 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 17 Jan 2022 18:23:44 +0100 Subject: [PATCH] [AsyncToLLVM] Align frames to 64 bytes Coroutine lowering always takes the natural alignment when spilling to the frame (issue #53148) so using AVX2 or AVX512 in a coroutine doesn't work. Always overalign to 64 bytes to avoid this issue until we have a better solution. Differential Revision: https://reviews.llvm.org/D117501 --- mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp | 52 +++++++--------------- .../AsyncToLLVM/convert-coro-to-llvm.mlir | 3 +- 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp index a3ed0e1..7d50470 100644 --- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp +++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp @@ -14,6 +14,7 @@ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/Async/IR/Async.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h" @@ -228,37 +229,6 @@ static void addAsyncRuntimeApiDeclarations(ModuleOp module) { } //===----------------------------------------------------------------------===// -// Add malloc/free declarations to the module. -//===----------------------------------------------------------------------===// - -static constexpr const char *kMalloc = "malloc"; -static constexpr const char *kFree = "free"; - -static void addLLVMFuncDecl(ModuleOp module, ImplicitLocOpBuilder &builder, - StringRef name, Type ret, ArrayRef params) { - if (module.lookupSymbol(name)) - return; - Type type = LLVM::LLVMFunctionType::get(ret, params); - builder.create(name, type); -} - -/// Adds malloc/free declarations to the module. -static void addCRuntimeDeclarations(ModuleOp module) { - using namespace mlir::LLVM; - - MLIRContext *ctx = module.getContext(); - auto builder = - ImplicitLocOpBuilder::atBlockEnd(module.getLoc(), module.getBody()); - - auto voidTy = LLVMVoidType::get(ctx); - auto i64 = IntegerType::get(ctx, 64); - auto i8Ptr = LLVMPointerType::get(IntegerType::get(ctx, 8)); - - addLLVMFuncDecl(module, builder, kMalloc, i8Ptr, {i64}); - addLLVMFuncDecl(module, builder, kFree, voidTy, {i8Ptr}); -} - -//===----------------------------------------------------------------------===// // Coroutine resume function wrapper. //===----------------------------------------------------------------------===// @@ -365,11 +335,18 @@ public: // Get coroutine frame size: @llvm.coro.size.i64. auto coroSize = rewriter.create(loc, rewriter.getI64Type()); + // The coroutine lowering doesn't properly account for alignment of the + // frame, so align everything to 64 bytes which ought to be enough for + // everyone. https://llvm.org/PR53148 + auto coroAlign = rewriter.create( + op->getLoc(), rewriter.getI64Type(), rewriter.getI64IntegerAttr(64)); // Allocate memory for the coroutine frame. + auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn( + op->getParentOfType(), rewriter.getI64Type()); auto coroAlloc = rewriter.create( - loc, i8Ptr, SymbolRefAttr::get(rewriter.getContext(), kMalloc), - ValueRange(coroSize.getResult())); + loc, i8Ptr, SymbolRefAttr::get(allocFuncOp), + ValueRange{coroAlign, coroSize.getResult()}); // Begin a coroutine: @llvm.coro.begin. auto coroId = CoroBeginOpAdaptor(adaptor.getOperands()).id(); @@ -401,9 +378,11 @@ public: rewriter.create(loc, i8Ptr, adaptor.getOperands()); // Free the memory. - rewriter.replaceOpWithNewOp( - op, TypeRange(), SymbolRefAttr::get(rewriter.getContext(), kFree), - ValueRange(coroMem.getResult())); + auto freeFuncOp = + LLVM::lookupOrCreateFreeFn(op->getParentOfType()); + rewriter.replaceOpWithNewOp(op, TypeRange(), + SymbolRefAttr::get(freeFuncOp), + ValueRange(coroMem.getResult())); return success(); } @@ -968,7 +947,6 @@ void ConvertAsyncToLLVMPass::runOnOperation() { // We delay adding the resume function until it's needed because it currently // fails to compile unless '-O0' is specified. addAsyncRuntimeApiDeclarations(module); - addCRuntimeDeclarations(module); // Lower async.runtime and async.coro operations to Async Runtime API and // LLVM coroutine intrinsics. diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir index 7e47448..1377854 100644 --- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir +++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir @@ -14,7 +14,8 @@ func @coro_begin() { // CHECK: %[[ID:.*]] = llvm.intr.coro.id %0 = async.coro.id // CHECK: %[[SIZE:.*]] = llvm.intr.coro.size : i64 - // CHECK: %[[ALLOC:.*]] = llvm.call @malloc(%[[SIZE]]) + // CHECK: %[[ALIGN:.*]] = llvm.mlir.constant(64 : i64) : i64 + // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[SIZE]]) // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin %[[ID]], %[[ALLOC]] %1 = async.coro.begin %0 return -- 2.7.4