From 76c83b3595a534c5b28bd80039e2115358ba2291 Mon Sep 17 00:00:00 2001 From: Johannes de Fine Licht Date: Mon, 3 Apr 2023 09:07:55 +0000 Subject: [PATCH] [MLIR][LLVM] Put byval static allocas in the entry block. Reviewed By: gysit Differential Revision: https://reviews.llvm.org/D147311 --- mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp | 16 +++++++++--- mlir/test/Dialect/LLVMIR/inlining.mlir | 39 +++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp index b1a3fb7..71936ac 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp @@ -131,11 +131,19 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc, Value argument, Type elementType, unsigned elementTypeSize, unsigned targetAlignment) { + Block *entryBlock = &(*argument.getParentRegion()->begin()); // Allocate the new value on the stack. - Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(), - builder.getI64IntegerAttr(1)); - Value allocaOp = builder.create<LLVM::AllocaOp>( - loc, argument.getType(), elementType, one, targetAlignment); + Value allocaOp; + { + // Since this is a static alloca, we can put it directly in the entry block, + // so they can be absorbed into the prologue/epilogue at code generation. + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointToStart(entryBlock); + Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(), + builder.getI64IntegerAttr(1)); + allocaOp = builder.create<LLVM::AllocaOp>( + loc, argument.getType(), elementType, one, targetAlignment); + } // Copy the pointee to the newly allocated value. 
Value copySize = builder.create<LLVM::ConstantOp>( loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize)); diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir index afaa776..873ab1d 100644 --- a/mlir/test/Dialect/LLVMIR/inlining.mlir +++ b/mlir/test/Dialect/LLVMIR/inlining.mlir @@ -381,10 +381,17 @@ llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) { // CHECK-LABEL: llvm.func @test_byval // CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr -// CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f64 -// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]] llvm.func @test_byval(%ptr : !llvm.ptr) { + // Make sure the new static alloca goes to the entry block. + // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f64 + // CHECK: llvm.br ^[[BB1:[a-zA-Z0-9_]+]] + llvm.br ^bb1 + // CHECK: ^[[BB1]] +^bb1: + // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]] llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> () + llvm.br ^bb2 +^bb2: llvm.return } @@ -442,15 +449,27 @@ llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 1 llvm.return } -// CHECK-LABEL: llvm.func @test_byval_alloca -llvm.func @test_byval_alloca() { - // Make sure only the unaligned alloca triggers a memcpy. 
- %size = llvm.mlir.constant(1 : i64) : i64 - // CHECK: %[[ALLOCA:.+]] = llvm.alloca {{.+}}alignment = 1 - // CHECK: "llvm.intr.memcpy"(%{{.+}}, %[[ALLOCA]] +// CHECK-LABEL: llvm.func @test_byval_unaligned_alloca +llvm.func @test_byval_unaligned_alloca() { + %size = llvm.mlir.constant(4 : i64) : i64 + // CHECK-DAG: %[[SRC:.+]] = llvm.alloca {{.+}}alignment = 1 : i64 + // CHECK-DAG: %[[DST:.+]] = llvm.alloca {{.+}}alignment = 16 : i64 + // CHECK: "llvm.intr.memcpy"(%[[DST]], %[[SRC]] %unaligned = llvm.alloca %size x i16 { alignment = 1 } : (i64) -> !llvm.ptr llvm.call @aligned_byval_arg(%unaligned) : (!llvm.ptr) -> () + llvm.return +} + +// ----- + +llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 16 }) attributes {memory = #llvm.memory_effects} { + llvm.return +} + +// CHECK-LABEL: llvm.func @test_byval_aligned_alloca +llvm.func @test_byval_aligned_alloca() { // CHECK-NOT: memcpy + %size = llvm.mlir.constant(1 : i64) : i64 %aligned = llvm.alloca %size x i16 { alignment = 16 } : (i64) -> !llvm.ptr llvm.call @aligned_byval_arg(%aligned) : (!llvm.ptr) -> () llvm.return @@ -468,8 +487,8 @@ llvm.func @aligned_byval_arg(%ptr : !llvm.ptr { llvm.byval = i16, llvm.align = 1 // CHECK-LABEL: llvm.func @test_byval_global llvm.func @test_byval_global() { // Make sure only the unaligned global triggers a memcpy. - // CHECK: %[[UNALIGNED:.+]] = llvm.mlir.addressof @unaligned_global - // CHECK: %[[ALLOCA:.+]] = llvm.alloca + // CHECK-DAG: %[[UNALIGNED:.+]] = llvm.mlir.addressof @unaligned_global + // CHECK-DAG: %[[ALLOCA:.+]] = llvm.alloca // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[UNALIGNED]] // CHECK-NOT: llvm.alloca %unaligned = llvm.mlir.addressof @unaligned_global : !llvm.ptr -- 2.7.4