From 68dc3c7ab2ccaa4a5a1459ba506ef340ed23a5ed Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Wed, 15 Oct 2014 23:44:41 +0000
Subject: [PATCH] Preserve non-byval pointer alignment attributes using
 @llvm.assume when inlining

For pointer-typed function arguments, enhanced alignment can be asserted
using the 'align' attribute. When inlining, if this enhanced alignment
information is not otherwise available, preserve it using @llvm.assume-based
alignment assumptions.

llvm-svn: 219876
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 45 +++++++++++++
 llvm/test/Transforms/Inline/align.ll         | 98 ++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/align.ll

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 8d09f52..ca33e33 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -47,6 +47,11 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
   cl::Hidden,
   cl::desc("Convert noalias attributes to metadata during inlining."));

+static cl::opt<bool>
+PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
+  cl::init(true), cl::Hidden,
+  cl::desc("Convert align attributes to assumptions during inlining."));
+
 bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
                           bool InsertLifetime) {
   return InlineFunction(CallSite(CI), IFI, InsertLifetime);
 }
@@ -616,6 +621,41 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
   }
 }

+/// If the inlined function has non-byval align arguments, then
+/// add @llvm.assume-based alignment assumptions to preserve this information.
+static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
+  if (!PreserveAlignmentAssumptions || !IFI.DL)
+    return;
+
+  // To avoid inserting redundant assumptions, we should check for assumptions
+  // already in the caller. To do this, we might need a DT of the caller.
+  DominatorTree DT;
+  bool DTCalculated = false;
+
+  const Function *CalledFunc = CS.getCalledFunction();
+  for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+                                    E = CalledFunc->arg_end(); I != E; ++I) {
+    unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0;
+    if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) {
+      if (!DTCalculated) {
+        DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent()
+                                               ->getParent()));
+        DTCalculated = true;
+      }
+
+      // If we can already prove the asserted alignment in the context of the
+      // caller, then don't bother inserting the assumption.
+      Value *Arg = CS.getArgument(I->getArgNo());
+      if (getKnownAlignment(Arg, IFI.DL, IFI.AT, CS.getInstruction(),
+                            &DT) >= Align)
+        continue;
+
+      IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
+                                                                 Align);
+    }
+  }
+}
+
 /// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
 /// into the caller, update the specified callgraph to reflect the changes we
 /// made. Note that it's possible that not all code was copied over, so only
@@ -943,6 +983,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       VMap[I] = ActualArg;
     }

+    // Add alignment assumptions if necessary. We do this before the inlined
+    // instructions are actually cloned into the caller so that we can easily
+    // check what will be known at the start of the inlined code.
+    AddAlignmentAssumptions(CS, IFI);
+
     // We want the inliner to prune the code as it copies. We would LOVE to
     // have no dead or constant instructions leftover after inlining occurs
     // (which can happen, e.g., because an argument was constant), but we'll be
diff --git a/llvm/test/Transforms/Inline/align.ll b/llvm/test/Transforms/Inline/align.ll
new file mode 100644
index 0000000..9ac6d54
--- /dev/null
+++ b/llvm/test/Transforms/Inline/align.ll
@@ -0,0 +1,98 @@
+; RUN: opt -inline -preserve-alignment-assumptions-during-inlining -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* align 128 nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* align 128 nocapture %a, float* align 128 nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %ptrint1 = ptrtoint float* %b to i64
+; CHECK:   %maskedptr2 = and i64 %ptrint1, 127
+; CHECK:   %maskcond3 = icmp eq i64 %maskedptr2, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond3)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
-- 
2.7.4
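
A note on the mask arithmetic the CHECK lines encode: a pointer is 128-byte
aligned exactly when its address is a multiple of 128, i.e. when its low
seven bits are all zero, which is why the emitted code tests
"and i64 %ptrint, 127" against zero. A minimal standalone C++ sketch of the
same predicate (the helper name isAligned128 is ours, for illustration only):

  #include <cstdint>

  // A pointer is 128-byte aligned iff its address has its low
  // log2(128) = 7 bits clear, i.e. (addr & 127) == 0. This is the
  // condition that @llvm.assume(i1 %maskcond) asserts in the tests above.
  bool isAligned128(const void *P) {
    return (reinterpret_cast<std::uintptr_t>(P) & (128 - 1)) == 0;
  }

The single mask-and-compare is sufficient only because the alignment is a
power of two, which the 'align' parameter attribute guarantees.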
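The ptrtoint/and/icmp/assume sequence itself is produced by IRBuilder's
CreateAlignmentAssumption, called from AddAlignmentAssumptions in the patch.
The following hand-rolled sketch shows an equivalent expansion;
emitAlignmentAssumption is a hypothetical helper written for illustration
against the current C++ API (it is not the in-tree implementation), again
assuming Align is a power of two:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  // Emit assume((ptrtoint(Ptr) & (Align - 1)) == 0) before InsertPt,
  // mirroring the %ptrint/%maskedptr/%maskcond names in the test above.
  static void emitAlignmentAssumption(Value *Ptr, uint64_t Align,
                                      const DataLayout &DL,
                                      Instruction *InsertPt) {
    IRBuilder<> B(InsertPt);
    Module *M = InsertPt->getParent()->getParent()->getParent();

    Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
    Value *PtrInt = B.CreatePtrToInt(Ptr, IntPtrTy, "ptrint");
    Value *Masked = B.CreateAnd(PtrInt, ConstantInt::get(IntPtrTy, Align - 1),
                                "maskedptr");
    Value *Cond = B.CreateICmpEQ(Masked, ConstantInt::get(IntPtrTy, 0),
                                 "maskcond");
    B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::assume), Cond);
  }

Note the guard in the patch: before emitting anything, AddAlignmentAssumptions
asks getKnownAlignment whether the asserted alignment is already provable at
the call site. That is why @fooa, whose caller-side argument already carries
align 128, gets no assumption, while @foo and @foo2 each do.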