From 68dc3c7ab2ccaa4a5a1459ba506ef340ed23a5ed Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Wed, 15 Oct 2014 23:44:41 +0000
Subject: [PATCH] Preserve non-byval pointer alignment attributes using
 @llvm.assume when inlining

For pointer-typed function arguments, enhanced alignment can be asserted
using the 'align' attribute. When inlining, if this enhanced alignment
information is not otherwise available, preserve it using @llvm.assume-based
alignment assumptions.

llvm-svn: 219876
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 45 +++++++++++++
 llvm/test/Transforms/Inline/align.ll         | 98 ++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/align.ll

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 8d09f52..ca33e33 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -47,6 +47,11 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
   cl::Hidden,
   cl::desc("Convert noalias attributes to metadata during inlining."));

+static cl::opt<bool>
+PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
+  cl::init(true), cl::Hidden,
+  cl::desc("Convert align attributes to assumptions during inlining."));
+
 bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
                           bool InsertLifetime) {
   return InlineFunction(CallSite(CI), IFI, InsertLifetime);
 }
@@ -616,6 +621,41 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
   }
 }

+/// If the inlined function has non-byval align arguments, then
+/// add @llvm.assume-based alignment assumptions to preserve this information.
+static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
+  if (!PreserveAlignmentAssumptions || !IFI.DL)
+    return;
+
+  // To avoid inserting redundant assumptions, we should check for assumptions
+  // already in the caller. To do this, we might need a DT of the caller.
+  DominatorTree DT;
+  bool DTCalculated = false;
+
+  const Function *CalledFunc = CS.getCalledFunction();
+  for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+                                    E = CalledFunc->arg_end(); I != E; ++I) {
+    unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0;
+    if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) {
+      if (!DTCalculated) {
+        DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent()
+                                               ->getParent()));
+        DTCalculated = true;
+      }
+
+      // If we can already prove the asserted alignment in the context of the
+      // caller, then don't bother inserting the assumption.
+      Value *Arg = CS.getArgument(I->getArgNo());
+      if (getKnownAlignment(Arg, IFI.DL, IFI.AT, CS.getInstruction(),
+                            &DT) >= Align)
+        continue;
+
+      IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
+                                                                 Align);
+    }
+  }
+}
+
 /// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
 /// into the caller, update the specified callgraph to reflect the changes we
 /// made. Note that it's possible that not all code was copied over, so only
@@ -943,6 +983,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       VMap[I] = ActualArg;
     }

+    // Add alignment assumptions if necessary. We do this before the inlined
+    // instructions are actually cloned into the caller so that we can easily
+    // check what will be known at the start of the inlined code.
+    AddAlignmentAssumptions(CS, IFI);
+
     // We want the inliner to prune the code as it copies. We would LOVE to
     // have no dead or constant instructions leftover after inlining occurs
     // (which can happen, e.g., because an argument was constant), but we'll be
diff --git a/llvm/test/Transforms/Inline/align.ll b/llvm/test/Transforms/Inline/align.ll
new file mode 100644
index 0000000..9ac6d54
--- /dev/null
+++ b/llvm/test/Transforms/Inline/align.ll
@@ -0,0 +1,98 @@
+; RUN: opt -inline -preserve-alignment-assumptions-during-inlining -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* align 128 nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* align 128 nocapture %a, float* align 128 nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %ptrint1 = ptrtoint float* %b to i64
+; CHECK:   %maskedptr2 = and i64 %ptrint1, 127
+; CHECK:   %maskcond3 = icmp eq i64 %maskedptr2, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond3)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
-- 
2.7.4
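
A note on the mask arithmetic the CHECK lines encode: a pointer is 128-byte
aligned exactly when its address is a multiple of 128, i.e. when its low
seven bits are all zero, which is why the emitted code tests
"and i64 %ptrint, 127" against zero. A minimal standalone C++ sketch of the
same predicate (the helper name isAligned128 is ours, for illustration only):

  #include <cstdint>

  // A pointer is 128-byte aligned iff its address has its low
  // log2(128) = 7 bits clear, i.e. (addr & 127) == 0. This is the
  // condition that @llvm.assume(i1 %maskcond) asserts in the tests above.
  bool isAligned128(const void *P) {
    return (reinterpret_cast<std::uintptr_t>(P) & (128 - 1)) == 0;
  }

The single mask-and-compare is sufficient only because the alignment is a
power of two, which the 'align' parameter attribute guarantees.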
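The ptrtoint/and/icmp/assume sequence itself is produced by IRBuilder's
CreateAlignmentAssumption, called from AddAlignmentAssumptions in the patch.
The following hand-rolled sketch shows an equivalent expansion;
emitAlignmentAssumption is a hypothetical helper written for illustration
against the current C++ API (it is not the in-tree implementation), again
assuming Align is a power of two:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  // Emit assume((ptrtoint(Ptr) & (Align - 1)) == 0) before InsertPt,
  // mirroring the %ptrint/%maskedptr/%maskcond names in the test above.
  static void emitAlignmentAssumption(Value *Ptr, uint64_t Align,
                                      const DataLayout &DL,
                                      Instruction *InsertPt) {
    IRBuilder<> B(InsertPt);
    Module *M = InsertPt->getParent()->getParent()->getParent();

    Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
    Value *PtrInt = B.CreatePtrToInt(Ptr, IntPtrTy, "ptrint");
    Value *Masked = B.CreateAnd(PtrInt, ConstantInt::get(IntPtrTy, Align - 1),
                                "maskedptr");
    Value *Cond = B.CreateICmpEQ(Masked, ConstantInt::get(IntPtrTy, 0),
                                 "maskcond");
    B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::assume), Cond);
  }

Note the guard in the patch: before emitting anything, AddAlignmentAssumptions
asks getKnownAlignment whether the asserted alignment is already provable at
the call site. That is why @fooa, whose caller-side argument already carries
align 128, gets no assumption, while @foo and @foo2 each do.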