#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const Function *F, const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), TheFunction(F), Hints(Hints) {}
+ AssumptionTracker *AT, const Function *F,
+ const LoopVectorizeHints *Hints)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+ AT(AT), TheFunction(F), Hints(Hints) {
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+ }
/// Information about vectorization costs
struct VectorizationFactor {
*TheFunction, DL, Message.str());
}
+ /// Values used only by @llvm.assume calls.
+ SmallPtrSet<const Value *, 32> EphValues;
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ /// Tracker for @llvm.assume.
+ AssumptionTracker *AT;
const Function *TheFunction;
// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
+ AssumptionTracker *AT;
bool DisableUnrolling;
bool AlwaysVectorize;
BFI = &getAnalysis<BlockFrequencyInfo>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
AA = &getAnalysis<AliasAnalysis>();
+ AT = &getAnalysis<AssumptionTracker>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, F, &Hints);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AT, F,
+ &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addRequired<BlockFrequencyInfo>();
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
switch (ID) {
+ case Intrinsic::assume:
case Intrinsic::lifetime_end:
case Intrinsic::lifetime_start:
scalarizeInstruction(it);
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
Type *T = it->getType();
+ // Ignore ephemeral values.
+ if (EphValues.count(it))
+ continue;
+
// Only examine Loads, Stores and PHINodes.
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
continue;
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
+ // Ignore ephemeral values.
+ if (EphValues.count(I))
+ continue;
+
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
for (unsigned int j=0, e = List.size(); j < e; ++j)
if (isa<DbgInfoIntrinsic>(it))
continue;
+ // Ignore ephemeral values.
+ if (EphValues.count(it))
+ continue;
+
unsigned C = getInstructionCost(it, VF);
// Check if we should override the cost.
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
--- /dev/null
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
+; Runs the loop vectorizer over this module and matches the printed IR
+; against the FileCheck directives embedded next to each function below.
+; NOTE(review): the -mtriple given on the command line above names a macosx
+; target while the module-level triple below names linux-gnu; presumably the
+; command-line value takes precedence -- confirm which target the
+; expectations in this file were written for.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+; test1: a single-block counted loop that loads b[i], passes the result of
+; (b[i] > 100.0) to @llvm.assume, and stores b[i] + 1.0 to a[i]. The assumed
+; condition is computed inside the loop from the loaded value.
+define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+; The directives below expect at least eight occurrences of @llvm.assume
+; after the vector.body label and before the for.body label in the output.
+; NOTE(review): eight is presumably one scalar call per lane of the widened
+; and unrolled iteration -- confirm against the vector width and unroll
+; factor chosen for the -mcpu on the command line.
+; CHECK-LABEL: @test1
+; CHECK: vector.body:
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: for.body:
+; CHECK: ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4
+; The comparison result feeds only the assume call in this function.
+ %cmp1 = fcmp ogt float %0, 1.000000e+02
+ tail call void @llvm.assume(i1 %cmp1)
+ %add = fadd float %0, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %add, float* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; The exit test compares the pre-increment induction value with 1599, so the
+; body executes for i = 0 through 1599.
+ %exitcond = icmp eq i64 %indvars.iv, 1599
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Function Attrs: nounwind
+; Declaration of the assume intrinsic called from the loop bodies of @test1
+; and @test2.
+declare void @llvm.assume(i1) #1
+
+; Attribute group #0 is attached to the two test functions; #1 is attached to
+; the intrinsic declaration above.
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+; Aggregate of two float pointers; @test2 takes a pointer to this type.
+%struct.data = type { float*, float* }
+
+; Function Attrs: nounwind uwtable
+; test2: like @test1, but the assumed conditions are computed once in the
+; entry block rather than inside the loop. The loop reads through the pointer
+; loaded from field 1 of %d, adds 1.0, and writes through the pointer loaded
+; from field 0, calling @llvm.assume on each entry-block condition once per
+; iteration.
+define void @test2(%struct.data* nocapture readonly %d) #0 {
+entry:
+; Load the field-1 pointer and test whether its low five bits are zero,
+; i.e. whether the address is a multiple of 32.
+ %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1
+ %0 = load float** %b, align 8
+ %ptrint = ptrtoint float* %0 to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+; Same multiple-of-32 test for the field-0 pointer.
+ %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0
+ %1 = load float** %a, align 8
+ %ptrint2 = ptrtoint float* %1 to i64
+ %maskedptr3 = and i64 %ptrint2, 31
+ %maskcond4 = icmp eq i64 %maskedptr3, 0
+ br label %for.body
+
+; The directives below expect at least sixteen occurrences of @llvm.assume
+; after the vector.body label and before the for.body label in the output.
+; NOTE(review): sixteen is presumably the two source-level calls replicated
+; once per lane of the widened and unrolled iteration -- confirm against the
+; vector width and unroll factor chosen for the -mcpu on the command line.
+; CHECK-LABEL: @test2
+; CHECK: vector.body:
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: for.body:
+; CHECK: ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; Both assumed conditions are defined in the entry block, so they do not
+; change between iterations.
+ tail call void @llvm.assume(i1 %maskcond)
+ %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv
+ %2 = load float* %arrayidx, align 4
+ %add = fadd float %2, 1.000000e+00
+ tail call void @llvm.assume(i1 %maskcond4)
+ %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv
+ store float %add, float* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; The exit test compares the pre-increment induction value with 1599, so the
+; body executes for i = 0 through 1599.
+ %exitcond = icmp eq i64 %indvars.iv, 1599
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+