/// \brief Annotate memory instructions in the versioned loop with no-alias
/// metadata based on the memchecks issued.
+ ///
+ /// This is just a wrapper that calls prepareNoAliasMetadata and
+ /// annotateInstWithNoAlias on the instructions of the versioned loop.
void annotateLoopWithNoAlias();
+ /// \brief Set up the aliasing scopes based on the memchecks. This needs to
+ /// be called before the first call to annotateInstWithNoAlias.
+ void prepareNoAliasMetadata();
+
+ /// \brief Add the noalias annotations to \p VersionedInst.
+ ///
+ /// \p OrigInst is the instruction corresponding to \p VersionedInst in the
+ /// original loop. Initialize the aliasing scopes with
+ /// prepareNoAliasMetadata once before this can be called.
+ void annotateInstWithNoAlias(Instruction *VersionedInst,
+ const Instruction *OrigInst);
+
private:
/// \brief Adds the necessary PHI nodes for the versioned loops based on the
/// loop-defined values that are used outside the loop.
void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside);
- /// \brief Set up the aliasing scopes based on the memchecks. This needs to
- /// be called before the first call to annotateInstWithNoAlias.
- void prepareNoAliasMetadata();
-
/// \brief Add the noalias annotations to \p I. Initialize the aliasing
/// scopes with prepareNoAliasMetadata once before this can be called.
- void annotateInstWithNoAlias(Instruction *I);
+ void annotateInstWithNoAlias(Instruction *I) {
+ annotateInstWithNoAlias(I, I);
+ }
/// \brief The original loop. This becomes the "versioned" one. I.e.,
/// control flows here if pointers in the loop don't alias.
}
}
-void LoopVersioning::annotateInstWithNoAlias(Instruction *I) {
+void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
+ const Instruction *OrigInst) {
if (!AnnotateNoAlias)
return;
LLVMContext &Context = VersionedLoop->getHeader()->getContext();
- Value *Ptr = isa<LoadInst>(I) ? cast<LoadInst>(I)->getPointerOperand()
- : cast<StoreInst>(I)->getPointerOperand();
+ const Value *Ptr = isa<LoadInst>(OrigInst)
+ ? cast<LoadInst>(OrigInst)->getPointerOperand()
+ : cast<StoreInst>(OrigInst)->getPointerOperand();
// Find the group for the pointer and then add the scope metadata.
auto Group = PtrToGroup.find(Ptr);
if (Group != PtrToGroup.end()) {
- I->setMetadata(
+ VersionedInst->setMetadata(
LLVMContext::MD_alias_scope,
- MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
- MDNode::get(Context, GroupToScope[Group->second])));
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(Context, GroupToScope[Group->second])));
// Add the no-alias metadata.
auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
- I->setMetadata(
+ VersionedInst->setMetadata(
LLVMContext::MD_noalias,
- MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
- NonAliasingScopeList->second));
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_noalias),
+ NonAliasingScopeList->second));
}
}
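
For readers of the new interface: the intended calling sequence is prepareNoAliasMetadata() once, followed by annotateInstWithNoAlias() for each memory instruction of the manually produced copy. The sketch below is illustrative only and is not part of the patch; the free function, its parameters, and the ClonedCopyOf map are assumptions standing in for whatever cloning bookkeeping the caller maintains.

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
using namespace llvm;

// Hypothetical caller: the loop body was already cloned by other means and
// ClonedCopyOf maps each original instruction to its clone.
static void annotateClonedLoop(
    const LoopAccessInfo &LAI, Loop *OrigLoop, LoopInfo *LI, DominatorTree *DT,
    ScalarEvolution *SE,
    const DenseMap<Instruction *, Instruction *> &ClonedCopyOf) {
  LoopVersioning LVer(LAI, OrigLoop, LI, DT, SE);
  // Set up the aliasing scopes once, before any annotation.
  LVer.prepareNoAliasMetadata();

  for (BasicBlock *BB : OrigLoop->blocks())
    for (Instruction &OrigInst : *BB)
      if (isa<LoadInst>(OrigInst) || isa<StoreInst>(OrigInst))
        if (Instruction *Cloned = ClonedCopyOf.lookup(&OrigInst))
          LVer.annotateInstWithNoAlias(Cloned, &OrigInst);
}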
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
/// Emit bypass checks to check any memory assumptions we may have made.
void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+ /// Add additional metadata to \p To that was not present on \p Orig.
+ ///
+ /// Currently this is used to add the noalias annotations based on the
+ /// inserted memchecks. Use this for instructions that are *cloned* into the
+ /// vector loop.
+ void addNewMetadata(Instruction *To, const Instruction *Orig);
+
+ /// Add metadata from one instruction to another.
+ ///
+ /// This includes both the original MDs from \p From and additional ones (\see
+ /// addNewMetadata). Use this for *newly created* instructions in the vector
+ /// loop.
+ void addMetadata(Instruction *To, const Instruction *From);
+
+ /// \brief Similar to the previous function but it adds the metadata to a
+ /// vector of instructions.
+ void addMetadata(SmallVectorImpl<Value *> &To, const Instruction *From);
+
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
/// then a single scalar value is mapped to multiple vector parts. The parts
/// Target Transform Info.
const TargetTransformInfo *TTI;
+ /// \brief LoopVersioning. It's only set up (non-null) if memchecks were
+ /// used.
+ ///
+ /// This is currently only used to add no-alias metadata based on the
+ /// memchecks. The actual versioning is performed manually.
+ std::unique_ptr<LoopVersioning> LVer;
+
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
}
}
-/// \brief Propagate known metadata from one instruction to a vector of others.
-static void propagateMetadata(SmallVectorImpl<Value *> &To,
- const Instruction *From) {
+void InnerLoopVectorizer::addNewMetadata(Instruction *To,
+ const Instruction *Orig) {
+ // If the loop was versioned with memchecks, add the corresponding no-alias
+ // metadata.
+ if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
+ LVer->annotateInstWithNoAlias(To, Orig);
+}
+
+void InnerLoopVectorizer::addMetadata(Instruction *To,
+ const Instruction *From) {
+ propagateMetadata(To, From);
+ addNewMetadata(To, From);
+}
+
+void InnerLoopVectorizer::addMetadata(SmallVectorImpl<Value *> &To,
+ const Instruction *From) {
for (Value *V : To)
if (Instruction *I = dyn_cast<Instruction>(V))
- propagateMetadata(I, From);
+ addMetadata(I, From);
}
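
The division of labor between the two helpers is worth spelling out: newly built vector instructions start with no metadata, so addMetadata() both propagates the original metadata and adds the memcheck-based noalias scopes, whereas scalarized instructions are produced by clone(), which already carries the original metadata, so only addNewMetadata() is needed. A hedged sketch of the two situations, with Builder, VecPtr, Alignment, and Instr standing in for the vectorizer's local state:

// Widening: a brand-new load is created, so copy the original metadata and
// add the new noalias scopes in one step.
Instruction *Widened =
    Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
addMetadata(Widened, Instr);

// Scalarization: clone() already copied the original metadata, so only the
// memcheck-based noalias scopes are missing.
Instruction *Cloned = Instr->clone();
addNewMetadata(Cloned, Instr);
Builder.Insert(Cloned);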
/// \brief The group of interleaved loads/stores sharing the same stride and
Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
}
- propagateMetadata(NewLoadInstr, Instr);
+ addMetadata(NewLoadInstr, Instr);
}
return;
}
Instruction *NewStoreInstr =
Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
- propagateMetadata(NewStoreInstr, Instr);
+ addMetadata(NewStoreInstr, Instr);
}
}
NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr,
Alignment);
}
- propagateMetadata(NewSI, SI);
+ addMetadata(NewSI, SI);
}
return;
}
NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
}
- propagateMetadata(NewLI, LI);
+ addMetadata(NewLI, LI);
}
}
Op = Builder.CreateExtractElement(Op, Builder.getInt32(Width));
Cloned->setOperand(op, Op);
}
+ addNewMetadata(Cloned, Instr);
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
BranchInst::Create(Bypass, NewBB, MemRuntimeCheck));
LoopBypassBlocks.push_back(BB);
AddedSafetyChecks = true;
+
+ // We currently don't use LoopVersioning for the actual loop cloning but we
+ // still use it to add the noalias metadata.
+ LVer = llvm::make_unique<LoopVersioning>(*Legal->getLAI(), OrigLoop, LI, DT,
+ PSE.getSE());
+ LVer->prepareNoAliasMetadata();
}
Entry[Part] = V;
}
- propagateMetadata(Entry, &*it);
+ addMetadata(Entry, &*it);
break;
}
case Instruction::Select: {
Op1[Part]);
}
- propagateMetadata(Entry, &*it);
+ addMetadata(Entry, &*it);
break;
}
Entry[Part] = C;
}
- propagateMetadata(Entry, &*it);
+ addMetadata(Entry, &*it);
break;
}
CI->getType(), II.getStepValue()->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
- propagateMetadata(Entry, &*it);
+ addMetadata(Entry, &*it);
break;
}
/// Vectorize casts.
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
- propagateMetadata(Entry, &*it);
+ addMetadata(Entry, &*it);
break;
}
Entry[Part] = Builder.CreateCall(VectorF, Args);
}
- propagateMetadata(Entry, &*it);
+ addMetadata(Entry, &*it);
break;
}
--- /dev/null
+; RUN: opt -basicaa -scoped-noalias -loop-vectorize -licm -force-vector-width=2 \
+; RUN: -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; In order to vectorize the inner loop, it needs to be versioned with
+; memchecks between {A} x {B, C} first:
+;
+; for (i = 0; i < n; i++)
+; for (j = 0; j < m; j++)
+; A[j] += B[i] + C[j];
+;
+; Since in the versioned vector loop A and B can no longer alias, B[i] can be
+; LICM'ed from the inner loop.
+
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+ br label %outer
+
+outer:
+ %i.2 = phi i64 [ 0, %entry ], [ %i, %inner.end ]
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %i.2
+ br label %inner.ph
+
+inner.ph:
+; CHECK: vector.ph:
+; CHECK: load i32, i32* %arrayidxB,
+; CHECK: br label %vector.body
+ br label %inner
+
+inner:
+ %j.2 = phi i64 [ 0, %inner.ph ], [ %j, %inner ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %j.2
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %j.2
+ %loadC = load i32, i32* %arrayidxC, align 4
+
+ %add = add nuw i32 %loadA, %loadB
+ %add2 = add nuw i32 %add, %loadC
+
+ store i32 %add2, i32* %arrayidxA, align 4
+
+ %j = add nuw nsw i64 %j.2, 1
+ %cond1 = icmp eq i64 %j, 20
+ br i1 %cond1, label %inner.end, label %inner
+
+inner.end:
+ %i = add nuw nsw i64 %i.2, 1
+ %cond2 = icmp eq i64 %i, 30
+ br i1 %cond2, label %outer.end, label %outer
+
+outer.end:
+ ret void
+}
--- /dev/null
+; RUN: opt -basicaa -loop-vectorize -force-vector-width=2 \
+; RUN: -force-vector-interleave=1 -S < %s \
+; RUN: | FileCheck %s -check-prefix=BOTH -check-prefix=LV
+; RUN: opt -basicaa -scoped-noalias -loop-vectorize -dse -force-vector-width=2 \
+; RUN: -force-vector-interleave=1 -S < %s \
+; RUN: | FileCheck %s -check-prefix=BOTH -check-prefix=DSE
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This loop needs to be versioned with memchecks between {A, B} x {C} before
+; it can be vectorized.
+;
+; for (i = 0; i < n; i++) {
+; C[i] = A[i] + 2;
+; C[i] += B[i];
+; }
+;
+; Check that the corresponding noalias metadata is added to the vector loop
+; but not to the scalar loop.
+;
+; Since in the versioned vector loop C and B can no longer alias, the first
+; store to C[i] can be DSE'd.
+
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+ br label %for.body
+
+; BOTH: vector.memcheck:
+; BOTH: vector.body:
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; Scope 1
+; LV: = load {{.*}} !alias.scope !0
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %add = add nuw i32 %loadA, 2
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; Noalias with scopes 1 and 6
+; LV: store {{.*}} !alias.scope !3, !noalias !5
+; DSE-NOT: store
+ store i32 %add, i32* %arrayidxC, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; Scope 6
+; LV: = load {{.*}} !alias.scope !7
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %add2 = add nuw i32 %add, %loadB
+
+; Noalias with scopes 1 and 6
+; LV: store {{.*}} !alias.scope !3, !noalias !5
+; DSE: store
+ store i32 %add2, i32* %arrayidxC, align 4
+
+ %inc = add nuw nsw i64 %ind, 1
+ %exitcond = icmp eq i64 %inc, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+; BOTH: for.body:
+; BOTH-NOT: !alias.scope
+; BOTH-NOT: !noalias
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; LV: !0 = !{!1}
+; LV: !1 = distinct !{!1, !2}
+; LV: !2 = distinct !{!2, !"LVerDomain"}
+; LV: !3 = !{!4}
+; LV: !4 = distinct !{!4, !2}
+; LV: !5 = !{!1, !6}
+; LV: !6 = distinct !{!6, !2}
+; LV: !7 = !{!6}
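
As a side note on the metadata shape being checked: LoopVersioning creates one distinct scope per pointer-checking group, all under a common "LVerDomain" domain; each access names its own scope in !alias.scope, and the store lists the other groups' scopes in !noalias, which (together with the !alias.scope on the load of B[i]) lets -dse prove the intervening load does not read C[i] and delete the first store. The following is a minimal sketch of producing such nodes with MDBuilder; the function, its parameters, and the node numbering in the comments are illustrative rather than the patch's code.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

// LoadA, StoreC, and LoadB stand for one access of each checking group.
static void tagAccesses(LLVMContext &Context, Instruction *LoadA,
                        Instruction *StoreC, Instruction *LoadB) {
  MDBuilder MDB(Context);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain"); // !2
  MDNode *ScopeA = MDB.createAnonymousAliasScope(Domain);             // !1
  MDNode *ScopeC = MDB.createAnonymousAliasScope(Domain);             // !4
  MDNode *ScopeB = MDB.createAnonymousAliasScope(Domain);             // !6

  // Each access names its own scope ...
  LoadA->setMetadata(LLVMContext::MD_alias_scope,
                     MDNode::get(Context, {ScopeA}));
  StoreC->setMetadata(LLVMContext::MD_alias_scope,
                      MDNode::get(Context, {ScopeC}));
  LoadB->setMetadata(LLVMContext::MD_alias_scope,
                     MDNode::get(Context, {ScopeB}));
  // ... and the store records that it cannot alias the scopes of A and B.
  StoreC->setMetadata(LLVMContext::MD_noalias,
                      MDNode::get(Context, {ScopeA, ScopeB}));
}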