// vectorize loop is made, runtime checks are added so as to make sure that
// invariant address won't alias with any other objects.
if (!LAI->getStoresToInvariantAddresses().empty()) {
- // For each invariant address, check its last stored value is unconditional.
+ // For each invariant address, check that its last stored value is
+ // unconditional and that the address is not calculated inside the loop.
for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
- if (isInvariantStoreOfReduction(SI) &&
- blockNeedsPredication(SI->getParent())) {
+ if (!isInvariantStoreOfReduction(SI))
+ continue;
+
+ if (blockNeedsPredication(SI->getParent())) {
reportVectorizationFailure(
"We don't allow storing to uniform addresses",
"write of conditional recurring variant value to a loop "
"CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
+
+ // The invariant address should be defined outside of the loop. LICM
+ // usually makes sure this happens, but in rare cases it does not, and we
+ // do not want to overcomplicate vectorization to support that case.
+ if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
+ if (TheLoop->contains(Ptr)) {
+ reportVectorizationFailure(
+ "Invariant address is calculated inside the loop",
+ "write to a loop invariant address could not "
+ "be vectorized",
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+ return false;
+ }
+ }
}
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
exit:
ret void
}
+
+define void @reduc_store_invariant_addr_not_hoisted(i32* %dst, i32* readonly %src) { ; reduction stored through an address that is invariant but computed inside the loop; no vector.body is expected in the output
+; CHECK-LABEL: @reduc_store_invariant_addr_not_hoisted
+; CHECK-NOT: vector.body:
+entry:
+ br label %for.body
+
+for.body:
+ %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] ; running sum, starts at 0
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; induction variable, starts at 0
+ %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv ; address of src[iv]
+ %0 = load i32, i32* %gep.src, align 4
+ %add = add nsw i32 %sum, %0 ; sum += src[iv]
+ %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 ; constant offset from %dst: same address every iteration, yet defined in the loop body
+ store i32 %add, i32* %gep.dst, align 4 ; updated sum is stored to that address on every iteration
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop after 1000 iterations
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ ret void
+}