/// Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
- /// If the loop has multiple stores to an invariant address, then
- /// return true, else return false.
- bool hasMultipleStoresToLoopInvariantAddress() const {
- return HasMultipleStoresToLoopInvariantAddress;
+ /// Return true if the loop has a memory dependence involving an invariant
+ /// address, i.e. two stores or a store and a load; return false otherwise.
+ bool hasDependenceInvolvingLoopInvariantAddress() const {
+ return HasDependenceInvolvingLoopInvariantAddress;
}
/// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
/// Cache the result of analyzeLoop.
bool CanVecMem;
- /// Indicator that there are multiple stores to a uniform address.
- bool HasMultipleStoresToLoopInvariantAddress;
+ /// Indicator of a store-store or load-store dependence on a uniform address.
+ bool HasDependenceInvolvingLoopInvariantAddress;
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
Value *Ptr = ST->getPointerOperand();
if (isUniform(Ptr))
- HasMultipleStoresToLoopInvariantAddress |=
+ HasDependenceInvolvingLoopInvariantAddress |=
!UniformStores.insert(Ptr).second;
// If we did *not* see this pointer before, insert it to the read-write
IsReadOnlyPtr = true;
}
+ // Check for an unsafe dependency between a load from a uniform address and
+ // a store to the same uniform address.
+ if (UniformStores.count(Ptr)) {
+ LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
+ "load and uniform store to the same address!\n");
+ HasDependenceInvolvingLoopInvariantAddress = true;
+ }
+
MemoryLocation Loc = MemoryLocation::get(LD);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
- HasMultipleStoresToLoopInvariantAddress(false) {
+ HasDependenceInvolvingLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(AA, LI, TLI, DT);
}
PtrRtChecking->print(OS, Depth);
OS << "\n";
- OS.indent(Depth) << "Multiple stores to invariant address were "
- << (HasMultipleStoresToLoopInvariantAddress ? "" : "not ")
+ OS.indent(Depth) << "Non vectorizable stores to invariant address were "
+ << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
if (!LAI->canVectorizeMemory())
return false;
- if (LAI->hasMultipleStoresToLoopInvariantAddress()) {
+ if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "multiple writes to a loop invariant address could not "
+ << "write to a loop invariant address could not "
"be vectorized");
LLVM_DEBUG(
- dbgs() << "LV: We don't allow multiple stores to a uniform address\n");
+ dbgs() << "LV: Non vectorizable stores to a uniform address\n");
return false;
}
-
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getUnionPredicate());
; CHECK-NEXT: Group
; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
; CHECK-NEXT: Member: {%b,+,4}<%for.body>
-; CHECK: Multiple stores to invariant address were not found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; The LAA with the new PM is a loop pass so we go from inner to outer loops.
; OLDPM: for.cond1.preheader:
-; OLDPM: Multiple stores to invariant address were not found in loop.
+; OLDPM: Non vectorizable stores to invariant address were not found in loop.
; OLDPM: for.body3:
-; OLDPM: Multiple stores to invariant address were found in loop.
+; OLDPM: Non vectorizable stores to invariant address were found in loop.
; NEWPM: for.body3:
-; NEWPM: Multiple stores to invariant address were found in loop.
+; NEWPM: Non vectorizable stores to invariant address were found in loop.
; NEWPM: for.cond1.preheader:
-; NEWPM: Multiple stores to invariant address were not found in loop.
+; NEWPM: Non vectorizable stores to invariant address were not found in loop.
define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
entry:
; }
; }
-; CHECK: Multiple stores to invariant address were not found in loop.
-; CHECK-NOT: Multiple stores to invariant address were found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NOT: Non vectorizable stores to invariant address were found in loop.
define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
; }
; }
-; CHECK: Multiple stores to invariant address were not found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
entry:
for.end10: ; preds = %for.inc8, %entry
ret i32 undef
}
+
+; Cannot vectorize a loop with an unsafe dependency between a uniform load
+; (%tmp10) and a store (of %tmp12) to the same loop-invariant address (%tmp).
+; PR39653
+; Note: %tmp10 could be replaced by phi(%arg4, %tmp12), a potentially vectorizable
+; 1st-order-recurrence.
+define void @unsafe_dep_uniform_load_store(i32 %arg, i32 %arg1, i64 %arg2, i16* %arg3, i32 %arg4, i64 %arg5) {
+; CHECK-LABEL: unsafe_dep_uniform_load_store
+; CHECK-NOT: <4 x i32>
+bb:
+ %tmp = alloca i32 ; stack slot allocated outside the loop block %bb7
+ store i32 %arg4, i32* %tmp ; initial value seen by the first iteration's load
+ %tmp6 = getelementptr inbounds i16, i16* %arg3, i64 %arg5
+ br label %bb7
+
+bb7:
+ %tmp8 = phi i64 [ 0, %bb ], [ %tmp24, %bb7 ] ; i64 counter: starts at 0, +1 per iteration
+ %tmp9 = phi i32 [ %arg1, %bb ], [ %tmp23, %bb7 ] ; i32 value: starts at %arg1, +1 per iteration
+ %tmp10 = load i32, i32* %tmp ; uniform load: same address on every iteration
+ %tmp11 = mul nsw i32 %tmp9, %tmp10
+ %tmp12 = srem i32 %tmp11, 65536
+ %tmp13 = add nsw i32 %tmp12, %tmp9
+ %tmp14 = trunc i32 %tmp13 to i16
+ %tmp15 = trunc i64 %tmp8 to i32
+ %tmp16 = add i32 %arg, %tmp15
+ %tmp17 = zext i32 %tmp16 to i64
+ %tmp18 = getelementptr inbounds i16, i16* %tmp6, i64 %tmp17
+ store i16 %tmp14, i16* %tmp18, align 2 ; store through the %tmp6-based pointer at index %tmp17
+ %tmp19 = add i32 %tmp13, %tmp9
+ %tmp20 = trunc i32 %tmp19 to i16
+ %tmp21 = and i16 %tmp20, 255
+ %tmp22 = getelementptr inbounds i16, i16* %arg3, i64 %tmp17
+ store i16 %tmp21, i16* %tmp22, align 2 ; store through the %arg3-based pointer at index %tmp17
+ %tmp23 = add nsw i32 %tmp9, 1
+ %tmp24 = add nuw nsw i64 %tmp8, 1
+ %tmp25 = icmp eq i64 %tmp24, %arg2
+ store i32 %tmp12, i32* %tmp ; uniform store to the address read by %tmp10 on the next iteration
+ br i1 %tmp25, label %bb26, label %bb7 ; leave the loop once %tmp24 == %arg2
+
+bb26:
+ ret void
+}