[LV] Avoid vectorizing unsafe dependencies in uniform address

author Anna Thomas <anna@azul.com>

Mon, 19 Nov 2018 15:39:59 +0000 (15:39 +0000)

committer Anna Thomas <anna@azul.com>

Mon, 19 Nov 2018 15:39:59 +0000 (15:39 +0000)
author Anna Thomas <anna@azul.com>
Mon, 19 Nov 2018 15:39:59 +0000 (15:39 +0000)
committer Anna Thomas <anna@azul.com>
Mon, 19 Nov 2018 15:39:59 +0000 (15:39 +0000)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h

index c59c86c..cf24d9c 100644 (file)
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -564,10 +564,10 @@ public:
    /// Print the information about the memory accesses in the loop.
    void print(raw_ostream &OS, unsigned Depth = 0) const;
  
-  /// If the loop has multiple stores to an invariant address, then
-  /// return true, else return false.
-  bool hasMultipleStoresToLoopInvariantAddress() const {
-    return HasMultipleStoresToLoopInvariantAddress;
+  /// If the loop has a memory dependence involving an invariant address, i.e.
+  /// two stores or a store and a load, then return true, else return false.
+  bool hasDependenceInvolvingLoopInvariantAddress() const {
+    return HasDependenceInvolvingLoopInvariantAddress;
    }
  
    /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
@@ -620,8 +620,8 @@ private:
    /// Cache the result of analyzeLoop.
    bool CanVecMem;
  
-  /// Indicator that there are multiple stores to a uniform address.
-  bool HasMultipleStoresToLoopInvariantAddress;
+  /// Indicator that there are non-vectorizable stores to a uniform address.
+  bool HasDependenceInvolvingLoopInvariantAddress;
  
    /// The diagnostics report generated for the analysis.  E.g. why we
    /// couldn't analyze the loop.
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp

index 3c6c2ab..bc01f04 100644 (file)
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1870,7 +1870,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
      Value *Ptr = ST->getPointerOperand();
  
      if (isUniform(Ptr))
-      HasMultipleStoresToLoopInvariantAddress |=
+      HasDependenceInvolvingLoopInvariantAddress |=
            !UniformStores.insert(Ptr).second;
  
      // If we did *not* see this pointer before, insert it to  the read-write
@@ -1914,6 +1914,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
        IsReadOnlyPtr = true;
      }
  
+    // See if there is an unsafe dependency between a load from a uniform address
+    // and a store to the same uniform address.
+    if (UniformStores.count(Ptr)) {
+      LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
+                           "load and uniform store to the same address!\n");
+      HasDependenceInvolvingLoopInvariantAddress = true;
+    }
+
      MemoryLocation Loc = MemoryLocation::get(LD);
      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
@@ -2272,7 +2280,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
        PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
        DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
        NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
-      HasMultipleStoresToLoopInvariantAddress(false) {
+      HasDependenceInvolvingLoopInvariantAddress(false) {
    if (canAnalyzeLoop())
      analyzeLoop(AA, LI, TLI, DT);
  }
@@ -2304,8 +2312,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
    PtrRtChecking->print(OS, Depth);
    OS << "\n";
  
-  OS.indent(Depth) << "Multiple stores to invariant address were "
-                   << (HasMultipleStoresToLoopInvariantAddress ? "" : "not ")
+  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
+                   << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
                     << "found in loop.\n";
  
    OS.indent(Depth) << "SCEV assumptions:\n";
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

index 755ad32..36f1cbd 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -817,15 +817,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
    if (!LAI->canVectorizeMemory())
      return false;
  
-  if (LAI->hasMultipleStoresToLoopInvariantAddress()) {
+  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
      ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
-              << "multiple writes to a loop invariant address could not "
+              << "write to a loop invariant address could not "
                   "be vectorized");
      LLVM_DEBUG(
-        dbgs() << "LV: We don't allow multiple stores to a uniform address\n");
+        dbgs() << "LV: Non vectorizable stores to a uniform address\n");
      return false;
    }
-
    Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
    PSE.addPredicate(LAI->getPSE().getUnionPredicate());
  
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll

index 0d0fe65..cb1b7ed 100644 (file)
--- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
@@ -39,7 +39,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  ; CHECK-NEXT:      Group
  ; CHECK-NEXT:        (Low: %b High: ((4 * (1 umax %x)) + %b))
  ; CHECK-NEXT:          Member: {%b,+,4}<%for.body>
-; CHECK:         Multiple stores to invariant address were not found in loop.
+; CHECK:         Non vectorizable stores to invariant address were not found in loop.
  ; CHECK-NEXT:    SCEV assumptions:
  ; CHECK-NEXT:    {1,+,1}<%for.body> Added Flags: <nusw>
  ; CHECK-NEXT:    {0,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll

index f24211d..611e957 100644 (file)
--- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
@@ -14,14 +14,14 @@
  ; The LAA with the new PM is a loop pass so we go from inner to outer loops.
  
  ; OLDPM: for.cond1.preheader:
-; OLDPM:   Multiple stores to invariant address were not found in loop.
+; OLDPM:   Non vectorizable stores to invariant address were not found in loop.
  ; OLDPM: for.body3:
-; OLDPM:   Multiple stores to invariant address were found in loop.
+; OLDPM:   Non vectorizable stores to invariant address were found in loop.
  
  ; NEWPM: for.body3:
-; NEWPM:   Multiple stores to invariant address were found in loop.
+; NEWPM:   Non vectorizable stores to invariant address were found in loop.
  ; NEWPM: for.cond1.preheader:
-; NEWPM:   Multiple stores to invariant address were not found in loop.
+; NEWPM:   Non vectorizable stores to invariant address were not found in loop.
  
  define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
  entry:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll

index 07bcdcc..d21cc69 100644 (file)
--- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
@@ -10,8 +10,8 @@
  ;    }
  ;  }
  
-; CHECK: Multiple stores to invariant address were not found in loop.
-; CHECK-NOT: Multiple stores to invariant address were found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NOT: Non vectorizable stores to invariant address were found in loop.
  
  
  define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll

index 8d74524..b25d79b 100644 (file)
--- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
@@ -10,7 +10,7 @@
  ;    }
  ;  }
  
-; CHECK: Multiple stores to invariant address were not found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
  
  define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
  entry:
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll

index 4155352..cf1257b 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -551,3 +551,45 @@ for.inc8:                                         ; preds = %for.body3, %for.con
  for.end10:                                        ; preds = %for.inc8, %entry
    ret i32 undef
  }
+
+; Cannot vectorize a loop with an unsafe dependency between a uniform load (%tmp10)
+; and a store (%tmp12) to the same address.
+; PR39653
+; Note: %tmp10 could be replaced by phi(%arg4, %tmp12), a potentially vectorizable
+; 1st-order-recurrence
+define void @unsafe_dep_uniform_load_store(i32 %arg, i32 %arg1, i64 %arg2, i16* %arg3, i32 %arg4, i64 %arg5) {
+; CHECK-LABEL: unsafe_dep_uniform_load_store
+; CHECK-NOT: <4 x i32>
+bb:
+  %tmp = alloca i32
+  store i32 %arg4, i32* %tmp
+  %tmp6 = getelementptr inbounds i16, i16* %arg3, i64 %arg5
+  br label %bb7
+
+bb7:
+  %tmp8 = phi i64 [ 0, %bb ], [ %tmp24, %bb7 ]
+  %tmp9 = phi i32 [ %arg1, %bb ], [ %tmp23, %bb7 ]
+  %tmp10 = load i32, i32* %tmp
+  %tmp11 = mul nsw i32 %tmp9, %tmp10
+  %tmp12 = srem i32 %tmp11, 65536
+  %tmp13 = add nsw i32 %tmp12, %tmp9
+  %tmp14 = trunc i32 %tmp13 to i16
+  %tmp15 = trunc i64 %tmp8 to i32
+  %tmp16 = add i32 %arg, %tmp15
+  %tmp17 = zext i32 %tmp16 to i64
+  %tmp18 = getelementptr inbounds i16, i16* %tmp6, i64 %tmp17
+  store i16 %tmp14, i16* %tmp18, align 2
+  %tmp19 = add i32 %tmp13, %tmp9
+  %tmp20 = trunc i32 %tmp19 to i16
+  %tmp21 = and i16 %tmp20, 255
+  %tmp22 = getelementptr inbounds i16, i16* %arg3, i64 %tmp17
+  store i16 %tmp21, i16* %tmp22, align 2
+  %tmp23 = add nsw i32 %tmp9, 1
+  %tmp24 = add nuw nsw i64 %tmp8, 1
+  %tmp25 = icmp eq i64 %tmp24, %arg2
+  store i32 %tmp12, i32* %tmp
+  br i1 %tmp25, label %bb26, label %bb7
+
+bb26:
+  ret void
+}
author	Anna Thomas <anna@azul.com>
	Mon, 19 Nov 2018 15:39:59 +0000 (15:39 +0000)
committer	Anna Thomas <anna@azul.com>
	Mon, 19 Nov 2018 15:39:59 +0000 (15:39 +0000)
llvm/include/llvm/Analysis/LoopAccessAnalysis.h		patch \| blob \| history
llvm/lib/Analysis/LoopAccessAnalysis.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp		patch \| blob \| history
llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll		patch \| blob \| history
llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll		patch \| blob \| history
llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll		patch \| blob \| history
llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll		patch \| blob \| history