Use SCEV information for the second level aliasing
author Roman Gareev <gareevroman@gmail.com>
Tue, 8 Aug 2017 16:50:28 +0000 (16:50 +0000)
committer Roman Gareev <gareevroman@gmail.com>
Tue, 8 Aug 2017 16:50:28 +0000 (16:50 +0000)
We introduce another level of alias metadata to distinguish the individual
non-aliasing accesses that have inter-iteration alias-free base pointers
marked with "Inter iteration alias-free" mark nodes. To distinguish two
accesses, the raw pointers representing base pointers are compared.

In the case of, for example, ublas's prod function, which implements GEMM, and
DeLiCM, we can get accesses to the same location represented by different raw
pointers. Consequently, we create different alias sets, which can prevent
accesses from, for example, being sunk or hoisted.

To avoid the issue, we compare the corresponding SCEV information instead
of the corresponding raw pointers.

Reviewed-by: Tobias Grosser <tobias@grosser.es>
Differential Revision: https://reviews.llvm.org/D35761

llvm-svn: 310380

polly/include/polly/CodeGen/IRBuilder.h
polly/lib/CodeGen/IRBuilder.cpp
polly/test/ScheduleOptimizer/kernel_gemm___%for.body---%for.end24.jscop [new file with mode: 0644]
polly/test/ScheduleOptimizer/kernel_gemm___%for.body---%for.end24.jscop.transformed [new file with mode: 0644]
polly/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll [new file with mode: 0644]

index 2d75f99..47039c0 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/ValueMap.h"
 
@@ -115,11 +116,10 @@ private:
       OtherAliasScopeListMap;
 
   /// A map from pointers to second level alias scopes.
-  llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
-      SecondLevelAliasScopeMap;
+  llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *> SecondLevelAliasScopeMap;
 
   /// A map from pointers to second level alias scope list of other pointers.
-  llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+  llvm::DenseMap<const llvm::SCEV *, llvm::MDNode *>
       SecondLevelOtherAliasScopeListMap;
 
   /// Inter iteration alias-free base pointers.
index 7e8c3ad..94cdda2 100644 (file)
@@ -140,12 +140,14 @@ static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
 
 void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
                                         llvm::Value *BasePtr) {
-  auto *Ptr = getMemAccInstPointerOperand(Inst);
-  if (!Ptr)
+  auto *PtrSCEV = SE->getSCEV(getMemAccInstPointerOperand(Inst));
+  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
+
+  if (!PtrSCEV)
     return;
-  auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(Ptr);
+  auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(PtrSCEV);
   auto SecondLevelOtherAliasScopeList =
-      SecondLevelOtherAliasScopeListMap.lookup(Ptr);
+      SecondLevelOtherAliasScopeListMap.lookup(PtrSCEV);
   if (!SecondLevelAliasScope) {
     auto AliasScope = AliasScopeMap.lookup(BasePtr);
     if (!AliasScope)
@@ -153,16 +155,16 @@ void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
     LLVMContext &Ctx = SE->getContext();
     SecondLevelAliasScope = getID(
         Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
-    SecondLevelAliasScopeMap[Ptr] = SecondLevelAliasScope;
+    SecondLevelAliasScopeMap[PtrSCEV] = SecondLevelAliasScope;
     Metadata *Args = {SecondLevelAliasScope};
     auto SecondLevelBasePtrAliasScopeList =
-        SecondLevelAliasScopeMap.lookup(BasePtr);
-    SecondLevelAliasScopeMap[BasePtr] = MDNode::concatenate(
+        SecondLevelAliasScopeMap.lookup(BasePtrSCEV);
+    SecondLevelAliasScopeMap[BasePtrSCEV] = MDNode::concatenate(
         SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
     auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
     SecondLevelOtherAliasScopeList = MDNode::concatenate(
         OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
-    SecondLevelOtherAliasScopeListMap[Ptr] = SecondLevelOtherAliasScopeList;
+    SecondLevelOtherAliasScopeListMap[PtrSCEV] = SecondLevelOtherAliasScopeList;
   }
   Inst->setMetadata("alias.scope", SecondLevelAliasScope);
   Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
diff --git a/polly/test/ScheduleOptimizer/kernel_gemm___%for.body---%for.end24.jscop b/polly/test/ScheduleOptimizer/kernel_gemm___%for.body---%for.end24.jscop
new file mode 100644 (file)
index 0000000..41f7a70
--- /dev/null
@@ -0,0 +1,55 @@
+{
+   "arrays" : [
+      {
+         "name" : "MemRef_C1",
+         "sizes" : [ "*" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_A",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_B",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_C",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      }
+   ],
+   "context" : "{  :  }",
+   "name" : "%for.body---%for.end24",
+   "statements" : [
+      {
+         "accesses" : [
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C1[0] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, i2] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
+            }
+         ],
+         "domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
+         "name" : "Stmt_for_body6",
+         "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
+      }
+   ]
+}
diff --git a/polly/test/ScheduleOptimizer/kernel_gemm___%for.body---%for.end24.jscop.transformed b/polly/test/ScheduleOptimizer/kernel_gemm___%for.body---%for.end24.jscop.transformed
new file mode 100644 (file)
index 0000000..1b0e4de
--- /dev/null
@@ -0,0 +1,55 @@
+{
+   "arrays" : [
+      {
+         "name" : "MemRef_C1",
+         "sizes" : [ "*" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_A",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_B",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_C",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      }
+   ],
+   "context" : "{  :  }",
+   "name" : "%for.body---%for.end24",
+   "statements" : [
+      {
+         "accesses" : [
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[i0, i2] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C1[0] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }"
+            }
+         ],
+         "domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }",
+         "name" : "Stmt_for_body6",
+         "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }"
+      }
+   ]
+}
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll
new file mode 100644 (file)
index 0000000..ef3ca76
--- /dev/null
@@ -0,0 +1,64 @@
+; RUN: opt %loadPolly -polly-import-jscop -polly-opt-isl  \
+; RUN: -polly-target-throughput-vector-fma=1 \
+; RUN: -polly-target-latency-vector-fma=8 \
+; RUN: -polly-target-1st-cache-level-associativity=8 \
+; RUN: -polly-target-2nd-cache-level-associativity=8 \
+; RUN: -polly-target-1st-cache-level-size=32768 \
+; RUN: -polly-target-vector-register-bitwidth=256 \
+; RUN: -polly-target-2nd-cache-level-size=262144 \
+; RUN: -polly-import-jscop-postfix=transformed -polly-codegen -S < %s \
+; RUN: | FileCheck %s
+;
+; Check that we do not create different alias sets for locations represented by
+; different raw pointers.
+;
+; CHECK-NOT: !76 = distinct !{!76, !5, !"second level alias metadata"}
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %A, [1024 x double]* %B, [1024 x double]* %C, double* %C1) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc22, %entry.split
+  %indvars.iv43 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next44, %for.inc22 ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc19, %for.body
+  %indvars.iv40 = phi i64 [ 0, %for.body ], [ %indvars.iv.next41, %for.inc19 ]
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6, %for.body3
+  %indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body6 ]
+  %tmp = load double, double* %C1, align 8
+  %arrayidx9 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv43, i64 %indvars.iv
+  %tmp1 = load double, double* %arrayidx9, align 8
+  %arrayidx13 = getelementptr inbounds [1024 x double], [1024 x double]* %B, i64 %indvars.iv, i64 %indvars.iv40
+  %tmp2 = load double, double* %arrayidx13, align 8
+  %mul = fmul double %tmp1, %tmp2
+  %add = fadd double %tmp, %mul
+  %arrayidx17 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 %indvars.iv43, i64 %indvars.iv40
+  %tmp3 = load double, double* %arrayidx17, align 8
+  %add18 = fadd double %tmp3, %add
+  store double %add18, double* %arrayidx17, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.body6, label %for.inc19
+
+for.inc19:                                        ; preds = %for.body6
+  %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
+  %exitcond42 = icmp ne i64 %indvars.iv.next41, 1024
+  br i1 %exitcond42, label %for.body3, label %for.inc22
+
+for.inc22:                                        ; preds = %for.inc19
+  %indvars.iv.next44 = add nuw nsw i64 %indvars.iv43, 1
+  %exitcond45 = icmp ne i64 %indvars.iv.next44, 1024
+  br i1 %exitcond45, label %for.body, label %for.end24
+
+for.end24:                                        ; preds = %for.inc22
+  ret void
+}