From 63cc9467af2bf66768221e1b4ab427b671983632 Mon Sep 17 00:00:00 2001 From: Hongbin Zheng Date: Tue, 16 Jul 2013 15:20:29 +0000 Subject: [PATCH] Ensure a correct order between memory accesses. Ensure that the scalar write access corresponds to the result of a load instruction appears after the generic read access corresponds to the load instruction. llvm-svn: 186419 --- polly/include/polly/TempScopInfo.h | 5 +- polly/lib/Analysis/TempScopInfo.cpp | 19 +-- polly/test/TempScop/inter_bb_scalar_dep.ll | 54 ++++++ .../test/TempScop/intra_and_inter_bb_scalar_dep.ll | 58 +++++++ polly/test/TempScop/intra_bb_scalar_dep.ll | 52 ++++++ polly/test/TempScop/scalar_to_array.ll | 186 +++++++++++++++++++++ polly/test/TempScop/tempscop-printing.ll | 43 +++++ 7 files changed, 406 insertions(+), 11 deletions(-) create mode 100644 polly/test/TempScop/inter_bb_scalar_dep.ll create mode 100644 polly/test/TempScop/intra_and_inter_bb_scalar_dep.ll create mode 100644 polly/test/TempScop/intra_bb_scalar_dep.ll create mode 100644 polly/test/TempScop/scalar_to_array.ll diff --git a/polly/include/polly/TempScopInfo.h b/polly/include/polly/TempScopInfo.h index da84a71..56c72bf 100755 --- a/polly/include/polly/TempScopInfo.h +++ b/polly/include/polly/TempScopInfo.h @@ -292,7 +292,10 @@ class TempScopInfo : public FunctionPass { /// /// @param Inst The instruction to be analyzed /// @param R The SCoP region - void buildScalarDependences(Instruction *Inst, Region *R); + /// + /// @return True if the Instruction is used in other BB and a scalar write + /// Access is required. + bool buildScalarDependences(Instruction *Inst, Region *R); void buildAccessFunctions(Region &RefRegion, BasicBlock &BB); diff --git a/polly/lib/Analysis/TempScopInfo.cpp b/polly/lib/Analysis/TempScopInfo.cpp index 4a9733f..5aa8d96 100644 --- a/polly/lib/Analysis/TempScopInfo.cpp +++ b/polly/lib/Analysis/TempScopInfo.cpp @@ -104,11 +104,11 @@ void TempScop::printDetail(raw_ostream &OS, ScalarEvolution *SE, LoopInfo *LI, } } -void TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) { +bool TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) { // No need to translate these scalar dependences into polyhedral form, because // synthesizable scalars can be generated by the code generator. if (canSynthesize(Inst, LI, SE, R)) - return; + return false; bool AnyCrossStmtUse = false; BasicBlock *ParentBB = Inst->getParent(); @@ -147,12 +147,7 @@ void TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) { AccFuncMap[UseParent].push_back(std::make_pair(ScalarAccess, U)); } - // If the Instruction is used outside the statement, we need to build the - // write access. - if (AnyCrossStmtUse) { - IRAccess ScalarAccess(IRAccess::SCALARWRITE, Inst, ZeroOffset, 1, true); - AccFuncMap[ParentBB].push_back(std::make_pair(ScalarAccess, Inst)); - } + return AnyCrossStmtUse; } IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R) { @@ -190,8 +185,12 @@ void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB) { if (isa(Inst) || isa(Inst)) Functions.push_back(std::make_pair(buildIRAccess(Inst, L, &R), Inst)); - if (!isa(Inst)) - buildScalarDependences(Inst, &R); + if (!isa(Inst) && buildScalarDependences(Inst, &R)) { + // If the Instruction is used outside the statement, we need to build the + // write access. + IRAccess ScalarAccess(IRAccess::SCALARWRITE, Inst, ZeroOffset, 1, true); + Functions.push_back(std::make_pair(ScalarAccess, Inst)); + } } if (Functions.empty()) diff --git a/polly/test/TempScop/inter_bb_scalar_dep.ll b/polly/test/TempScop/inter_bb_scalar_dep.ll new file mode 100644 index 0000000..e83bf5b --- /dev/null +++ b/polly/test/TempScop/inter_bb_scalar_dep.ll @@ -0,0 +1,54 @@ +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s + +; void f(long A[], int N, int *init_ptr) { +; long i, j; +; +; for (i = 0; i < N; ++i) { +; init = *init_ptr; +; for (i = 0; i < N; ++i) { +; A[i] = init + 2; +; } +; } +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { +entry: + br label %for.i + +for.i: ; preds = %for.i.end, %entry + %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] + %indvar.i.next = add nsw i64 %indvar.i, 1 + br label %entry.next + +entry.next: ; preds = %for.i + %init = load i64* %init_ptr +; CHECK: BB: entry.next +; CHECK: Read init_ptr[0] +; CHECK: Write init[0] + br label %for.j + +for.j: ; preds = %for.j, %entry.next + %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] + %init_plus_two = add i64 %init, 2 +; CHECK: Read init[0] +; CHECK: Write A[{0,+,8}<%for.j>] + %scevgep = getelementptr i64* %A, i64 %indvar.j + store i64 %init_plus_two, i64* %scevgep + %indvar.j.next = add nsw i64 %indvar.j, 1 + %exitcond.j = icmp eq i64 %indvar.j.next, %N + br i1 %exitcond.j, label %for.i.end, label %for.j + +for.i.end: ; preds = %for.j + %exitcond.i = icmp eq i64 %indvar.i.next, %N + br i1 %exitcond.i, label %return, label %for.i + +return: ; preds = %for.i.end + ret void +} + +attributes #0 = { nounwind } diff --git a/polly/test/TempScop/intra_and_inter_bb_scalar_dep.ll b/polly/test/TempScop/intra_and_inter_bb_scalar_dep.ll new file mode 100644 index 0000000..ce4c49f --- /dev/null +++ b/polly/test/TempScop/intra_and_inter_bb_scalar_dep.ll @@ -0,0 +1,58 @@ +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s + +; void f(long A[], int N, int *init_ptr) { +; long i, j; +; +; for (i = 0; i < N; ++i) { +; init = *init_ptr; +; for (i = 0; i < N; ++i) { +; init2 = *init_ptr; +; A[i] = init + init2; +; } +; } +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { +entry: + br label %for.i + +for.i: ; preds = %for.i.end, %entry + %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] + %indvar.i.next = add nsw i64 %indvar.i, 1 + br label %entry.next + +entry.next: ; preds = %for.i + %init = load i64* %init_ptr +; CHECK: BB: entry.next +; CHECK: Read init_ptr[0] +; CHECK: Write init[0] + br label %for.j + +for.j: ; preds = %for.j, %entry.next + %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] + %init_2 = load i64* %init_ptr + %init_sum = add i64 %init, %init_2 +; CHECK: BB: for.j +; CHECK: Read init[0] +; CHECK: Read init_ptr[0] +; CHECK: Write A[{0,+,8}<%for.j>] + %scevgep = getelementptr i64* %A, i64 %indvar.j + store i64 %init_sum, i64* %scevgep + %indvar.j.next = add nsw i64 %indvar.j, 1 + %exitcond.j = icmp eq i64 %indvar.j.next, %N + br i1 %exitcond.j, label %for.i.end, label %for.j + +for.i.end: ; preds = %for.j + %exitcond.i = icmp eq i64 %indvar.i.next, %N + br i1 %exitcond.i, label %return, label %for.i + +return: ; preds = %for.i.end + ret void +} + +attributes #0 = { nounwind } diff --git a/polly/test/TempScop/intra_bb_scalar_dep.ll b/polly/test/TempScop/intra_bb_scalar_dep.ll new file mode 100644 index 0000000..d6e4211 --- /dev/null +++ b/polly/test/TempScop/intra_bb_scalar_dep.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s + +; void f(long A[], int N, int *init_ptr) { +; long i, j; +; +; for (i = 0; i < N; ++i) { +; for (i = 0; i < N; ++i) { +; init = *init_ptr; +; A[i] = init + 2; +; } +; } +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { +entry: + br label %for.i + +for.i: ; preds = %for.i.end, %entry + %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] + %indvar.i.next = add nsw i64 %indvar.i, 1 + br label %entry.next + +entry.next: ; preds = %for.i + br label %for.j + +for.j: ; preds = %for.j, %entry.next + %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] + %init = load i64* %init_ptr + %init_plus_two = add i64 %init, 2 + %scevgep = getelementptr i64* %A, i64 %indvar.j + store i64 %init_plus_two, i64* %scevgep +; CHECK: BB: for.j +; CHECK: Read init_ptr[0] +; CHECK: Write A[{0,+,8}<%for.j>] + %indvar.j.next = add nsw i64 %indvar.j, 1 + %exitcond.j = icmp eq i64 %indvar.j.next, %N + br i1 %exitcond.j, label %for.i.end, label %for.j + +for.i.end: ; preds = %for.j + %exitcond.i = icmp eq i64 %indvar.i.next, %N + br i1 %exitcond.i, label %return, label %for.i + +return: ; preds = %for.i.end + ret void +} + +attributes #0 = { nounwind } diff --git a/polly/test/TempScop/scalar_to_array.ll b/polly/test/TempScop/scalar_to_array.ll new file mode 100644 index 0000000..c2225c4 --- /dev/null +++ b/polly/test/TempScop/scalar_to_array.ll @@ -0,0 +1,186 @@ +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev -analyze < %s | FileCheck %s + +; ModuleID = 'scalar_to_array.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 8 + +; CHECK: empty +; Function Attrs: nounwind +define i32 @empty() #0 { +entry: + fence seq_cst + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %for.body, label %return + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 + br label %for.cond + +return: ; preds = %for.cond + fence seq_cst + ret i32 0 +} + +; CHECK: array_access +; Function Attrs: nounwind +define i32 @array_access() #0 { +entry: + fence seq_cst + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %for.body, label %return + +for.body: ; preds = %for.cond + %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %float = uitofp i64 %indvar to float + store float %float, float* %arrayidx + br label %for.inc +; CHECK: BB: for.body +; CHECK-NOT: Read +; CHECK: Write A[{0,+,4}<%for.cond>] + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 + br label %for.cond + +return: ; preds = %for.cond + fence seq_cst + ret i32 0 +} + +; Function Attrs: nounwind +; CHECK: intra_scop_dep +define i32 @intra_scop_dep() #0 { +entry: + fence seq_cst + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %for.body.a, label %return + +for.body.a: ; preds = %for.cond + %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %scalar = load float* %arrayidx + br label %for.body.b +; CHECK: BB: for.body.a +; CHECK: Read A[{0,+,4}<%for.cond>] +; CHECK: Write scalar[0] + +for.body.b: ; preds = %for.body.a + %arrayidx2 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %float = uitofp i64 %indvar to float + %sum = fadd float %scalar, %float + store float %sum, float* %arrayidx2 + br label %for.inc +; CHECK: BB: for.body.b +; CHECK: Read scalar[0] +; CHECK: Write A[{0,+,4}<%for.cond>] + +for.inc: ; preds = %for.body.b + %indvar.next = add i64 %indvar, 1 + br label %for.cond + +return: ; preds = %for.cond + fence seq_cst + ret i32 0 +} + +; It is not possible to have a scop which accesses a scalar element that is +; a global variable. All global variables are pointers containing possibly +; a single element. Hence they do not need to be handled anyways. +; Please note that this is still required when scalar to array rewritting is +; disabled. + +; CHECK: use_after_scop +; Function Attrs: nounwind +define i32 @use_after_scop() #0 { +entry: + %scalar.s2a = alloca float + fence seq_cst + br label %for.head + +for.head: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] + br label %for.body + +for.body: ; preds = %for.head + %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %scalar = load float* %arrayidx + store float %scalar, float* %scalar.s2a +; Escaped uses are still required to be rewritten to stack variable. +; CHECK: BB: for.body +; CHECK: Read A[{0,+,4}<%for.head>] +; CHECK: Write scalar.s2a[0] + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %for.head, label %for.after + +for.after: ; preds = %for.inc + %scalar.loadoutside = load float* %scalar.s2a + fence seq_cst + %return_value = fptosi float %scalar.loadoutside to i32 + br label %return + +return: ; preds = %for.after + ret i32 %return_value +} + +; We currently do not transform scalar references, that have only read accesses +; in the scop. There are two reasons for this: +; +; o We don't introduce additional memory references which may yield to compile +; time overhead. +; o For integer values, such a translation may block the use of scalar +; evolution on those values. +; +; CHECK: before_scop +; Function Attrs: nounwind +define i32 @before_scop() #0 { +entry: + br label %preheader + +preheader: ; preds = %entry + %scalar = fadd float 4.000000e+00, 5.000000e+00 + fence seq_cst + br label %for.cond + +for.cond: ; preds = %for.inc, %preheader + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ] + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %for.body, label %return + +for.body: ; preds = %for.cond + %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + store float %scalar, float* %arrayidx + br label %for.inc +; CHECK: BB: for.body +; CHECK: Write A[{0,+,4}<%for.cond>] + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 + br label %for.cond + +return: ; preds = %for.cond + fence seq_cst + ret i32 0 +} + +attributes #0 = { nounwind } diff --git a/polly/test/TempScop/tempscop-printing.ll b/polly/test/TempScop/tempscop-printing.ll index 3e990bd..04b5c67 100644 --- a/polly/test/TempScop/tempscop-printing.ll +++ b/polly/test/TempScop/tempscop-printing.ll @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze -disable-polly-intra-scop-scalar-to-array < %s | FileCheck %s -check-prefix=SCALARACCESS ; void f(long A[], int N, int *init_ptr) { ; long i, j; @@ -25,9 +26,12 @@ for.i: entry.next: ; CHECK: BB: entry.next +; SCALARACCESS: BB: entry.next %init = load i64* %init_ptr ; CHECK: Read init_ptr[0] ; CHECK: Write init.s2a[0] +; SCALARACCESS: Read init_ptr[0] +; SCALARACCESS: Write init[0] br label %for.j for.j: @@ -35,6 +39,10 @@ for.j: ; CHECK: BB: for.j ; CHECK: Read init.s2a[0] ; CHECK: Write A[{0,+,8}<%for.j>] + +; SCALARACCESS: BB: for.j +; SCALARACCESS: Read init +; SCALARACCESS: Write A[{0,+,8}<%for.j>] %init_plus_two = add i64 %init, 2 %scevgep = getelementptr i64* %A, i64 %indvar.j store i64 %init_plus_two, i64* %scevgep @@ -49,3 +57,38 @@ for.i.end: return: ret void } + +define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { +entry: + br label %for.i + +for.i: + %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] + %indvar.i.next = add nsw i64 %indvar.i, 1 + br label %entry.next + +entry.next: +; SCALARACCESS: BB: entry.next + %init = load i64* %init_ptr +; SCALARACCESS: Read init_ptr[0] +; SCALARACCESS: Write init[0] + br label %for.j + +for.j: +; SCALARACCESS: BB: for.j + %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] + %scevgep = getelementptr i64* %A, i64 %indvar.j + store i64 %init, i64* %scevgep +; SCALARACCESS: Read init +; SCALARACCESS: Write A[{0,+,8}<%for.j>] + %indvar.j.next = add nsw i64 %indvar.j, 1 + %exitcond.j = icmp eq i64 %indvar.j.next, %N + br i1 %exitcond.j, label %for.i.end, label %for.j + +for.i.end: + %exitcond.i = icmp eq i64 %indvar.i.next, %N + br i1 %exitcond.i, label %return, label %for.i + +return: + ret void +} -- 2.7.4