///
/// @param Inst The instruction to be analyzed
/// @param R The SCoP region
- void buildScalarDependences(Instruction *Inst, Region *R);
+ ///
+ /// @return True if the Instruction is used in other BB and a scalar write
+ /// Access is required.
+ bool buildScalarDependences(Instruction *Inst, Region *R);
void buildAccessFunctions(Region &RefRegion, BasicBlock &BB);
}
}
-void TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) {
+bool TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R) {
// No need to translate these scalar dependences into polyhedral form, because
// synthesizable scalars can be generated by the code generator.
if (canSynthesize(Inst, LI, SE, R))
- return;
+ return false;
bool AnyCrossStmtUse = false;
BasicBlock *ParentBB = Inst->getParent();
AccFuncMap[UseParent].push_back(std::make_pair(ScalarAccess, U));
}
- // If the Instruction is used outside the statement, we need to build the
- // write access.
- if (AnyCrossStmtUse) {
- IRAccess ScalarAccess(IRAccess::SCALARWRITE, Inst, ZeroOffset, 1, true);
- AccFuncMap[ParentBB].push_back(std::make_pair(ScalarAccess, Inst));
- }
+ return AnyCrossStmtUse;
}
IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R) {
if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
Functions.push_back(std::make_pair(buildIRAccess(Inst, L, &R), Inst));
- if (!isa<StoreInst>(Inst))
- buildScalarDependences(Inst, &R);
+ if (!isa<StoreInst>(Inst) && buildScalarDependences(Inst, &R)) {
+ // If the Instruction is used outside the statement, we need to build the
+ // write access.
+ IRAccess ScalarAccess(IRAccess::SCALARWRITE, Inst, ZeroOffset, 1, true);
+ Functions.push_back(std::make_pair(ScalarAccess, Inst));
+ }
}
if (Functions.empty())
--- /dev/null
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+
+; void f(long A[], int N, int *init_ptr) {
+; long i, j;
+;
+; for (i = 0; i < N; ++i) {
+; init = *init_ptr;
+; for (i = 0; i < N; ++i) {
+; A[i] = init + 2;
+; }
+; }
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
+entry:
+ br label %for.i
+
+for.i: ; preds = %for.i.end, %entry
+ %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
+ %indvar.i.next = add nsw i64 %indvar.i, 1
+ br label %entry.next
+
+entry.next: ; preds = %for.i
+ %init = load i64* %init_ptr
+; CHECK: BB: entry.next
+; CHECK: Read init_ptr[0]
+; CHECK: Write init[0]
+ br label %for.j
+
+for.j: ; preds = %for.j, %entry.next
+ %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
+ %init_plus_two = add i64 %init, 2
+; CHECK: Read init[0]
+; CHECK: Write A[{0,+,8}<%for.j>]
+ %scevgep = getelementptr i64* %A, i64 %indvar.j
+ store i64 %init_plus_two, i64* %scevgep
+ %indvar.j.next = add nsw i64 %indvar.j, 1
+ %exitcond.j = icmp eq i64 %indvar.j.next, %N
+ br i1 %exitcond.j, label %for.i.end, label %for.j
+
+for.i.end: ; preds = %for.j
+ %exitcond.i = icmp eq i64 %indvar.i.next, %N
+ br i1 %exitcond.i, label %return, label %for.i
+
+return: ; preds = %for.i.end
+ ret void
+}
+
+attributes #0 = { nounwind }
--- /dev/null
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+
+; void f(long A[], int N, int *init_ptr) {
+; long i, j;
+;
+; for (i = 0; i < N; ++i) {
+; init = *init_ptr;
+; for (i = 0; i < N; ++i) {
+; init2 = *init_ptr;
+; A[i] = init + init2;
+; }
+; }
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
+entry:
+ br label %for.i
+
+for.i: ; preds = %for.i.end, %entry
+ %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
+ %indvar.i.next = add nsw i64 %indvar.i, 1
+ br label %entry.next
+
+entry.next: ; preds = %for.i
+ %init = load i64* %init_ptr
+; CHECK: BB: entry.next
+; CHECK: Read init_ptr[0]
+; CHECK: Write init[0]
+ br label %for.j
+
+for.j: ; preds = %for.j, %entry.next
+ %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
+ %init_2 = load i64* %init_ptr
+ %init_sum = add i64 %init, %init_2
+; CHECK: BB: for.j
+; CHECK: Read init[0]
+; CHECK: Read init_ptr[0]
+; CHECK: Write A[{0,+,8}<%for.j>]
+ %scevgep = getelementptr i64* %A, i64 %indvar.j
+ store i64 %init_sum, i64* %scevgep
+ %indvar.j.next = add nsw i64 %indvar.j, 1
+ %exitcond.j = icmp eq i64 %indvar.j.next, %N
+ br i1 %exitcond.j, label %for.i.end, label %for.j
+
+for.i.end: ; preds = %for.j
+ %exitcond.i = icmp eq i64 %indvar.i.next, %N
+ br i1 %exitcond.i, label %return, label %for.i
+
+return: ; preds = %for.i.end
+ ret void
+}
+
+attributes #0 = { nounwind }
--- /dev/null
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+
+; void f(long A[], int N, int *init_ptr) {
+; long i, j;
+;
+; for (i = 0; i < N; ++i) {
+; for (i = 0; i < N; ++i) {
+; init = *init_ptr;
+; A[i] = init + 2;
+; }
+; }
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
+entry:
+ br label %for.i
+
+for.i: ; preds = %for.i.end, %entry
+ %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
+ %indvar.i.next = add nsw i64 %indvar.i, 1
+ br label %entry.next
+
+entry.next: ; preds = %for.i
+ br label %for.j
+
+for.j: ; preds = %for.j, %entry.next
+ %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
+ %init = load i64* %init_ptr
+ %init_plus_two = add i64 %init, 2
+ %scevgep = getelementptr i64* %A, i64 %indvar.j
+ store i64 %init_plus_two, i64* %scevgep
+; CHECK: BB: for.j
+; CHECK: Read init_ptr[0]
+; CHECK: Write A[{0,+,8}<%for.j>]
+ %indvar.j.next = add nsw i64 %indvar.j, 1
+ %exitcond.j = icmp eq i64 %indvar.j.next, %N
+ br i1 %exitcond.j, label %for.i.end, label %for.j
+
+for.i.end: ; preds = %for.j
+ %exitcond.i = icmp eq i64 %indvar.i.next, %N
+ br i1 %exitcond.i, label %return, label %for.i
+
+return: ; preds = %for.i.end
+ ret void
+}
+
+attributes #0 = { nounwind }
--- /dev/null
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev -analyze < %s | FileCheck %s
+
+; ModuleID = 'scalar_to_array.ll'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 8
+
+; CHECK: empty
+; Function Attrs: nounwind
+define i32 @empty() #0 {
+entry:
+ fence seq_cst
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvar, 1024
+ br i1 %exitcond, label %for.body, label %return
+
+for.body: ; preds = %for.cond
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvar.next = add i64 %indvar, 1
+ br label %for.cond
+
+return: ; preds = %for.cond
+ fence seq_cst
+ ret i32 0
+}
+
+; CHECK: array_access
+; Function Attrs: nounwind
+define i32 @array_access() #0 {
+entry:
+ fence seq_cst
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvar, 1024
+ br i1 %exitcond, label %for.body, label %return
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %float = uitofp i64 %indvar to float
+ store float %float, float* %arrayidx
+ br label %for.inc
+; CHECK: BB: for.body
+; CHECK-NOT: Read
+; CHECK: Write A[{0,+,4}<%for.cond>]
+
+for.inc: ; preds = %for.body
+ %indvar.next = add i64 %indvar, 1
+ br label %for.cond
+
+return: ; preds = %for.cond
+ fence seq_cst
+ ret i32 0
+}
+
+; Function Attrs: nounwind
+; CHECK: intra_scop_dep
+define i32 @intra_scop_dep() #0 {
+entry:
+ fence seq_cst
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvar, 1024
+ br i1 %exitcond, label %for.body.a, label %return
+
+for.body.a: ; preds = %for.cond
+ %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %scalar = load float* %arrayidx
+ br label %for.body.b
+; CHECK: BB: for.body.a
+; CHECK: Read A[{0,+,4}<%for.cond>]
+; CHECK: Write scalar[0]
+
+for.body.b: ; preds = %for.body.a
+ %arrayidx2 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %float = uitofp i64 %indvar to float
+ %sum = fadd float %scalar, %float
+ store float %sum, float* %arrayidx2
+ br label %for.inc
+; CHECK: BB: for.body.b
+; CHECK: Read scalar[0]
+; CHECK: Write A[{0,+,4}<%for.cond>]
+
+for.inc: ; preds = %for.body.b
+ %indvar.next = add i64 %indvar, 1
+ br label %for.cond
+
+return: ; preds = %for.cond
+ fence seq_cst
+ ret i32 0
+}
+
+; It is not possible to have a scop which accesses a scalar element that is
+; a global variable. All global variables are pointers containing possibly
+; a single element. Hence they do not need to be handled anyways.
+; Please note that this is still required when scalar to array rewritting is
+; disabled.
+
+; CHECK: use_after_scop
+; Function Attrs: nounwind
+define i32 @use_after_scop() #0 {
+entry:
+ %scalar.s2a = alloca float
+ fence seq_cst
+ br label %for.head
+
+for.head: ; preds = %for.inc, %entry
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
+ br label %for.body
+
+for.body: ; preds = %for.head
+ %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %scalar = load float* %arrayidx
+ store float %scalar, float* %scalar.s2a
+; Escaped uses are still required to be rewritten to stack variable.
+; CHECK: BB: for.body
+; CHECK: Read A[{0,+,4}<%for.head>]
+; CHECK: Write scalar.s2a[0]
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp ne i64 %indvar, 1024
+ br i1 %exitcond, label %for.head, label %for.after
+
+for.after: ; preds = %for.inc
+ %scalar.loadoutside = load float* %scalar.s2a
+ fence seq_cst
+ %return_value = fptosi float %scalar.loadoutside to i32
+ br label %return
+
+return: ; preds = %for.after
+ ret i32 %return_value
+}
+
+; We currently do not transform scalar references, that have only read accesses
+; in the scop. There are two reasons for this:
+;
+; o We don't introduce additional memory references which may yield to compile
+; time overhead.
+; o For integer values, such a translation may block the use of scalar
+; evolution on those values.
+;
+; CHECK: before_scop
+; Function Attrs: nounwind
+define i32 @before_scop() #0 {
+entry:
+ br label %preheader
+
+preheader: ; preds = %entry
+ %scalar = fadd float 4.000000e+00, 5.000000e+00
+ fence seq_cst
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %preheader
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ]
+ %exitcond = icmp ne i64 %indvar, 1024
+ br i1 %exitcond, label %for.body, label %return
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ store float %scalar, float* %arrayidx
+ br label %for.inc
+; CHECK: BB: for.body
+; CHECK: Write A[{0,+,4}<%for.cond>]
+
+for.inc: ; preds = %for.body
+ %indvar.next = add i64 %indvar, 1
+ br label %for.cond
+
+return: ; preds = %for.cond
+ fence seq_cst
+ ret i32 0
+}
+
+attributes #0 = { nounwind }
; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze -disable-polly-intra-scop-scalar-to-array < %s | FileCheck %s -check-prefix=SCALARACCESS
; void f(long A[], int N, int *init_ptr) {
; long i, j;
entry.next:
; CHECK: BB: entry.next
+; SCALARACCESS: BB: entry.next
%init = load i64* %init_ptr
; CHECK: Read init_ptr[0]
; CHECK: Write init.s2a[0]
+; SCALARACCESS: Read init_ptr[0]
+; SCALARACCESS: Write init[0]
br label %for.j
for.j:
; CHECK: BB: for.j
; CHECK: Read init.s2a[0]
; CHECK: Write A[{0,+,8}<%for.j>]
+
+; SCALARACCESS: BB: for.j
+; SCALARACCESS: Read init
+; SCALARACCESS: Write A[{0,+,8}<%for.j>]
%init_plus_two = add i64 %init, 2
%scevgep = getelementptr i64* %A, i64 %indvar.j
store i64 %init_plus_two, i64* %scevgep
return:
ret void
}
+
+define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
+entry:
+ br label %for.i
+
+for.i:
+ %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
+ %indvar.i.next = add nsw i64 %indvar.i, 1
+ br label %entry.next
+
+entry.next:
+; SCALARACCESS: BB: entry.next
+ %init = load i64* %init_ptr
+; SCALARACCESS: Read init_ptr[0]
+; SCALARACCESS: Write init[0]
+ br label %for.j
+
+for.j:
+; SCALARACCESS: BB: for.j
+ %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
+ %scevgep = getelementptr i64* %A, i64 %indvar.j
+ store i64 %init, i64* %scevgep
+; SCALARACCESS: Read init
+; SCALARACCESS: Write A[{0,+,8}<%for.j>]
+ %indvar.j.next = add nsw i64 %indvar.j, 1
+ %exitcond.j = icmp eq i64 %indvar.j.next, %N
+ br i1 %exitcond.j, label %for.i.end, label %for.j
+
+for.i.end:
+ %exitcond.i = icmp eq i64 %indvar.i.next, %N
+ br i1 %exitcond.i, label %return, label %for.i
+
+return:
+ ret void
+}