#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
+#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
return NumberIterations + 1;
}
-/// Extract the values and SCEVs needed to generate code for a block.
-static int findReferencesInBlock(struct SubtreeReferences &References,
- const ScopStmt *Stmt, BasicBlock *BB) {
- for (Instruction &Inst : *BB) {
- // Include invariant loads
- if (isa<LoadInst>(Inst))
- if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
- References.Values.insert(InvariantLoad);
-
- for (Value *SrcVal : Inst.operands()) {
- auto *Scope = References.LI.getLoopFor(BB);
- if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
- References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
- continue;
- } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
- References.Values.insert(NewVal);
+static void findReferencesByUse(Value *SrcVal, ScopStmt *UserStmt,
+ Loop *UserScope, const ValueMapT &GlobalMap,
+ SetVector<Value *> &Values,
+ SetVector<const SCEV *> &SCEVs) { // Collects into Values/SCEVs what one use of SrcVal references.
+ VirtualUse VUse = VirtualUse::create(UserStmt, UserScope, SrcVal, true); // Classify this use of SrcVal.
+ switch (VUse.getKind()) {
+ case VirtualUse::Constant:
+ // When accelerator-offloading, a GlobalValue is a host address whose
+ // content must still be transferred to the GPU, so record it in Values.
+ if (isa<GlobalValue>(SrcVal))
+ Values.insert(SrcVal);
+ break;
+
+ case VirtualUse::Synthesizable:
+ SCEVs.insert(VUse.getScevExpr()); // Only the SCEV expression is recorded.
+ return; // Skips the GlobalMap lookup below.
+
+ case VirtualUse::Block:
+ case VirtualUse::ReadOnly:
+ case VirtualUse::Hoisted:
+ case VirtualUse::Intra:
+ case VirtualUse::Inter:
+ break; // Nothing kind-specific; handled by the GlobalMap lookup below.
+ }
+
+ if (Value *NewVal = GlobalMap.lookup(SrcVal)) // Record the mapped value, if GlobalMap has one.
+ Values.insert(NewVal);
+}
+
+static void findReferencesInInst(Instruction *Inst, ScopStmt *UserStmt,
+ Loop *UserScope, const ValueMapT &GlobalMap,
+ SetVector<Value *> &Values,
+ SetVector<const SCEV *> &SCEVs) { // Collects references for every operand of Inst.
+ for (Use &U : Inst->operands())
+ findReferencesByUse(U.get(), UserStmt, UserScope, GlobalMap, Values, SCEVs);
+}
+
+static void findReferencesInStmt(ScopStmt *Stmt, SetVector<Value *> &Values,
+ ValueMapT &GlobalMap,
+ SetVector<const SCEV *> &SCEVs) { // Collects references for the instructions of Stmt.
+ LoopInfo *LI = Stmt->getParent()->getLI();
+
+ BasicBlock *BB = Stmt->getBasicBlock(); // NOTE(review): presumably null for region statements - confirm getLoopFor accepts that.
+ Loop *Scope = LI->getLoopFor(BB);
+ for (Instruction *Inst : Stmt->getInstructions()) // The statement's instruction list, scoped by the loop of its basic block.
+ findReferencesInInst(Inst, Stmt, Scope, GlobalMap, Values, SCEVs);
+
+ if (Stmt->isRegionStmt()) { // Region statements: additionally scan every block of the region.
+ for (BasicBlock *BB : Stmt->getRegion()->blocks()) {
+ Loop *Scope = LI->getLoopFor(BB); // Scope is looked up per block here.
+ for (Instruction &Inst : *BB)
+ findReferencesInInst(&Inst, Stmt, Scope, GlobalMap, Values, SCEVs);
}
}
- return 0;
}
-void polly::addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
+void polly::addReferencesFromStmt(ScopStmt *Stmt, void *UserPtr,
bool CreateScalarRefs) {
auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
- if (Stmt->isBlockStmt())
- findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
- else if (Stmt->isRegionStmt()) {
- for (BasicBlock *BB : Stmt->getRegion()->blocks())
- findReferencesInBlock(References, Stmt, BB);
- } else {
- assert(Stmt->isCopyStmt());
- // Copy Stmts have no instructions that we need to consider.
- }
+ findReferencesInStmt(Stmt, References.Values, References.GlobalMap,
+ References.SCEVs);
for (auto &Access : *Stmt) {
if (References.ParamSpace) {
static void addReferencesFromStmtSet(isl::set Set,
struct SubtreeReferences *UserPtr) {
isl::id Id = Set.get_tuple_id();
- auto *Stmt = static_cast<const ScopStmt *>(Id.get_user());
- return addReferencesFromStmt(Stmt, UserPtr);
+ auto *Stmt = static_cast<ScopStmt *>(Id.get_user()); // The tuple id's user pointer is treated as the ScopStmt.
+ addReferencesFromStmt(Stmt, UserPtr);
}
/// Extract the out-of-scop values and SCEVs referenced from a union set
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-invariant-load-hoisting \
+; RUN: -S < %s | \
+; RUN: FileCheck -check-prefix=HOST-IR %s
+
; RUN: opt %loadPolly -disable-output -polly-acc-dump-kernel-ir \
; RUN: -polly-codegen-ppcg -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck -check-prefix=KERNEL-IR %s
; REQUIRES: pollyacc
; Verify that invariant loads used in a kernel statement are correctly forwarded
; as subtree value to the GPU kernel.
-; CHECK: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0({{.*}} float %polly.access.p.load)
-; CHECK: store float %polly.access.p.load, float* %indvar2f.phiops
+; HOST-IR: store float %polly.access.p.load, float* %invariant.preload.s2a, align 4
+
+; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_2({{.*}}i8 addrspace(1)* %MemRef_indvar2f__phi{{.*}})
+; KERNEL-IR: %indvar2f.phiops.reload = load float, float* %indvar2f.phiops, align 4
+; KERNEL-IR: store float %indvar2f.phiops.reload, float addrspace(1)* %polly.access.MemRef_A, align 4
+
+; FIXME: store float %indvar2f.phiops.reload, float* %indvar2f.phiops, align 4
+; For some reason the above instruction is emitted that stores back to the address it was just loaded from.
define void @foo(float* %A, float* %p) {
entry:
%ptr = getelementptr float, float* %A, i64 %indvar
store float 42.0, float* %ptr
%cmp = icmp sle i64 %indvar, 1024
- br i1 %cmp, label %loop, label %loop2
+ br i1 %cmp, label %loop, label %anotherloop
-loop2:
- %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2]
- %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2]
+anotherloop:
+ %indvar2 = phi i64 [0, %loop], [%indvar2.next, %anotherloop]
+ %indvar2f = phi float [%invariant, %loop], [%indvar2f, %anotherloop]
%indvar2.next = add i64 %indvar2, 1
store float %indvar2f, float* %A
%cmp2 = icmp sle i64 %indvar2, 1024
- br i1 %cmp2, label %loop2, label %end
+ br i1 %cmp2, label %anotherloop, label %end
end:
ret void
; IR: [[REGC:%.+]] = bitcast i32* %{{[0-9]+}} to i8*
; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
-; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c, i32 %0) #0 {
-; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %10
-; KERNEL-IR-NEXT: store i32 %0, i32 addrspace(1)* %polly.access.MemRef_c, align 4
+; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c) #0 {
+; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %9
+; KERNEL-IR-NEXT: store i32 422, i32 addrspace(1)* %polly.access.MemRef_c, align 4
define void @kernel_dynprog([50 x i32]* %c) {
entry:
for.body17: ; preds = %for.body17, %for.cond1.preheader
%indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
%arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
- store i32 undef, i32* %arrayidx69, align 4
+ store i32 422, i32* %arrayidx69, align 4
%indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
%lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
%exitcond75 = icmp ne i32 %lftr.wideiv74, 50