From 275a1756ad113a2a58bba1fd621ba1921e0e94f0 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 24 Feb 2015 16:16:32 +0000 Subject: [PATCH] Allow non-affine control flow -- Code Generation This is the code generation for region statements that are created when non-affine control flow was present in the input. A new generator, similar to the block or vector generator, for regions is used to traverse and copy the region statement and to adjust the control flow inside the new region in the end. llvm-svn: 230340 --- polly/include/polly/CodeGen/BlockGenerators.h | 45 ++++++++++-- polly/lib/CodeGen/BlockGenerators.cpp | 81 ++++++++++++++++++++-- polly/lib/CodeGen/IslCodeGeneration.cpp | 13 +++- polly/test/Isl/CodeGen/non_affine_float_compare.ll | 78 +++++++++++++++++++++ 4 files changed, 205 insertions(+), 12 deletions(-) create mode 100644 polly/test/Isl/CodeGen/non_affine_float_compare.ll diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h index 77522ab..776105b 100644 --- a/polly/include/polly/CodeGen/BlockGenerators.h +++ b/polly/include/polly/CodeGen/BlockGenerators.h @@ -84,12 +84,12 @@ public: /// This copies the entire basic block and updates references to old values /// with references to new values, as defined by GlobalMap. /// - /// @param Stmt The statement to code generate. + /// @param Stmt The block statement to code generate. /// @param GlobalMap A mapping from old values to their new values /// (for values recalculated in the new ScoP, but not /// within this basic block). /// @param LTS A map from old loops to new induction variables as SCEVs. - void copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); + void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); protected: PollyIRBuilder &Builder; @@ -100,6 +100,21 @@ protected: /// @brief The dominator tree of this function. DominatorTree &DT; + /// @brief Copy the given basic block. + /// + /// @param Stmt The statement to code generate. + /// @param BB The basic block to code generate. + /// @param BBMap A mapping from old values to their new values in this + /// block. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + /// @param LTS A map from old loops to new induction variables as SCEVs. + /// + /// @returns The copy of the basic block. + BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, + ValueMapT &GlobalMap, LoopToScevMapT <S); + /// @brief Get the new version of a value. /// /// Given an old value, we first check if a new version of this value is @@ -209,7 +224,7 @@ public: std::vector &VLTS, __isl_keep isl_map *Schedule) { VectorBlockGenerator Generator(BlockGen, GlobalMaps, VLTS, Schedule); - Generator.copyBB(Stmt); + Generator.copyStmt(Stmt); } private: @@ -322,7 +337,29 @@ private: void copyInstruction(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); - void copyBB(ScopStmt &Stmt); + void copyStmt(ScopStmt &Stmt); +}; + +/// @brief Generator for new versions of polyhedral region statements. +class RegionGenerator : public BlockGenerator { +public: + /// @brief Create a generator for regions. + /// + /// @param BlockGen A generator for basic blocks. + RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {} + + /// @brief Copy the region statement @p Stmt. + /// + /// This copies the entire region represented by @p Stmt and updates + /// references to old values with references to new values, as defined by + /// GlobalMap. + /// + /// @param Stmt The statement to code generate. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + /// @param LTS A map from old loops to new induction variables as SCEVs. + void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); }; } #endif diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 8b6abf1..26ac56de 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -284,18 +284,29 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS); } -void BlockGenerator::copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap, - LoopToScevMapT <S) { +void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, + LoopToScevMapT <S) { + assert(Stmt.isBlockStmt() && + "Only block statements can be copied by the block generator"); + + ValueMapT BBMap; + BasicBlock *BB = Stmt.getBasicBlock(); + copyBB(Stmt, BB, BBMap, GlobalMap, LTS); +} + +BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, + ValueMapT &BBMap, ValueMapT &GlobalMap, + LoopToScevMapT <S) { BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); CopyBB->setName("polly.stmt." + BB->getName()); Builder.SetInsertPoint(CopyBB->begin()); - ValueMapT BBMap; - for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS); + + return CopyBB; } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -620,7 +631,10 @@ void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt, copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps); } -void VectorBlockGenerator::copyBB(ScopStmt &Stmt) { +void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) { + assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by " + "the vector block generator"); + BasicBlock *BB = Stmt.getBasicBlock(); BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); @@ -647,3 +661,60 @@ void VectorBlockGenerator::copyBB(ScopStmt &Stmt) { for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap); } + +void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, + LoopToScevMapT <S) { + assert(Stmt.isRegionStmt() && + "Only region statements can be copied by the block generator"); + + // The region represented by the statement. + Region *R = Stmt.getRegion(); + + // The "BBMap" for the whole region. + ValueMapT RegionMap; + + // Iterate over all blocks in the region in a breadth-first search. + std::deque Blocks; + SmallPtrSet SeenBlocks; + Blocks.push_back(R->getEntry()); + SeenBlocks.insert(R->getEntry()); + + while (!Blocks.empty()) { + BasicBlock *BB = Blocks.front(); + Blocks.pop_front(); + + // Copy the block with the BlockGenerator. + BasicBlock *BBCopy = copyBB(Stmt, BB, RegionMap, GlobalMap, LTS); + + // And continue with new successors inside the region. + for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) + if (R->contains(*SI) && SeenBlocks.insert(*SI).second) + Blocks.push_back(*SI); + + // In order to remap PHI nodes we store also basic block mappings. + RegionMap[BB] = BBCopy; + } + + // Now create a new dedicated region exit block and add it to the region map. + BasicBlock *RegionExit = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); + RegionExit->setName("polly.stmt." + R->getExit()->getName() + ".pre"); + RegionMap[R->getExit()] = RegionExit; + + // As the block generator doesn't handle control flow we need to add the + // region control flow by hand after all blocks have been copied. + for (BasicBlock *BB : SeenBlocks) { + + BranchInst *BI = cast(BB->getTerminator()); + + BasicBlock *BBCopy = cast(RegionMap[BB]); + Instruction *BICopy = BBCopy->getTerminator(); + + Builder.SetInsertPoint(BBCopy); + copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); + BICopy->eraseFromParent(); + } + + // Reset the old insert point for the build. + Builder.SetInsertPoint(RegionExit->begin()); +} diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp index 007a4fc..a3876f1 100644 --- a/polly/lib/CodeGen/IslCodeGeneration.cpp +++ b/polly/lib/CodeGen/IslCodeGeneration.cpp @@ -65,8 +65,8 @@ public: : S(S), Builder(Builder), Annotator(Annotator), Rewriter(new SCEVExpander(SE, "polly")), ExprBuilder(Builder, IDToValue, *Rewriter), - BlockGen(Builder, LI, SE, DT, &ExprBuilder), P(P), DL(DL), LI(LI), - SE(SE), DT(DT) {} + BlockGen(Builder, LI, SE, DT, &ExprBuilder), RegionGen(BlockGen), P(P), + DL(DL), LI(LI), SE(SE), DT(DT) {} ~IslNodeBuilder() { delete Rewriter; } @@ -84,6 +84,10 @@ private: IslExprBuilder ExprBuilder; BlockGenerator BlockGen; + + /// @brief Generator for region statements. + RegionGenerator RegionGen; + Pass *P; const DataLayout &DL; LoopInfo &LI; @@ -800,7 +804,10 @@ void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { Stmt->setAstBuild(IslAstInfo::getBuild(User)); createSubstitutions(Expr, Stmt, VMap, LTS); - BlockGen.copyBB(*Stmt, VMap, LTS); + if (Stmt->isBlockStmt()) + BlockGen.copyStmt(*Stmt, VMap, LTS); + else + RegionGen.copyStmt(*Stmt, VMap, LTS); isl_ast_node_free(User); isl_id_free(Id); diff --git a/polly/test/Isl/CodeGen/non_affine_float_compare.ll b/polly/test/Isl/CodeGen/non_affine_float_compare.ll new file mode 100644 index 0000000..2b829d2 --- /dev/null +++ b/polly/test/Isl/CodeGen/non_affine_float_compare.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-codegen-isl -polly-no-early-exit -polly-allow-nonaffine-branches -S < %s | FileCheck %s +; +; void f(float *A) { +; for (int i = 0; i < 1024; i++) +; if (A[i] == A[i - 1]) +; A[i]++; +; A[i]++; +; } +; +; +; CHECK: polly.stmt.bb2: +; CHECK: %scevgep[[R0:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar +; CHECK: %tmp3_p_scalar_ = load float* %scevgep[[R0]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %scevgep[[R2:[0-9]*]] = getelementptr float* %scevgep{{[0-9]*}}, i64 %polly.indvar +; CHECK: %tmp6_p_scalar_ = load float* %scevgep[[R2]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %p_tmp7 = fcmp oeq float %tmp3_p_scalar_, %tmp6_p_scalar_ +; CHECK: br i1 %p_tmp7, label %polly.stmt.bb8, label %polly.stmt.bb12.pre + +; CHECK: polly.stmt.bb8: +; CHECK: %scevgep[[R3:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar +; CHECK: %tmp10_p_scalar_ = load float* %scevgep[[R3]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %p_tmp11 = fadd float %tmp10_p_scalar_, 1.000000e+00 +; CHECK: store float %p_tmp11, float* %scevgep[[R3]], align 4, !alias.scope !0, !noalias !2 +; CHECK: br label %polly.stmt.bb12.pre + +; CHECK: polly.stmt.bb12.pre: +; CHECK: br label %polly.stmt.bb12 + +; CHECK: polly.stmt.bb12: +; CHECK: %scevgep[[R4:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar +; CHECK: %tmp10b_p_scalar_ = load float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %p_tmp11b = fadd float %tmp10b_p_scalar_, 1.000000e+00 +; CHECK: store float %p_tmp11b, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 +; CHECK: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb13, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb13 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb14 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp3 = load float* %tmp, align 4 + %tmp4 = add nsw i64 %indvars.iv, -1 + %tmp5 = getelementptr inbounds float* %A, i64 %tmp4 + %tmp6 = load float* %tmp5, align 4 + %tmp7 = fcmp oeq float %tmp3, %tmp6 + br i1 %tmp7, label %bb8, label %bb12 + +bb8: ; preds = %bb2 + %tmp9 = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp10 = load float* %tmp9, align 4 + %tmp11 = fadd float %tmp10, 1.000000e+00 + store float %tmp11, float* %tmp9, align 4 + br label %bb12 + +bb12: ; preds = %bb8, %bb2 + %tmp9b = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp10b = load float* %tmp9b, align 4 + %tmp11b = fadd float %tmp10b, 1.000000e+00 + store float %tmp11b, float* %tmp9b, align 4 + br label %bb13 + +bb13: ; preds = %bb12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb14: ; preds = %bb1 + ret void +} -- 2.7.4