From: Ruiling Song Date: Mon, 24 Mar 2014 01:46:21 +0000 (+0800) Subject: GBE: Fix register liveness issue under simd mode. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8bc5ac8b591a162837105c9b8dd430ab203e9dde;p=contrib%2Fbeignet.git GBE: Fix register liveness issue under simd mode. As we run in SIMD mode with prediction mask to indicate active lanes, If a vreg is defined in a loop, and there are som uses of the vreg out of the loop, the define point may be run several times under *different* prediction mask. For these kinds of vreg, we must extend the vreg liveness into the whole loop. If we don't do this, it's liveness is killed before the def point inside loop. If the vreg's corresponding physical reg is assigned to other vreg during the killed period, and the instructions before kill point were re-executed with different prediction, the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data. This patch fixes the HaarFixture case in opencv. Signed-off-by: Ruiling Song Reviewed-by: Zhigang Gong --- diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index a853731..642301c 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -83,15 +83,6 @@ namespace gbe return this->liveness->getLiveIn(bb); } - /*! Get the extra liveOut information for the given block */ - INLINE const ir::Liveness::LiveOut &getExtraLiveOut(const ir::BasicBlock *bb) const { - return this->liveness->getExtraLiveOut(bb); - } - /*! Get the extra LiveIn information for the given block */ - INLINE const ir::Liveness::UEVar &getExtraLiveIn(const ir::BasicBlock *bb) const { - return this->liveness->getExtraLiveIn(bb); - } - void collectShifter(GenRegister dest, GenRegister src); void loadTopHalf(GenRegister dest, GenRegister src); void storeTopHalf(GenRegister dest, GenRegister src); diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 1ecb8ea..f28bf9a 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -870,15 +870,10 @@ namespace gbe for (auto reg : ctx.getLiveIn(bb)) this->intervals[reg].minID = std::min(this->intervals[reg].minID, firstID); - for (auto reg : ctx.getExtraLiveIn(bb)) - this->intervals[reg].minID = std::min(this->intervals[reg].minID, firstID); // All registers alive at the end of the block must have their intervals // updated as well for (auto reg : ctx.getLiveOut(bb)) this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID); - - for (auto reg : ctx.getExtraLiveOut(bb)) - this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID); } this->intervals[ocl::emask].minID = 0; diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp index ff5c575..15d3b0a 100644 --- a/backend/src/ir/context.cpp +++ b/backend/src/ir/context.cpp @@ -76,13 +76,14 @@ namespace ir { // function lowerReturn(unit, fn->getName()); + // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower + fn->sortLabels(); + fn->computeCFG(); + // Spill function argument to the stack if required and identify which // function arguments can use constant push lowerFunctionArguments(unit, fn->getName()); - // Properly order labels and compute the CFG - fn->sortLabels(); - fn->computeCFG(); const StackElem elem = fnStack.back(); fnStack.pop_back(); fn = elem.fn; diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index 71dcc1f..a6aecb5 100644 --- a/backend/src/ir/function.cpp +++ b/backend/src/ir/function.cpp @@ -52,6 +52,7 @@ namespace ir { Function::~Function(void) { for (auto block : blocks) GBE_DELETE(block); + for (auto loop : loops) GBE_DELETE(loop); for (auto arg : args) GBE_DELETE(arg); } @@ -59,6 +60,10 @@ namespace ir { return unit.getPointerFamily(); } + void Function::addLoop(const vector &bbs, const vector> &exits) { + loops.push_back(GBE_NEW(Loop, bbs, exits)); + } + void Function::sortLabels(void) { uint32_t last = 0; @@ -96,6 +101,17 @@ namespace ir { } }); + // fix labels for loops + for (auto &x : loops) { + for (auto &y : x->bbs) + y = labelMap[y]; + + for (auto &z : x->exits) { + z.first = labelMap[z.first]; + z.second = labelMap[z.second]; + } + } + // Reset the label to block mapping this->labels.resize(last); foreachBlock([&](BasicBlock &bb) { diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 2468e73..03aeaeb 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -134,6 +134,17 @@ namespace ir { return arg0.offset < arg1.offset; } + /*! CFG loops */ + struct Loop : public NonCopyable + { + public: + Loop(const vector &in, const vector> &exit) : + bbs(in), exits(exit) {} + vector bbs; + vector> exits; + GBE_STRUCT(Loop); + }; + /*! A function is : * - a register file * - a set of basic block layout into a CGF @@ -318,6 +329,9 @@ namespace ir { INLINE const uint32_t getStackSize(void) const { return this->stackSize; } /*! Push stack size. */ INLINE void pushStackSize(uint32_t step) { this->stackSize += step; } + /*! add the loop info for later liveness analysis */ + void addLoop(const vector &bbs, const vector> &exits); + INLINE const vector &getLoops() { return loops; } private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name @@ -327,6 +341,7 @@ namespace ir { vector labels; //!< Each label points to a basic block vector immediates; //!< All immediate values in the function vector blocks; //!< All chained basic blocks + vector loops; //!< Loops info of the function RegisterFile file; //!< RegisterDatas used by the instructions Profile profile; //!< Current function profile PushMap pushMap; //!< Pushed function arguments (reg->loc) diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index 724d5c3..9be539f 100644 --- a/backend/src/ir/liveness.cpp +++ b/backend/src/ir/liveness.cpp @@ -38,19 +38,11 @@ namespace ir { if (op == OP_RET) { workSet.insert(info); info->liveOut.insert(ocl::retVal); - } else if (op == OP_BRA) { - // If this is a backward jump, put it to the extra work list. - if (((BranchInstruction*)lastInsn)->getLabelIndex() < bb.getLabelIndex()) - extraWorkSet.insert(info); } }); // Now with iterative analysis, we compute liveout and livein sets this->computeLiveInOut(); - for (auto it : extraWorkSet) { - for (auto reg : it->liveOut) { - it->extraLiveIn.insert(reg); - } - } + // extend register (def in loop, use out-of-loop) liveness to the whole loop this->computeExtraLiveInOut(); } @@ -110,8 +102,6 @@ namespace ir { BlockInfo *info = liveness[&bb]; auto &outVarSet = info->liveOut; auto &inVarSet = info->upwardUsed; - auto &extraInVarSet = info->extraLiveIn; - auto &extraOutVarSet = info->extraLiveOut; printf("\n\tin Lives: "); for (auto inVar : inVarSet) { printf("%d ", inVar); @@ -128,85 +118,61 @@ namespace ir { } /* - Consider the following scenario, %100's normal liveness will start from Ln-1's - position. In normal analysis, the Ln-1 is not Ln's predecessor, thus the liveness - of %100 will be passed to Ln and then will not be passed to L0. - - But considering we are running on a multilane with predication's vector machine. - The unconditional BR in Ln-1 may be removed and it will enter Ln with a subset of - the revert set of Ln-1's predication. For example when running Ln-1, the active lane - is 0-7, then at Ln the active lane is 8-15. Then at the end of Ln, a subset of 8-15 - will jump to L0. If a register %10 is allocated the same GRF as %100, given the fact - that their normal liveness doesn't overlapped, the a subset of 8-15 lanes will be - modified. If the %10 and %100 are the same vector data type, then we are fine. But if - %100 is a float vector, and the %10 is a bool or short vector, then we hit a bug here. - -L0: - ... - %10 = 5 - ... -Ln-1: - %100 = 2 - BR Ln+1 - -Ln: - ... - BR(%xxx) L0 - -Ln+1: - %101 = %100 + 2; - ... - - The solution to fix this issue is to build another liveness data. We will start with - those BBs with backward jump. Then pass all the liveOut register as extra liveIn - of current BB and then forward this extra liveIn to all the blocks. This is very similar - to the normal liveness analysis just with reverse direction. + As we run in SIMD mode with prediction mask to indicate active lanes. + If a vreg is defined in a loop, and there are som uses of the vreg out of the loop, + the define point may be run several times under *different* prediction mask. + For these kinds of vreg, we must extend the vreg liveness into the whole loop. + If we don't do this, it's liveness is killed before the def point inside loop. + If the vreg's corresponding physical reg is assigned to other vreg during the + killed period, and the instructions before kill point were re-executed with different prediction, + the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data. */ + void Liveness::computeExtraLiveInOut(void) { - while(!extraWorkSet.empty()) { - struct BlockInfo *currInfo = *extraWorkSet.begin(); - extraWorkSet.erase(currInfo); - for (auto currInVar : currInfo->extraLiveIn) - currInfo->extraLiveOut.insert(currInVar); - bool isChanged = false; - for (auto succ : currInfo->bb.getSuccessorSet()) { - BlockInfo *succInfo = liveness[succ]; - for (auto currOutVar : currInfo->extraLiveOut) { - bool changed = false; - if (!succInfo->upwardUsed.contains(currOutVar)) { - auto it = succInfo->extraLiveIn.insert(currOutVar); - changed = it.second; + const vector &loops = fn.getLoops(); + if(loops.size() == 0) return; + + for (auto l : loops) { + for (auto x : l->exits) { + const BasicBlock &a = fn.getBlock(x.first); + const BasicBlock &b = fn.getBlock(x.second); + BlockInfo * exiting = liveness[&a]; + BlockInfo * exit = liveness[&b]; + std::vector toExtend; + + if(b.getPredecessorSet().size() > 1) { + for (auto p : exit->upwardUsed) + toExtend.push_back(p); + } else { + std::set_intersection(exiting->liveOut.begin(), exiting->liveOut.end(), exit->upwardUsed.begin(), exit->upwardUsed.end(), std::back_inserter(toExtend)); + } + if (toExtend.size() == 0) continue; + + for (auto bb : l->bbs) { + BlockInfo * bI = liveness[&fn.getBlock(bb)]; + for(auto r : toExtend) { + if(!bI->upwardUsed.contains(r)) + bI->upwardUsed.insert(r); + bI->liveOut.insert(r); } - if (changed) isChanged = true; } - if (isChanged) - extraWorkSet.insert(succInfo);} - }; + } + } #if 0 fn.foreachBlock([this](const BasicBlock &bb){ printf("label %d:\n", bb.getLabelIndex()); BlockInfo *info = liveness[&bb]; auto &outVarSet = info->liveOut; auto &inVarSet = info->upwardUsed; - auto &extraInVarSet = info->extraLiveIn; - auto &extraOutVarSet = info->extraLiveOut; - printf("\n\tin Lives: "); + printf("\n\tLive Ins: "); for (auto inVar : inVarSet) { printf("%d ", inVar); } - printf("\n\textra in Lives: "); - for (auto inVar : extraInVarSet) { - printf("%d ", inVar); - } printf("\n"); - printf("\tout Lives: "); + printf("\tLive outs: "); for (auto outVar : outVarSet) { printf("%d ", outVar); } - printf("\n\textra out Lives: "); - for (auto outVar : extraOutVarSet) { - printf("%d ", outVar); - } printf("\n"); }); diff --git a/backend/src/ir/liveness.hpp b/backend/src/ir/liveness.hpp index 9198eae..11f1fdc 100644 --- a/backend/src/ir/liveness.hpp +++ b/backend/src/ir/liveness.hpp @@ -69,8 +69,6 @@ namespace ir { INLINE bool inVarKill(Register reg) const { return varKill.contains(reg); } - UEVar extraLiveIn; - LiveOut extraLiveOut; UEVar upwardUsed; LiveOut liveOut; VarKill varKill; @@ -96,17 +94,6 @@ namespace ir { return info.upwardUsed; } - /*! Get the set of extra registers alive at the end of the block */ - const LiveOut &getExtraLiveOut(const BasicBlock *bb) const { - const BlockInfo &info = this->getBlockInfo(bb); - return info.extraLiveOut; - } - /*! Get the set of extra registers alive at the beginning of the block */ - const UEVar &getExtraLiveIn(const BasicBlock *bb) const { - const BlockInfo &info = this->getBlockInfo(bb); - return info.extraLiveIn; - } - /*! Return the function the liveness was computed on */ INLINE const Function &getFunction(void) const { return fn; } /*! Actually do something for each successor / predecessor of *all* blocks */ @@ -143,7 +130,7 @@ namespace ir { void computeExtraLiveInOut(void); /*! Set of work list block which has exit(return) instruction */ typedef set WorkSet; - WorkSet workSet, extraWorkSet; + WorkSet workSet; /*! Use custom allocators */ GBE_CLASS(Liveness); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 94e5ad2..467e240 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -489,7 +489,6 @@ namespace gbe if(!bKernel) return false; LI = &getAnalysis(); - emitFunction(F); return false; } @@ -497,6 +496,8 @@ namespace gbe virtual bool doFinalization(Module &M) { return false; } /*! handle global variable register allocation (local, constant space) */ void allocateGlobalVariableRegister(Function &F); + /*! gather all the loops in the function and add them to ir::Function */ + void gatherLoopInfo(ir::Function &fn); /*! Emit the complete function code and declaration */ void emitFunction(Function &F); /*! Handle input and output function parameters */ @@ -1448,6 +1449,78 @@ namespace gbe } } + static INLINE void findAllLoops(LoopInfo * LI, std::vector> &lp) + { + for (Loop::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) { + lp.push_back(std::make_pair(*I, -1)); + } + if (lp.size() == 0) return; + + uint32_t i = 0; + do { + const std::vector subLoops = lp[i].first->getSubLoops(); + for(auto sub : subLoops) + lp.push_back(std::make_pair(sub, i)); + i++; + } while(i < lp.size()); + } + + void GenWriter::gatherLoopInfo(ir::Function &fn) { + vector loopBBs; + vector> loopExits; + std::vector> lp; + + findAllLoops(LI, lp); +#if GBE_DEBUG + // check two loops' interference + for(unsigned int i = 0; i < lp.size(); i++) { + SmallVector exitBBs; + lp[i].first->getExitEdges(exitBBs); + + const std::vector &inBBs = lp[i].first->getBlocks(); + std::vector bbs1; + for(auto x : inBBs) { + bbs1.push_back(labelMap[x]); + } + std::sort(bbs1.begin(), bbs1.end()); + for(unsigned int j = i+1; j < lp.size(); j++) { + if(! lp[i].first->contains(lp[j].first)) { + const std::vector &inBBs2 = lp[j].first->getBlocks(); + std::vector bbs2; + std::vector bbs3; + + for(auto x : inBBs2) { + bbs2.push_back(labelMap[x]); + } + + std::sort(bbs2.begin(), bbs2.end()); + std::set_intersection(bbs1.begin(), bbs1.end(), bbs2.begin(), bbs2.end(), std::back_inserter(bbs3)); + GBE_ASSERT(bbs3.size() < 1); + } + } + } +#endif + + for (auto loop : lp) { + loopBBs.clear(); + loopExits.clear(); + + const std::vector &inBBs = loop.first->getBlocks(); + for (auto b : inBBs) { + GBE_ASSERT(labelMap.find(b) != labelMap.end()); + loopBBs.push_back(labelMap[b]); + } + + SmallVector exitBBs; + loop.first->getExitEdges(exitBBs); + for(auto b : exitBBs){ + GBE_ASSERT(labelMap.find(b.first) != labelMap.end()); + GBE_ASSERT(labelMap.find(b.second) != labelMap.end()); + loopExits.push_back(std::make_pair(labelMap[b.first], labelMap[b.second])); + } + fn.addLoop(loopBBs, loopExits); + } + } void GenWriter::emitFunction(Function &F) { @@ -1464,6 +1537,7 @@ namespace gbe } ctx.startFunction(F.getName()); + ir::Function &fn = ctx.getFunction(); this->regTranslator.clear(); this->labelMap.clear(); this->emitFunctionPrototype(F); @@ -1484,11 +1558,13 @@ namespace gbe for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) this->simplifyTerminator(BB); + // gather loop info, which is useful for liveness analysis + gatherLoopInfo(fn); + // ... then, emit the instructions for all basic blocks pass = PASS_EMIT_INSTRUCTIONS; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) emitBasicBlock(BB); - ir::Function &fn = ctx.getFunction(); ctx.endFunction(); // Liveness can be shared when we optimized the immediates and the MOVs