return this->liveness->getLiveIn(bb);
}
- /*! Get the extra liveOut information for the given block */
- INLINE const ir::Liveness::LiveOut &getExtraLiveOut(const ir::BasicBlock *bb) const {
- return this->liveness->getExtraLiveOut(bb);
- }
- /*! Get the extra LiveIn information for the given block */
- INLINE const ir::Liveness::UEVar &getExtraLiveIn(const ir::BasicBlock *bb) const {
- return this->liveness->getExtraLiveIn(bb);
- }
-
void collectShifter(GenRegister dest, GenRegister src);
void loadTopHalf(GenRegister dest, GenRegister src);
void storeTopHalf(GenRegister dest, GenRegister src);
for (auto reg : ctx.getLiveIn(bb))
this->intervals[reg].minID = std::min(this->intervals[reg].minID, firstID);
- for (auto reg : ctx.getExtraLiveIn(bb))
- this->intervals[reg].minID = std::min(this->intervals[reg].minID, firstID);
// All registers alive at the end of the block must have their intervals
// updated as well
for (auto reg : ctx.getLiveOut(bb))
this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID);
-
- for (auto reg : ctx.getExtraLiveOut(bb))
- this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID);
}
this->intervals[ocl::emask].minID = 0;
// function
lowerReturn(unit, fn->getName());
+ // Properly order labels and compute the CFG, it's needed by FunctionArgumentLower
+ fn->sortLabels();
+ fn->computeCFG();
+
// Spill function argument to the stack if required and identify which
// function arguments can use constant push
lowerFunctionArguments(unit, fn->getName());
- // Properly order labels and compute the CFG
- fn->sortLabels();
- fn->computeCFG();
const StackElem elem = fnStack.back();
fnStack.pop_back();
fn = elem.fn;
Function::~Function(void) {
for (auto block : blocks) GBE_DELETE(block);
+ for (auto loop : loops) GBE_DELETE(loop);
for (auto arg : args) GBE_DELETE(arg);
}
return unit.getPointerFamily();
}
+ void Function::addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits) {
+ loops.push_back(GBE_NEW(Loop, bbs, exits));
+ }
+
void Function::sortLabels(void) {
uint32_t last = 0;
}
});
+ // fix labels for loops
+ for (auto &x : loops) {
+ for (auto &y : x->bbs)
+ y = labelMap[y];
+
+ for (auto &z : x->exits) {
+ z.first = labelMap[z.first];
+ z.second = labelMap[z.second];
+ }
+ }
+
// Reset the label to block mapping
this->labels.resize(last);
foreachBlock([&](BasicBlock &bb) {
return arg0.offset < arg1.offset;
}
+ /*! CFG loops */
+ struct Loop : public NonCopyable
+ {
+ public:
+ Loop(const vector<LabelIndex> &in, const vector<std::pair<LabelIndex, LabelIndex>> &exit) :
+ bbs(in), exits(exit) {}
+ vector<LabelIndex> bbs;
+ vector<std::pair<LabelIndex, LabelIndex>> exits;
+ GBE_STRUCT(Loop);
+ };
+
/*! A function is :
* - a register file
* - a set of basic block layout into a CGF
INLINE const uint32_t getStackSize(void) const { return this->stackSize; }
/*! Push stack size. */
INLINE void pushStackSize(uint32_t step) { this->stackSize += step; }
+ /*! add the loop info for later liveness analysis */
+ void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits);
+ INLINE const vector<Loop * > &getLoops() { return loops; }
private:
friend class Context; //!< Can freely modify a function
std::string name; //!< Function name
vector<BasicBlock*> labels; //!< Each label points to a basic block
vector<Immediate> immediates; //!< All immediate values in the function
vector<BasicBlock*> blocks; //!< All chained basic blocks
+ vector<Loop *> loops; //!< Loops info of the function
RegisterFile file; //!< RegisterDatas used by the instructions
Profile profile; //!< Current function profile
PushMap pushMap; //!< Pushed function arguments (reg->loc)
if (op == OP_RET) {
workSet.insert(info);
info->liveOut.insert(ocl::retVal);
- } else if (op == OP_BRA) {
- // If this is a backward jump, put it to the extra work list.
- if (((BranchInstruction*)lastInsn)->getLabelIndex() < bb.getLabelIndex())
- extraWorkSet.insert(info);
}
});
// Now with iterative analysis, we compute liveout and livein sets
this->computeLiveInOut();
- for (auto it : extraWorkSet) {
- for (auto reg : it->liveOut) {
- it->extraLiveIn.insert(reg);
- }
- }
+ // extend register (def in loop, use out-of-loop) liveness to the whole loop
this->computeExtraLiveInOut();
}
BlockInfo *info = liveness[&bb];
auto &outVarSet = info->liveOut;
auto &inVarSet = info->upwardUsed;
- auto &extraInVarSet = info->extraLiveIn;
- auto &extraOutVarSet = info->extraLiveOut;
printf("\n\tin Lives: ");
for (auto inVar : inVarSet) {
printf("%d ", inVar);
}
/*
- Consider the following scenario, %100's normal liveness will start from Ln-1's
- position. In normal analysis, the Ln-1 is not Ln's predecessor, thus the liveness
- of %100 will be passed to Ln and then will not be passed to L0.
-
- But considering we are running on a multilane with predication's vector machine.
- The unconditional BR in Ln-1 may be removed and it will enter Ln with a subset of
- the revert set of Ln-1's predication. For example when running Ln-1, the active lane
- is 0-7, then at Ln the active lane is 8-15. Then at the end of Ln, a subset of 8-15
- will jump to L0. If a register %10 is allocated the same GRF as %100, given the fact
- that their normal liveness doesn't overlapped, the a subset of 8-15 lanes will be
- modified. If the %10 and %100 are the same vector data type, then we are fine. But if
- %100 is a float vector, and the %10 is a bool or short vector, then we hit a bug here.
-
-L0:
- ...
- %10 = 5
- ...
-Ln-1:
- %100 = 2
- BR Ln+1
-
-Ln:
- ...
- BR(%xxx) L0
-
-Ln+1:
- %101 = %100 + 2;
- ...
-
- The solution to fix this issue is to build another liveness data. We will start with
- those BBs with backward jump. Then pass all the liveOut register as extra liveIn
- of current BB and then forward this extra liveIn to all the blocks. This is very similar
- to the normal liveness analysis just with reverse direction.
+ As we run in SIMD mode with prediction mask to indicate active lanes.
+ If a vreg is defined in a loop, and there are som uses of the vreg out of the loop,
+ the define point may be run several times under *different* prediction mask.
+ For these kinds of vreg, we must extend the vreg liveness into the whole loop.
+ If we don't do this, it's liveness is killed before the def point inside loop.
+ If the vreg's corresponding physical reg is assigned to other vreg during the
+ killed period, and the instructions before kill point were re-executed with different prediction,
+ the inactive lanes of vreg maybe over-written. Then the out-of-loop use will got wrong data.
*/
+
void Liveness::computeExtraLiveInOut(void) {
- while(!extraWorkSet.empty()) {
- struct BlockInfo *currInfo = *extraWorkSet.begin();
- extraWorkSet.erase(currInfo);
- for (auto currInVar : currInfo->extraLiveIn)
- currInfo->extraLiveOut.insert(currInVar);
- bool isChanged = false;
- for (auto succ : currInfo->bb.getSuccessorSet()) {
- BlockInfo *succInfo = liveness[succ];
- for (auto currOutVar : currInfo->extraLiveOut) {
- bool changed = false;
- if (!succInfo->upwardUsed.contains(currOutVar)) {
- auto it = succInfo->extraLiveIn.insert(currOutVar);
- changed = it.second;
+ const vector<Loop *> &loops = fn.getLoops();
+ if(loops.size() == 0) return;
+
+ for (auto l : loops) {
+ for (auto x : l->exits) {
+ const BasicBlock &a = fn.getBlock(x.first);
+ const BasicBlock &b = fn.getBlock(x.second);
+ BlockInfo * exiting = liveness[&a];
+ BlockInfo * exit = liveness[&b];
+ std::vector<Register> toExtend;
+
+ if(b.getPredecessorSet().size() > 1) {
+ for (auto p : exit->upwardUsed)
+ toExtend.push_back(p);
+ } else {
+ std::set_intersection(exiting->liveOut.begin(), exiting->liveOut.end(), exit->upwardUsed.begin(), exit->upwardUsed.end(), std::back_inserter(toExtend));
+ }
+ if (toExtend.size() == 0) continue;
+
+ for (auto bb : l->bbs) {
+ BlockInfo * bI = liveness[&fn.getBlock(bb)];
+ for(auto r : toExtend) {
+ if(!bI->upwardUsed.contains(r))
+ bI->upwardUsed.insert(r);
+ bI->liveOut.insert(r);
}
- if (changed) isChanged = true;
}
- if (isChanged)
- extraWorkSet.insert(succInfo);}
- };
+ }
+ }
#if 0
fn.foreachBlock([this](const BasicBlock &bb){
printf("label %d:\n", bb.getLabelIndex());
BlockInfo *info = liveness[&bb];
auto &outVarSet = info->liveOut;
auto &inVarSet = info->upwardUsed;
- auto &extraInVarSet = info->extraLiveIn;
- auto &extraOutVarSet = info->extraLiveOut;
- printf("\n\tin Lives: ");
+ printf("\n\tLive Ins: ");
for (auto inVar : inVarSet) {
printf("%d ", inVar);
}
- printf("\n\textra in Lives: ");
- for (auto inVar : extraInVarSet) {
- printf("%d ", inVar);
- }
printf("\n");
- printf("\tout Lives: ");
+ printf("\tLive outs: ");
for (auto outVar : outVarSet) {
printf("%d ", outVar);
}
- printf("\n\textra out Lives: ");
- for (auto outVar : extraOutVarSet) {
- printf("%d ", outVar);
- }
printf("\n");
});
INLINE bool inVarKill(Register reg) const {
return varKill.contains(reg);
}
- UEVar extraLiveIn;
- LiveOut extraLiveOut;
UEVar upwardUsed;
LiveOut liveOut;
VarKill varKill;
return info.upwardUsed;
}
- /*! Get the set of extra registers alive at the end of the block */
- const LiveOut &getExtraLiveOut(const BasicBlock *bb) const {
- const BlockInfo &info = this->getBlockInfo(bb);
- return info.extraLiveOut;
- }
- /*! Get the set of extra registers alive at the beginning of the block */
- const UEVar &getExtraLiveIn(const BasicBlock *bb) const {
- const BlockInfo &info = this->getBlockInfo(bb);
- return info.extraLiveIn;
- }
-
/*! Return the function the liveness was computed on */
INLINE const Function &getFunction(void) const { return fn; }
/*! Actually do something for each successor / predecessor of *all* blocks */
void computeExtraLiveInOut(void);
/*! Set of work list block which has exit(return) instruction */
typedef set <struct BlockInfo*> WorkSet;
- WorkSet workSet, extraWorkSet;
+ WorkSet workSet;
/*! Use custom allocators */
GBE_CLASS(Liveness);
if(!bKernel) return false;
LI = &getAnalysis<LoopInfo>();
-
emitFunction(F);
return false;
}
virtual bool doFinalization(Module &M) { return false; }
/*! handle global variable register allocation (local, constant space) */
void allocateGlobalVariableRegister(Function &F);
+ /*! gather all the loops in the function and add them to ir::Function */
+ void gatherLoopInfo(ir::Function &fn);
/*! Emit the complete function code and declaration */
void emitFunction(Function &F);
/*! Handle input and output function parameters */
}
}
+ static INLINE void findAllLoops(LoopInfo * LI, std::vector<std::pair<Loop*, int>> &lp)
+ {
+ for (Loop::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) {
+ lp.push_back(std::make_pair(*I, -1));
+ }
+ if (lp.size() == 0) return;
+
+ uint32_t i = 0;
+ do {
+ const std::vector<Loop*> subLoops = lp[i].first->getSubLoops();
+ for(auto sub : subLoops)
+ lp.push_back(std::make_pair(sub, i));
+ i++;
+ } while(i < lp.size());
+ }
+
+ void GenWriter::gatherLoopInfo(ir::Function &fn) {
+ vector<ir::LabelIndex> loopBBs;
+ vector<std::pair<ir::LabelIndex, ir::LabelIndex>> loopExits;
+ std::vector<std::pair<Loop*, int>> lp;
+
+ findAllLoops(LI, lp);
+#if GBE_DEBUG
+ // check two loops' interference
+ for(unsigned int i = 0; i < lp.size(); i++) {
+ SmallVector<Loop::Edge, 8> exitBBs;
+ lp[i].first->getExitEdges(exitBBs);
+
+ const std::vector<BasicBlock*> &inBBs = lp[i].first->getBlocks();
+ std::vector<ir::LabelIndex> bbs1;
+ for(auto x : inBBs) {
+ bbs1.push_back(labelMap[x]);
+ }
+ std::sort(bbs1.begin(), bbs1.end());
+ for(unsigned int j = i+1; j < lp.size(); j++) {
+ if(! lp[i].first->contains(lp[j].first)) {
+ const std::vector<BasicBlock*> &inBBs2 = lp[j].first->getBlocks();
+ std::vector<ir::LabelIndex> bbs2;
+ std::vector<ir::LabelIndex> bbs3;
+
+ for(auto x : inBBs2) {
+ bbs2.push_back(labelMap[x]);
+ }
+
+ std::sort(bbs2.begin(), bbs2.end());
+ std::set_intersection(bbs1.begin(), bbs1.end(), bbs2.begin(), bbs2.end(), std::back_inserter(bbs3));
+ GBE_ASSERT(bbs3.size() < 1);
+ }
+ }
+ }
+#endif
+
+ for (auto loop : lp) {
+ loopBBs.clear();
+ loopExits.clear();
+
+ const std::vector<BasicBlock*> &inBBs = loop.first->getBlocks();
+ for (auto b : inBBs) {
+ GBE_ASSERT(labelMap.find(b) != labelMap.end());
+ loopBBs.push_back(labelMap[b]);
+ }
+
+ SmallVector<Loop::Edge, 8> exitBBs;
+ loop.first->getExitEdges(exitBBs);
+ for(auto b : exitBBs){
+ GBE_ASSERT(labelMap.find(b.first) != labelMap.end());
+ GBE_ASSERT(labelMap.find(b.second) != labelMap.end());
+ loopExits.push_back(std::make_pair(labelMap[b.first], labelMap[b.second]));
+ }
+ fn.addLoop(loopBBs, loopExits);
+ }
+ }
void GenWriter::emitFunction(Function &F)
{
}
ctx.startFunction(F.getName());
+ ir::Function &fn = ctx.getFunction();
this->regTranslator.clear();
this->labelMap.clear();
this->emitFunctionPrototype(F);
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
this->simplifyTerminator(BB);
+ // gather loop info, which is useful for liveness analysis
+ gatherLoopInfo(fn);
+
// ... then, emit the instructions for all basic blocks
pass = PASS_EMIT_INSTRUCTIONS;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
emitBasicBlock(BB);
- ir::Function &fn = ctx.getFunction();
ctx.endFunction();
// Liveness can be shared when we optimized the immediates and the MOVs