}
}
+ void GenContext::clearFlagRegister(void) {
+ // When the group size is not aligned to simdWidth, the flag registers
+ // need to be cleared to make predication (ANY8H/ANY16H) work correctly.
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->curr.execWidth = 1;
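+ // Each MOV below should compile to roughly "mov(1) f0<1>:UD 0x0:UD
+ // {NoMask}": an unpredicated scalar write that zeroes all 32 bits of
+ // the flag register, including bits mapped to inactive lanes.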
+ p->MOV(GenRegister::retype(GenRegister::flag(0,0), GEN_TYPE_UD), GenRegister::immud(0x0));
+ p->MOV(GenRegister::retype(GenRegister::flag(1,0), GEN_TYPE_UD), GenRegister::immud(0x0));
+ p->pop();
+ }
+
void GenContext::emitStackPointer(void) {
using namespace ir;
schedulePostRegAllocation(*this, *this->sel);
if (OCL_OUTPUT_REG_ALLOC)
ra->outputAllocation();
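+ // Zero f0/f1 before any predicated branch is emitted, so stale bits
+ // in the lanes beyond the work-group size cannot satisfy ANY8H/ANY16H.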
+ this->clearFlagRegister();
this->emitStackPointer();
this->emitInstructionStream();
this->patchBranches();
INLINE const ir::Function &getFunction(void) const { return fn; }
/*! Simd width chosen for the current function */
INLINE uint32_t getSimdWidth(void) const { return simdWidth; }
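+ /*! Clear the flag registers (f0/f1) before emitting predicated code */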
+ void clearFlagRegister(void);
/*! Emit the per-lane stack pointer computation */
void emitStackPointer(void);
/*! Emit the instructions */
sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel));
// Branch to the jump target
+ // XXX TODO: For group sizes not aligned to simdWidth, ALL8H/ALL16H may
+ // not work correctly, as the flag register bits mapped to inactive
+ // lanes tend to be zero.
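+ // (ANY8H/ANY16H need only one set bit among the 8/16 flag bits, so the
+ // zeroed inactive-lane bits are harmless; ALL8H/ALL16H need every bit
+ // set, which those zeroed bits can never satisfy.)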
if (simdWidth == 8)
sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
else if (simdWidth == 16)