// When the group size is not aligned to simdWidth, the flag register needs to
// be cleared so that predication (any8h/any16h) works correctly.
const GenRegister blockip = ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+ const GenRegister zero = ra->genReg(GenRegister::uw1grf(ir::ocl::zero));
+ const GenRegister one = ra->genReg(GenRegister::uw1grf(ir::ocl::one));
p->push(); // NOTE(review): presumably saves the encoder state that pop() below restores — confirm
p->curr.noMask = 1;
p->curr.predicate = GEN_PREDICATE_NONE;
// First write GEN_MAX_LABEL to blockip with noMask set and no predicate...
p->MOV(blockip, GenRegister::immuw(GEN_MAX_LABEL));
p->curr.noMask = 0;
// ...then write 0 to blockip with noMask cleared.
// NOTE(review): presumably only the enabled lanes receive 0 — confirm.
p->MOV(blockip, GenRegister::immuw(0));
+ p->curr.execWidth = 1; // the zero/one MOVs below are issued with an execution width of 1
+ // FIXME: compute the final use set of zero/one; if neither has a user,
+ // the following two instructions do not need to be generated.
+ p->MOV(zero, GenRegister::immuw(0));
+ p->MOV(one, GenRegister::immw(-1)); // NOTE(review): loads -1 (all bits set in two's complement), not the integer 1 — presumably intentional; confirm
p->pop();
}
allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
+ allocCurbeReg(zero, GBE_CURBE_ZERO);
+ allocCurbeReg(one, GBE_CURBE_ONE);
if (stackUse.size() != 0)
allocCurbeReg(stackbuffer, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
// Go over the arguments and find the related patch locations
GBE_CURBE_EXTRA_ARGUMENT,
GBE_CURBE_BLOCK_IP,
GBE_CURBE_THREAD_NUM,
+ GBE_CURBE_ZERO,
+ GBE_CURBE_ONE,
};
/*! Extra arguments use the negative range of sub-values */
"global_offset_0", "global_offset_1", "global_offset_2",
"stack_pointer", "stack_buffer",
"block_ip",
- "barrier_id", "thread_number",
- "work_dimension",
+ "barrier_id", "thread_number", "work_dimension",
+ "zero", "one",
"retVal"
};
DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
DECL_NEW_REG(FAMILY_DWORD, threadn, 1);
DECL_NEW_REG(FAMILY_DWORD, workdim, 1);
// NOTE(review): zero/one are declared with FAMILY_DWORD here, but the code that
// initializes them accesses them through GenRegister::uw1grf — confirm that the
// DWORD family is intended.
+ DECL_NEW_REG(FAMILY_DWORD, zero, 1);
+ DECL_NEW_REG(FAMILY_DWORD, one, 1);
DECL_NEW_REG(FAMILY_WORD, retVal, 1);
}
#undef DECL_NEW_REG
static const Register barrierid = Register(21);// barrierid
static const Register threadn = Register(22); // number of threads
static const Register workdim = Register(23); // work dimension.
- static const Register retVal = Register(24); // helper register to do data flow analysis.
- static const uint32_t regNum = 25; // number of special registers
+ static const Register zero = Register(24); // scalar register holds zero.
+ static const Register one = Register(25); // scalar register named "one"; NOTE(review): the visible initializing MOV writes immw(-1), not 1 — confirm.
+ static const Register retVal = Register(26); // helper register to do data flow analysis.
+ static const uint32_t regNum = 27; // number of special registers (highest index declared above is 26)
extern const char *specialRegMean[]; // special register names.
} /* namespace ocl */