// Insert the stack buffer if used
if (useStackPtr)
- insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
+ insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
// After this point the vector is immutable. Sorting it will make
// searching faster
// Check that everything is consistent in the kernel code
const uint32_t perLaneSize = kernel->getStackSize();
const uint32_t perThreadSize = perLaneSize * this->simdWidth;
- const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+ //const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
GBE_ASSERT(perLaneSize > 0);
GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
GenRegister::ud8grf(ir::ocl::stackptr) :
GenRegister::ud16grf(ir::ocl::stackptr);
const GenRegister stackptr = ra->genReg(selStatckPtr);
- const uint32_t nr = offset / GEN_REG_SIZE;
- const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t);
- const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr);
+ const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer);
+ const GenRegister bufferptr = ra->genReg(selStackBuffer);
// We compute the per-lane stack pointer here
p->push();
"local_size_0", "local_size_1", "local_size_2",
"global_size_0", "global_size_1", "global_size_2",
"global_offset_0", "global_offset_1", "global_offset_2",
- "stack_pointer",
+ "stack_pointer", "stack_buffer",
"block_ip",
"barrier_id", "thread_number",
"work_dimension",
DECL_NEW_REG(FAMILY_DWORD, goffset1, 1);
DECL_NEW_REG(FAMILY_DWORD, goffset2, 1);
DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
+ DECL_NEW_REG(FAMILY_DWORD, stackbuffer, 1);
DECL_NEW_REG(FAMILY_WORD, blockip, 0);
DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
DECL_NEW_REG(FAMILY_DWORD, threadn, 1);
static const Register goffset1 = Register(16); // get_global_offset(1)
static const Register goffset2 = Register(17); // get_global_offset(2)
static const Register stackptr = Register(18); // stack pointer
- static const Register blockip = Register(19); // blockip
- static const Register barrierid = Register(20);// barrierid
- static const Register threadn = Register(21); // number of threads
- static const Register workdim = Register(22); // work dimention.
- static const Register emask = Register(23); // store the emask bits for the branching fix.
- static const Register notemask = Register(24); // store the !emask bits for the branching fix.
- static const Register barriermask = Register(25); // software mask for barrier.
- static const Register retVal = Register(26); // helper register to do data flow analysis.
- static const uint32_t regNum = 27; // number of special registers
+ static const Register stackbuffer = Register(19); // stack buffer base address.
+ static const Register blockip = Register(20); // blockip
+ static const Register barrierid = Register(21);// barrierid
+ static const Register threadn = Register(22); // number of threads
+ static const Register workdim = Register(23); // work dimension.
+ static const Register emask = Register(24); // store the emask bits for the branching fix.
+ static const Register notemask = Register(25); // store the !emask bits for the branching fix.
+ static const Register barriermask = Register(26); // software mask for barrier.
+ static const Register retVal = Register(27); // helper register to do data flow analysis.
+ static const uint32_t regNum = 28; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */