From: Zhigang Gong Date: Wed, 12 Mar 2014 08:51:57 +0000 (+0800) Subject: GBE: fix the wrong usage of stack pointer and stack buffer. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=75228e0be722b8650a07e17d12f2d565eac11371;p=contrib%2Fbeignet.git GBE: fix the wrong usage of stack pointer and stack buffer. The stack pointer and the stack buffer should be two different virtual registers: the stack pointer is a vector and the stack buffer is a scalar. The reason the previous implementation could work is that it searched for the curbe offset and made a new stack buffer register manually, which is not good. Now fix it and remove that hacky code. We actually don't need to use the curbe offset manually after the allocation. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" Reviewed-by: "Song, Ruiling" --- diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index b0402b9..b8f4171 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -538,7 +538,7 @@ namespace gbe // Insert the stack buffer if used if (useStackPtr) - insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize)); + insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize)); // After this point the vector is immutable. 
Sorting it will make // research faster diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index f6848b2..8bcf454 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -147,7 +147,7 @@ namespace gbe // Check that everything is consistent in the kernel code const uint32_t perLaneSize = kernel->getStackSize(); const uint32_t perThreadSize = perLaneSize * this->simdWidth; - const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER); + //const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER); GBE_ASSERT(perLaneSize > 0); GBE_ASSERT(isPowerOf<2>(perLaneSize) == true); GBE_ASSERT(isPowerOf<2>(perThreadSize) == true); @@ -159,9 +159,8 @@ namespace gbe GenRegister::ud8grf(ir::ocl::stackptr) : GenRegister::ud16grf(ir::ocl::stackptr); const GenRegister stackptr = ra->genReg(selStatckPtr); - const uint32_t nr = offset / GEN_REG_SIZE; - const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t); - const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr); + const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer); + const GenRegister bufferptr = ra->genReg(selStackBuffer); // We compute the per-lane stack pointer here p->push(); diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index 0a64d81..f91e5d4 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -37,7 +37,7 @@ namespace ir { "local_size_0", "local_size_1", "local_size_2", "global_size_0", "global_size_1", "global_size_2", "global_offset_0", "global_offset_1", "global_offset_2", - "stack_pointer", + "stack_pointer", "stack_buffer", "block_ip", "barrier_id", "thread_number", "work_dimension", @@ -73,6 +73,7 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, goffset1, 1); DECL_NEW_REG(FAMILY_DWORD, goffset2, 1); DECL_NEW_REG(FAMILY_DWORD, stackptr, 0); + DECL_NEW_REG(FAMILY_DWORD, 
stackbuffer, 1); DECL_NEW_REG(FAMILY_WORD, blockip, 0); DECL_NEW_REG(FAMILY_DWORD, barrierid, 1); DECL_NEW_REG(FAMILY_DWORD, threadn, 1); diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index cda5edf..cc19fcb 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -60,15 +60,16 @@ namespace ir { static const Register goffset1 = Register(16); // get_global_offset(1) static const Register goffset2 = Register(17); // get_global_offset(2) static const Register stackptr = Register(18); // stack pointer - static const Register blockip = Register(19); // blockip - static const Register barrierid = Register(20);// barrierid - static const Register threadn = Register(21); // number of threads - static const Register workdim = Register(22); // work dimention. - static const Register emask = Register(23); // store the emask bits for the branching fix. - static const Register notemask = Register(24); // store the !emask bits for the branching fix. - static const Register barriermask = Register(25); // software mask for barrier. - static const Register retVal = Register(26); // helper register to do data flow analysis. - static const uint32_t regNum = 27; // number of special registers + static const Register stackbuffer = Register(19); // stack buffer base address. + static const Register blockip = Register(20); // blockip + static const Register barrierid = Register(21);// barrierid + static const Register threadn = Register(22); // number of threads + static const Register workdim = Register(23); // work dimention. + static const Register emask = Register(24); // store the emask bits for the branching fix. + static const Register notemask = Register(25); // store the !emask bits for the branching fix. + static const Register barriermask = Register(26); // software mask for barrier. + static const Register retVal = Register(27); // helper register to do data flow analysis. 
+ static const uint32_t regNum = 28; // number of special registers extern const char *specialRegMean[]; // special register name. } /* namespace ocl */