From: Yang Rong Date: Thu, 9 Oct 2014 06:07:56 +0000 (+0800) Subject: BDW: Correct stack setting in BDW. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8d24605c064b72ca0ddc4b0060b77e54d458c81b;p=contrib%2Fbeignet.git BDW: Correct stack setting in BDW. Remove the special fftid handling for HSW in Gen8Context, and change the stack buffer address to QWORD for curbe. Because this only wastes 4 bytes of register space on other platforms, change to QWORD for all platforms. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong Reviewed-by: Junyan He --- diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index fbd4f9c..577cb9c 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -51,50 +51,6 @@ namespace gbe return i; } - void Gen8Context::emitStackPointer(void) { - using namespace ir; - - // Only emit stack pointer computation if we use a stack - if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0) - return; - - // Check that everything is consistent in the kernel code - const uint32_t perLaneSize = kernel->getStackSize(); - const uint32_t perThreadSize = perLaneSize * this->simdWidth; - GBE_ASSERT(perLaneSize > 0); - GBE_ASSERT(isPowerOf<2>(perLaneSize) == true); - GBE_ASSERT(isPowerOf<2>(perThreadSize) == true); - - // Use shifts rather than muls which are limited to 32x16 bit sources - const uint32_t perLaneShift = logi2(perLaneSize); - const uint32_t perThreadShift = logi2(perThreadSize); - const GenRegister selStatckPtr = this->simdWidth == 8 ? 
- GenRegister::ud8grf(ir::ocl::stackptr) : - GenRegister::ud16grf(ir::ocl::stackptr); - const GenRegister stackptr = ra->genReg(selStatckPtr); - const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer); - const GenRegister bufferptr = ra->genReg(selStackBuffer); - - // We compute the per-lane stack pointer here - p->push(); - p->curr.execWidth = 1; - p->curr.predicate = GEN_PREDICATE_NONE; - //p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x1ff)); - p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x7f)); - p->AND(GenRegister::ud1grf(126,4), GenRegister::ud1grf(0,5), GenRegister::immud(0x180)); - p->SHR(GenRegister::ud1grf(126,4), GenRegister::ud1grf(126, 4), GenRegister::immud(7)); - p->curr.execWidth = this->simdWidth; - p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift)); - p->curr.execWidth = 1; - p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(2)); - p->ADD(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::ud1grf(126, 4)); - p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift)); - p->curr.execWidth = this->simdWidth; - p->ADD(stackptr, stackptr, bufferptr); - p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0)); - p->pop(); - } - void Gen8Context::newSelection(void) { this->sel = GBE_NEW(Selection8, *this); } diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index ff3395d..627f4c0 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -38,8 +38,6 @@ namespace gbe }; /*! device's max srcatch buffer size */ #define GEN8_SCRATCH_SIZE (2 * KB * KB) - /*! Emit the per-lane stack pointer computation */ - virtual void emitStackPointer(void); /*! Align the scratch size to the device's scratch unit size */ virtual uint32_t alignScratchSize(uint32_t size); /*! 
Get the device's max srcatch size */ diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index fc69367..57cce4b 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -48,11 +48,11 @@ namespace ir { #if GBE_DEBUG #define DECL_NEW_REG(FAMILY, REG, UNIFORM) \ - r = fn.newRegister(FAMILY_DWORD, UNIFORM); \ + r = fn.newRegister(FAMILY, UNIFORM); \ GBE_ASSERT(r == REG); #else #define DECL_NEW_REG(FAMILY, REG, UNIFORM) \ - fn.newRegister(FAMILY_DWORD, UNIFORM); + fn.newRegister(FAMILY, UNIFORM); #endif /* GBE_DEBUG */ static void init(Function &fn) { IF_DEBUG(Register r); @@ -75,7 +75,7 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, goffset1, 1); DECL_NEW_REG(FAMILY_DWORD, goffset2, 1); DECL_NEW_REG(FAMILY_DWORD, stackptr, 0); - DECL_NEW_REG(FAMILY_DWORD, stackbuffer, 1); + DECL_NEW_REG(FAMILY_QWORD, stackbuffer, 1); DECL_NEW_REG(FAMILY_WORD, blockip, 0); DECL_NEW_REG(FAMILY_DWORD, barrierid, 1); DECL_NEW_REG(FAMILY_DWORD, threadn, 1);